nasm-know-hows

nasm assembly related stuff

View on GitHub

📘 Topic 12: Arrays & Strings

Master array manipulation and string operations in assembly, including powerful string instructions.


Overview

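Arrays and strings are fundamental data structures. Assembly provides scaled indexed addressing for element access, dedicated string instructions (MOVS, LODS, STOS, SCAS, CMPS), and REP prefixes for processing whole blocks of memory.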


Part 1: Arrays in Assembly

Declaring Arrays

C Equivalent:

int numbers[] = {10, 20, 30, 40, 50};  // 5 elements; length deduced from the initializer
char name[] = "Hello";                 // 6 chars: the 5 letters plus the terminating '\0'
int buffer[100];  // Uninitialized

Assembly:

section .data
    ; Initialized arrays (dd = 4-byte elements, db = 1-byte elements)
    numbers dd 10, 20, 30, 40, 50       ; 5 ints (20 bytes)
    floats  dd 1.0, 2.5, 3.14          ; 3 floats (12 bytes)
    name    db "Hello", 0               ; String (6 bytes including null)
    bytes   db 0x41, 0x42, 0x43        ; 3 bytes
    
section .bss
    ; Uninitialized arrays (resb/resd/resq reserve 1/4/8-byte elements)
    buffer  resb 100                    ; 100 bytes
    int_arr resd 50                     ; 50 ints (200 bytes)
    long_arr resq 20                    ; 20 longs (160 bytes)

Accessing Array Elements

C Equivalent:

/* Return the element stored `index` positions past the start of `array`. */
int get_element(int *array, int index) {
    int *slot = array + index;
    return *slot;
}

Assembly:

; int get_element(int *array, int index)
; Args: array=RDI, index=ESI
; Returns: EAX = array[index]
; Clobbers: RSI
get_element:
    movsxd rsi, esi             ; index is a 32-bit int: sign-extend it, since the
                                ; upper half of RSI is not defined by the caller
    mov eax, [rdi + rsi*4]      ; array[index], scale=4 for int
    ret

Part 2: Common Array Operations

Example 1: Sum Array

C Equivalent:

/* Add up the first `size` elements of `arr`; yields 0 when size <= 0. */
int sum(int *arr, int size) {
    int total = 0;
    int i = 0;
    while (i < size) {
        total += arr[i];
        ++i;
    }
    return total;
}

Assembly:

; int sum(int *arr, int size)
; Args: arr=RDI, size=ESI
; Returns: EAX = arr[0] + ... + arr[size-1] (0 when size <= 0)
; Clobbers: RCX
sum:
    xor eax, eax                ; total = 0
    xor ecx, ecx                ; i = 0 (32-bit write also clears upper RCX)
.next:                          ; local label: `loop` is an instruction mnemonic
    cmp ecx, esi
    jge .done                   ; signed compare: stop once i >= size
    add eax, [rdi + rcx*4]      ; total += arr[i]
    inc ecx
    jmp .next
.done:
    ret

Example 2: Find Maximum

C Equivalent:

/* Largest value among arr[0..size-1]; arr[0] is read unconditionally. */
int find_max(int *arr, int size) {
    int best = arr[0];
    int i = 1;
    while (i < size) {
        int candidate = arr[i];
        if (candidate > best) {
            best = candidate;
        }
        i++;
    }
    return best;
}

Assembly:

; int find_max(int *arr, int size)
; Args: arr=RDI, size=ESI (arr[0] is read unconditionally)
; Returns: EAX = largest element
; Clobbers: RCX
find_max:
    mov eax, [rdi]              ; max = arr[0]
    mov ecx, 1                  ; i = 1 (32-bit write also clears upper RCX)
.next:                          ; local labels avoid the `loop` mnemonic and
    cmp ecx, esi                ; clashes with labels in other functions
    jge .done
    cmp eax, [rdi + rcx*4]
    jge .skip                   ; max >= arr[i]: keep current max
    mov eax, [rdi + rcx*4]      ; max = arr[i]
.skip:
    inc ecx
    jmp .next
.done:
    ret

Part 3: String Basics

Strings in Assembly

Strings are byte arrays terminated by null (0).

section .data
    msg db "Hello", 0           ; 6 bytes: 'H','e','l','l','o',0
    msg_len equ $ - msg         ; Length = 6: counts the null terminator too (5 visible characters)

Manual String Length

C Equivalent:

/* Count the bytes that precede the terminating '\0' of `str`. */
size_t strlen(const char *str) {
    size_t len;
    for (len = 0; str[len] != '\0'; len++) {
        /* counting happens in the loop header */
    }
    return len;
}

Assembly:

; size_t my_strlen(const char *str)
; Args: str=RDI
; Returns: RAX = number of bytes before the null terminator
my_strlen:
    xor eax, eax                ; len = 0 (32-bit write clears all of RAX)
.next:                          ; local label: `loop` is an instruction mnemonic
    cmp byte [rdi + rax], 0     ; Check for null
    je .done
    inc rax
    jmp .next
.done:
    ret

Part 4: String Instructions

x86 provides specialized string instructions that operate on memory blocks.

Direction Flag (DF)

Controls direction of string operations:

cld                             ; Clear DF (forward direction)
std                             ; Set DF (backward direction)

String Instructions Overview

┌──────────┬─────────────────────────────────────────────────┐
│ Instr    │ Operation                                       │
├──────────┼─────────────────────────────────────────────────┤
│ MOVSB    │ Move byte: [RDI] = [RSI], RSI++, RDI++         │
│ MOVSW    │ Move word (2 bytes)                             │
│ MOVSD    │ Move dword (4 bytes)                            │
│ MOVSQ    │ Move qword (8 bytes)                            │
├──────────┼─────────────────────────────────────────────────┤
│ LODSB    │ Load byte: AL = [RSI], RSI++                   │
│ LODSW    │ Load word: AX = [RSI], RSI += 2                │
│ LODSD    │ Load dword: EAX = [RSI], RSI += 4              │
│ LODSQ    │ Load qword: RAX = [RSI], RSI += 8              │
├──────────┼─────────────────────────────────────────────────┤
│ STOSB    │ Store byte: [RDI] = AL, RDI++                  │
│ STOSW    │ Store word: [RDI] = AX, RDI += 2               │
│ STOSD    │ Store dword: [RDI] = EAX, RDI += 4             │
│ STOSQ    │ Store qword: [RDI] = RAX, RDI += 8             │
├──────────┼─────────────────────────────────────────────────┤
│ SCASB    │ Scan byte: Compare AL with [RDI], RDI++        │
│ SCASW    │ Scan word                                       │
│ SCASD    │ Scan dword                                      │
│ SCASQ    │ Scan qword                                      │
├──────────┼─────────────────────────────────────────────────┤
│ CMPSB    │ Compare byte: [RSI] with [RDI], RSI++, RDI++   │
│ CMPSW    │ Compare word                                    │
│ CMPSD    │ Compare dword                                   │
│ CMPSQ    │ Compare qword                                   │
└──────────┴─────────────────────────────────────────────────┘

REP Prefix - Repeat

Repeats string instruction RCX times:

rep movsb                       ; Repeat MOVSB RCX times
rep stosb                       ; Repeat STOSB RCX times

Conditional repeats:

repe cmpsb                      ; Repeat while equal (and RCX > 0)
repne scasb                     ; Repeat while not equal (and RCX > 0)

Part 5: String Operations Examples

Example 1: Memory Copy (memcpy)

C Equivalent:

/* Copy n bytes from src to dest, front to back, and hand back dest. */
void *memcpy(void *dest, const void *src, size_t n) {
    char *out = dest;
    const char *in = src;
    for (size_t i = 0; i < n; i++) {
        out[i] = in[i];
    }
    return dest;
}

Assembly (Manual):

; void *my_memcpy(void *dest, const void *src, size_t n)
; Args: dest=RDI, src=RSI, n=RDX
; Returns: RAX = dest
; Clobbers: RCX, RDX, RSI, RDI
my_memcpy:
    mov rax, rdi                ; Save dest
    test rdx, rdx
    jz .done                    ; n == 0: nothing to copy
.next:                          ; local label: `loop` is an instruction mnemonic
    mov cl, [rsi]
    mov [rdi], cl               ; *dest = *src
    inc rsi
    inc rdi
    dec rdx
    jnz .next                   ; repeat until n reaches 0
.done:
    ret

Assembly (Optimized with REP):

; void *my_memcpy(void *dest, const void *src, size_t n)
; Args: dest=RDI, src=RSI, n=RDX
; Returns: RAX = dest
my_memcpy:
    cld                         ; DF = 0: copy towards higher addresses
    mov rcx, rdx                ; REP takes its repeat count from RCX
    mov rax, rdi                ; capture dest before MOVSB advances RDI
    rep movsb                   ; move RCX bytes from [RSI] to [RDI]
    ret

Example 2: Memory Set (memset)

C Equivalent:

/* Fill the first n bytes at s with the low byte of c; returns s. */
void *memset(void *s, int c, size_t n) {
    unsigned char *bytes = s;
    const unsigned char fill = (unsigned char)c;
    for (size_t i = 0; i < n; i++) {
        bytes[i] = fill;
    }
    return s;
}

Assembly:

; void *my_memset(void *s, int c, size_t n)
; Args: s=RDI, c=ESI, n=RDX
; Returns: RAX = s
; Clobbers: RCX, RDI, R8
my_memset:
    mov r8, rdi                 ; Save dest outside RAX: STOSB needs AL for the fill byte
    mov eax, esi                ; AL = value to set (low byte of 2nd arg)
    mov rcx, rdx                ; Count
    cld
    rep stosb                   ; Set RCX bytes at RDI to AL
    mov rax, r8                 ; Return the original dest pointer
    ret

Example 3: String Copy (strcpy)

C Equivalent:

/* Copy src, terminator included, into dest; returns dest. */
char *strcpy(char *dest, const char *src) {
    char *out = dest;
    for (;;) {
        char ch = *src++;
        *out++ = ch;
        if (ch == '\0') {
            break;
        }
    }
    return dest;
}

Assembly:

; char *my_strcpy(char *dest, const char *src)
; Args: dest=RDI, src=RSI
; Returns: RAX = dest
; Clobbers: RDX, RSI, RDI
my_strcpy:
    mov rdx, rdi                ; Save dest outside RAX: LODSB overwrites AL
    cld                         ; Forward direction
.next:                          ; local label: `loop` is an instruction mnemonic
    lodsb                       ; AL = [RSI++]
    stosb                       ; [RDI++] = AL
    test al, al
    jnz .next                   ; stop after the null terminator has been copied
    mov rax, rdx                ; Return the original dest pointer
    ret

Example 4: String Length (strlen)

C Equivalent:

/* Walk a pointer to the terminator and return its distance from s. */
size_t strlen(const char *s) {
    const char *end = s;
    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - s);
}

Assembly (with SCASB):

; size_t my_strlen(const char *s)
; Args: s=RDI
; Returns: RAX = number of bytes before the null terminator
; Clobbers: RCX, RDX, RDI
my_strlen:
    mov rdx, rdi                ; Save start in RDX (AL is needed for the search byte)
    xor eax, eax                ; Search for null (AL = 0)
    mov rcx, -1                 ; Max count (search forever)
    cld
    repne scasb                 ; Scan until AL found or RCX=0
    ; RDI now points one past null
    mov rax, rdi
    sub rax, rdx                ; Bytes scanned, terminator included
    dec rax                     ; Don't count null
    ret
    
; Simpler version:
; size_t my_strlen_simple(const char *s)
; Args: s=RDI
; Returns: RAX = number of bytes before the null terminator
my_strlen_simple:
    xor eax, eax                ; len = 0 (32-bit write clears all of RAX)
.next:                          ; local label: `loop` is an instruction mnemonic
    cmp byte [rdi + rax], 0
    je .done
    inc rax
    jmp .next
.done:
    ret

Example 5: String Compare (strcmp)

C Equivalent:

int strcmp(const char *s1, const char *s2) {
    while (*s1 && (*s1 == *s2)) {
        s1++;
        s2++;
    }
    return *(unsigned char*)s1 - *(unsigned char*)s2;
}

Assembly:

; int my_strcmp(const char *s1, const char *s2)
; Args: s1=RDI, s2=RSI
; Returns: EAX = 0 if equal, else (unsigned char)*s1 - (unsigned char)*s2
;          at the first differing position
; Clobbers: RCX, RSI, RDI
my_strcmp:
.next:                          ; local label: `loop` is an instruction mnemonic
    movzx eax, byte [rdi]       ; zero-extended loads: bytes compare as unsigned
    movzx ecx, byte [rsi]
    cmp eax, ecx
    jne .differ
    test eax, eax               ; Check if null
    jz .done                    ; both strings ended; EAX is already 0
    inc rdi
    inc rsi
    jmp .next

.differ:
    sub eax, ecx                ; difference of the mismatching bytes
.done:
    ret

Part 6: Multi-Dimensional Arrays

C Equivalent:

/* 3 rows x 4 columns, stored row after row. */
int matrix[3][4] = {
    { 1,  2,  3,  4},
    { 5,  6,  7,  8},
    { 9, 10, 11, 12}
};

/* Read the element at (row, col) of matrix. */
int get(int row, int col) {
    const int *selected_row = matrix[row];
    return selected_row[col];
}

Assembly:

section .data
    matrix dd 1,2,3,4, 5,6,7,8, 9,10,11,12
    COLS equ 4

section .text
; int get(int row, int col)
; Args: row=EDI, col=ESI
; Returns: EAX = matrix[row][col]
; Clobbers: RDI
get:
    imul edi, COLS              ; row * COLS
    add edi, esi                ; row * COLS + col
    movsxd rdi, edi             ; widen the signed 32-bit index for addressing
    lea rax, [rel matrix]       ; base in caller-saved RAX (RBX is callee-saved);
                                ; RIP-relative, so it also links as PIE
    mov eax, [rax + rdi*4]      ; matrix[row][col]
    ret

✅ Practice Exercises

Exercise 1: Reverse String

/* Reverse the null-terminated string `str` in place. */
void reverse(char *str) {
    size_t len = strlen(str);   /* size_t: strlen's own type, no truncation to int */
    for (size_t i = 0; i < len / 2; i++) {
        size_t j = len - 1 - i; /* mirror index; len >= 2 whenever the loop runs */
        char temp = str[i];
        str[i] = str[j];
        str[j] = temp;
    }
}
Solution

```nasm
; void reverse(char *str)
; Args: str=RDI
; Reverses the null-terminated string in place.
reverse:
    push rbx                    ; RBX is callee-saved and must survive the call
    mov rbx, rdi                ; str
    call my_strlen              ; RAX = len
    xor ecx, ecx                ; i = 0
    lea rdx, [rax - 1]          ; j = len - 1
.swap:                          ; local label: `loop` is an instruction mnemonic
    cmp rcx, rdx
    jge .done                   ; signed compare, so len == 0 (j = -1) exits at once
    ; Swap str[i] and str[j]
    movzx eax, byte [rbx + rcx]
    movzx r8d, byte [rbx + rdx]
    mov [rbx + rcx], r8b
    mov [rbx + rdx], al
    inc rcx
    dec rdx
    jmp .swap
.done:
    pop rbx
    ret
```

📋 Quick Reference

; String Instructions
cld             ; Clear direction flag (forward)
std             ; Set direction flag (backward)
rep movsb       ; Copy RCX bytes: RSI → RDI
rep stosb       ; Fill RCX bytes: AL → RDI
repne scasb     ; Find AL in [RDI], max RCX bytes

; Array Access
mov eax, [array + rcx*4]        ; array[index] (int)
mov rax, [array + rcx*8]        ; array[index] (long/ptr)
lea rax, [base + index*8 + off] ; Calculate address

🎉 Great! You’ve mastered arrays and strings!

Next: Topic 13: Multiplication & Division


← Previous Topic Back to Main Next Topic →