driver-know-hows

device driver related stuff

View on GitHub

Chapter 5: Concurrency and Synchronization

Table of Contents

  1. Introduction to Kernel Concurrency
  2. Atomic Operations
  3. Spinlocks
  4. Mutexes
  5. Semaphores
  6. Read-Write Locks
  7. Completion
  8. RCU (Read-Copy-Update)
  9. Memory Barriers
  10. Lock-Free Data Structures
  11. Choosing the Right Synchronization

Introduction to Kernel Concurrency

Sources of Concurrency in the Kernel

Concurrent Execution Contexts:

1. Multiple CPUs (SMP - Symmetric Multi-Processing)
   - Different CPUs executing same driver code simultaneously
   
2. Preemption
   - Higher priority task preempts current task
   - Can happen at any time (with CONFIG_PREEMPT)
   
3. Interrupts
   - Interrupt handler runs, interrupting current execution
   - Can access same data as interrupted code
   
4. Softirqs and Tasklets
   - Bottom-half handlers can run on any CPU
   
5. Kernel Preemption
   - Unlike item 2 (task preemption), this means kernel-mode code itself can be preempted (since the 2.6 series, when CONFIG_PREEMPT is enabled)

Race Conditions

/*
 * Example: Race condition (BUG!)
 */
static int counter = 0;  /* Shared variable */

void increment_counter(void)
{
    /*
     * This looks atomic but it's NOT!
     * 
     * Assembly:
     *   LOAD  counter -> register
     *   ADD   1 -> register  
     *   STORE register -> counter
     * 
     * If two CPUs execute this simultaneously:
     * 
     * CPU 0: LOAD 0
     * CPU 1: LOAD 0
     * CPU 0: ADD  -> 1
     * CPU 1: ADD  -> 1
     * CPU 0: STORE 1
     * CPU 1: STORE 1
     * 
     * Result: counter = 1 (should be 2!)
     */
    counter++;
}

Critical Sections

/*
 * Critical Section - Code that accesses shared data
 * 
 * Must be protected with synchronization primitives
 */

/* Unprotected (WRONG!) */
void unprotected_function(void)
{
    /* Critical section - accessing shared data */
    shared_var++;  /* RACE CONDITION! */
}

/* Protected (CORRECT) */
spinlock_t my_lock;

void protected_function(void)
{
    spin_lock(&my_lock);
    /* Critical section - safely accessing shared data */
    shared_var++;
    spin_unlock(&my_lock);
}

Atomic Operations

Atomic Integers

#include <linux/atomic.h>

/*
 * atomic_t - Atomic integer type
 * 
 * Operations on atomic_t are guaranteed to be atomic
 * (no race conditions, no need for locks)
 * 
 * Use for: Simple counters, flags
 */

typedef struct {
    int counter;
} atomic_t;

/*
 * Initialization
 */
atomic_t my_counter = ATOMIC_INIT(0);  /* Initialize to 0 */

/*
 * Basic operations
 */
void atomic_set(atomic_t *v, int i);        /* Set value */
int atomic_read(const atomic_t *v);         /* Read value */

/*
 * Arithmetic operations
 */
void atomic_add(int i, atomic_t *v);        /* Add i to v */
void atomic_sub(int i, atomic_t *v);        /* Subtract i from v */
void atomic_inc(atomic_t *v);               /* Increment by 1 */
void atomic_dec(atomic_t *v);               /* Decrement by 1 */

/*
 * Arithmetic with return value
 */
int atomic_add_return(int i, atomic_t *v);  /* Add and return new value */
int atomic_sub_return(int i, atomic_t *v);  /* Subtract and return new value */
int atomic_inc_return(atomic_t *v);         /* Increment and return */
int atomic_dec_return(atomic_t *v);         /* Decrement and return */

/*
 * Test operations (return true/false)
 */
int atomic_inc_and_test(atomic_t *v);       /* Increment and test if zero */
int atomic_dec_and_test(atomic_t *v);       /* Decrement and test if zero */
int atomic_sub_and_test(int i, atomic_t *v); /* Subtract and test if zero */
int atomic_add_negative(int i, atomic_t *v); /* Add and test if negative */

/*
 * Compare and exchange
 */
int atomic_cmpxchg(atomic_t *v, int old, int new);  /* If v==old, set v=new */

/*
 * 64-bit atomic operations (atomic64_t)
 */
typedef struct {
    long counter;
} atomic64_t;

/* Same operations with atomic64_ prefix */
atomic64_t my_big_counter = ATOMIC64_INIT(0);
void atomic64_add(long i, atomic64_t *v);
long atomic64_read(const atomic64_t *v);
/* ... etc ... */

Atomic Operations Examples

/*
 * Example 1: Reference counting
 */
struct my_object {
    atomic_t refcount;
    void *data;
};

static struct my_object *create_object(void)
{
    struct my_object *obj;
    
    obj = kzalloc(sizeof(*obj), GFP_KERNEL);
    if (!obj)
        return NULL;
    
    /* Initialize reference count to 1 */
    atomic_set(&obj->refcount, 1);
    
    return obj;
}

static void get_object(struct my_object *obj)
{
    /*
     * Take a reference. Use the _return variant so the value we log is
     * the one produced by our own increment: a separate atomic_read()
     * after atomic_inc() (as in the original) can observe a concurrent
     * update and print a misleading count.
     */
    int refs = atomic_inc_return(&obj->refcount);

    pr_info("Object ref count: %d\n", refs);
}

static void put_object(struct my_object *obj)
{
    /*
     * Drop a reference. atomic_dec_return() gives the post-decrement
     * value atomically; zero means we held the last reference and must
     * free the object.
     *
     * NOTE: the original re-read the counter with atomic_read() in the
     * non-zero branch. That is racy: once our decrement is visible,
     * another CPU may drop the final reference and free 'obj', turning
     * the re-read into a use-after-free.
     */
    int refs = atomic_dec_return(&obj->refcount);

    if (refs == 0) {
        /* Last reference released - free object */
        pr_info("Freeing object (ref count reached 0)\n");
        kfree(obj->data);
        kfree(obj);
    } else {
        pr_info("Object ref count: %d\n", refs);
    }
}

/*
 * Example 2: Statistics counters
 */
struct device_stats {
    atomic64_t packets_received;
    atomic64_t packets_sent;
    atomic64_t errors;
    atomic64_t bytes_received;
};

static struct device_stats stats = {
    .packets_received = ATOMIC64_INIT(0),
    .packets_sent = ATOMIC64_INIT(0),
    .errors = ATOMIC64_INIT(0),
    .bytes_received = ATOMIC64_INIT(0),
};

static void record_rx_packet(size_t bytes)
{
    atomic64_inc(&stats.packets_received);
    atomic64_add(bytes, &stats.bytes_received);
}

static void record_error(void)
{
    atomic64_inc(&stats.errors);
}

static void print_stats(void)
{
    pr_info("RX packets: %lld\n", atomic64_read(&stats.packets_received));
    pr_info("TX packets: %lld\n", atomic64_read(&stats.packets_sent));
    pr_info("Errors: %lld\n", atomic64_read(&stats.errors));
    pr_info("RX bytes: %lld\n", atomic64_read(&stats.bytes_received));
}

/*
 * Example 3: Compare and swap
 */
static atomic_t state = ATOMIC_INIT(0);

#define STATE_IDLE    0
#define STATE_RUNNING 1
#define STATE_STOPPED 2

static int try_start(void)
{
    int old_state;
    
    /* Try to transition from IDLE to RUNNING */
    old_state = atomic_cmpxchg(&state, STATE_IDLE, STATE_RUNNING);
    
    if (old_state == STATE_IDLE) {
        pr_info("Successfully started\n");
        return 0;
    } else {
        pr_info("Cannot start, state is %d\n", old_state);
        return -EBUSY;
    }
}

Atomic Bitwise Operations

#include <asm/bitops.h>

/*
 * Atomic bit operations on unsigned long
 */

/* Set bit */
void set_bit(int nr, volatile unsigned long *addr);

/* Clear bit */
void clear_bit(int nr, volatile unsigned long *addr);

/* Toggle bit */
void change_bit(int nr, volatile unsigned long *addr);

/* Test and set bit (returns old value) */
int test_and_set_bit(int nr, volatile unsigned long *addr);

/* Test and clear bit (returns old value) */
int test_and_clear_bit(int nr, volatile unsigned long *addr);

/* Test and change bit (returns old value) */
int test_and_change_bit(int nr, volatile unsigned long *addr);

/* Test bit (non-atomic) */
int test_bit(int nr, const volatile unsigned long *addr);

/*
 * Example: Device flags
 */
#define FLAG_DEVICE_OPEN     0
#define FLAG_DMA_ACTIVE      1
#define FLAG_IRQ_ENABLED     2
#define FLAG_SUSPENDED       3

static unsigned long device_flags = 0;

static int device_open(void)
{
    /* Try to set OPEN flag, return error if already set */
    if (test_and_set_bit(FLAG_DEVICE_OPEN, &device_flags)) {
        pr_err("Device is already open\n");
        return -EBUSY;
    }
    
    pr_info("Device opened\n");
    return 0;
}

static void device_close(void)
{
    clear_bit(FLAG_DEVICE_OPEN, &device_flags);
    pr_info("Device closed\n");
}

static void enable_dma(void)
{
    if (!test_bit(FLAG_DEVICE_OPEN, &device_flags)) {
        pr_err("Device not open\n");
        return;
    }
    
    set_bit(FLAG_DMA_ACTIVE, &device_flags);
    pr_info("DMA enabled\n");
}

Spinlocks

Theory: Spinlocks

Spinlocks are the most basic locking primitive:

#include <linux/spinlock.h>

/*
 * spinlock_t - Spinlock type
 */
typedef struct spinlock {
    /* ... implementation details ... */
} spinlock_t;

/*
 * Initialization
 */

/* Static initialization */
DEFINE_SPINLOCK(my_lock);

/* Dynamic initialization */
spinlock_t my_lock;
spin_lock_init(&my_lock);

/*
 * Basic locking (disables preemption while held)
 */
void spin_lock(spinlock_t *lock);
void spin_unlock(spinlock_t *lock);

/*
 * Locking with IRQ disabling (safest for interrupt-shared data)
 * Use only when you KNOW local IRQs were enabled on entry.
 */
void spin_lock_irq(spinlock_t *lock);       /* Disable IRQs + acquire lock */
void spin_unlock_irq(spinlock_t *lock);     /* Release lock + enable IRQs */

/*
 * Locking with IRQ save/restore (safe even if IRQs were already off)
 *
 * NOTE: spin_lock_irqsave() is actually a macro; 'flags' is passed by
 * name (not by pointer) and is written with the saved IRQ state, which
 * is why this "prototype" would not work as a real C function.
 */
void spin_lock_irqsave(spinlock_t *lock, unsigned long flags);
void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags);

/*
 * Bottom-half locking (disables softirqs)
 */
void spin_lock_bh(spinlock_t *lock);
void spin_unlock_bh(spinlock_t *lock);

/*
 * Try-lock (doesn't wait)
 */
int spin_trylock(spinlock_t *lock);  /* Returns non-zero if lock acquired */

/*
 * Check if locked (debugging/assertions only - result is immediately stale)
 */
int spin_is_locked(spinlock_t *lock);

Spinlock Examples

/*
 * Example 1: Basic spinlock usage
 */
struct shared_data {
    spinlock_t lock;
    int value;
    struct list_head items;
};

static struct shared_data data;

static void shared_data_init(void)
{
    spin_lock_init(&data.lock);
    data.value = 0;
    INIT_LIST_HEAD(&data.items);
}

static void update_value(int new_value)
{
    /* Acquire lock */
    spin_lock(&data.lock);
    
    /* Critical section */
    data.value = new_value;
    pr_info("Updated value to %d\n", data.value);
    
    /* Release lock */
    spin_unlock(&data.lock);
}

/*
 * Example 2: Interrupt-safe locking
 */
static spinlock_t irq_lock;
static int shared_counter = 0;

/* Called from both process context and interrupt handler */
static void increment_counter(void)
{
    unsigned long flags;
    
    /*
     * Save IRQ state and disable IRQs
     * This is necessary because interrupt handler also accesses shared_counter
     */
    spin_lock_irqsave(&irq_lock, flags);
    
    shared_counter++;
    
    /* Restore IRQ state */
    spin_unlock_irqrestore(&irq_lock, flags);
}

/* Interrupt handler */
static irqreturn_t my_interrupt_handler(int irq, void *dev_id)
{
    unsigned long flags;
    
    /* Same locking in interrupt context */
    spin_lock_irqsave(&irq_lock, flags);
    shared_counter++;
    spin_unlock_irqrestore(&irq_lock, flags);
    
    return IRQ_HANDLED;
}

/*
 * Example 3: Try-lock pattern
 *
 * NOTE(review): the lock here lives on the stack and is freshly
 * initialized, so spin_trylock() can never fail in this example.
 * A real driver would try-lock a lock shared with other contexts.
 */
static void try_lock_example(void)
{
    spinlock_t lock;
    
    spin_lock_init(&lock);
    
    if (spin_trylock(&lock)) {
        /* Got the lock */
        pr_info("Lock acquired\n");
        
        /* Critical section */
        
        spin_unlock(&lock);
    } else {
        /* Lock was held by someone else */
        pr_info("Lock busy, skipping operation\n");
    }
}

/*
 * Example 4: Multiple spinlocks (deadlock prevention)
 */
static spinlock_t lock_a;
static spinlock_t lock_b;

static void acquire_both_locks(void)
{
    /*
     * Lock ordering rule: Always acquire locks in same order!
     * 
     * If one function does:    lock_a -> lock_b
     * And another does:        lock_b -> lock_a
     * 
     * This can deadlock!
     */
    
    /* CORRECT: Always same order */
    spin_lock(&lock_a);
    spin_lock(&lock_b);
    
    /* Critical section with both locks */
    
    spin_unlock(&lock_b);
    spin_unlock(&lock_a);
}

Spinlock Best Practices

/*
 * DO:
 * - Keep critical sections as short as possible
 * - Use spin_lock_irqsave when data is shared with interrupts
 * - Always unlock in same function that locked
 * - Always acquire multiple locks in same order
 * 
 * DON'T:
 * - Sleep while holding spinlock
 * - Call functions that might sleep (kmalloc with GFP_KERNEL, copy_to_user, etc.)
 * - Hold spinlock for long time (> few microseconds)
 * - Take the same spinlock recursively (will deadlock!)
 */

/* BAD: Sleeping while holding spinlock */
void bad_example(void)
{
    spin_lock(&my_lock);
    
    /* BUG: kmalloc with GFP_KERNEL can sleep! */
    void *buf = kmalloc(1024, GFP_KERNEL);
    
    spin_unlock(&my_lock);
}

/* GOOD: Allocate before locking */
void good_example(void)
{
    void *buf;
    
    /* Allocate before acquiring lock */
    buf = kmalloc(1024, GFP_KERNEL);
    if (!buf)
        return;
    
    spin_lock(&my_lock);
    /* Use buf... */
    spin_unlock(&my_lock);
    
    kfree(buf);
}

/* GOOD: Use GFP_ATOMIC if you must allocate while locked */
void atomic_alloc_example(void)
{
    void *buf;
    
    spin_lock(&my_lock);
    
    /* GFP_ATOMIC won't sleep */
    buf = kmalloc(1024, GFP_ATOMIC);
    if (!buf) {
        spin_unlock(&my_lock);
        return;
    }
    
    /* Use buf... */
    
    spin_unlock(&my_lock);
    kfree(buf);
}

Mutexes

Theory: Mutexes

Mutexes (mutual exclusion locks):

#include <linux/mutex.h>

/*
 * struct mutex - Mutex type
 */
struct mutex {
    /* ... implementation details ... */
};

/*
 * Initialization
 */

/* Static initialization */
DEFINE_MUTEX(my_mutex);

/* Dynamic initialization */
struct mutex my_mutex;
mutex_init(&my_mutex);

/*
 * Locking operations
 */

/* Lock mutex (sleeps if unavailable; cannot be interrupted) */
void mutex_lock(struct mutex *lock);

/* Unlock mutex */
void mutex_unlock(struct mutex *lock);

/* Try to lock (doesn't sleep; returns 1 if acquired, 0 if busy) */
int mutex_trylock(struct mutex *lock);

/* Lock, but can be interrupted by signals */
int mutex_lock_interruptible(struct mutex *lock);
/* Returns 0 if locked, -EINTR if interrupted */

/* Lock, but only fatal (process-killing) signals interrupt the wait.
 * (There is no timeout variant of mutex_lock.) */
int mutex_lock_killable(struct mutex *lock);

/*
 * Status check (debugging only - answer is immediately stale)
 */
int mutex_is_locked(struct mutex *lock);

Mutex Examples

/*
 * Example 1: Protecting device state
 */
struct my_device {
    struct mutex lock;
    int state;
    char buffer[256];
    size_t buffer_len;
};

static struct my_device *dev;

static int device_open(struct inode *inode, struct file *filp)
{
    int ret = 0;
    
    /* Acquire mutex (can sleep) */
    mutex_lock(&dev->lock);
    
    if (dev->state != STATE_IDLE) {
        pr_err("Device busy\n");
        ret = -EBUSY;
        goto out;
    }
    
    dev->state = STATE_OPEN;
    dev->buffer_len = 0;
    
out:
    mutex_unlock(&dev->lock);
    return ret;
}

static ssize_t device_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *f_pos)
{
    size_t to_copy;

    mutex_lock(&dev->lock);

    /* Space left in the device buffer */
    to_copy = min(count, sizeof(dev->buffer) - dev->buffer_len);

    /*
     * Fail explicitly when the buffer is full. Returning 0 from
     * write() (as the original did) makes a looping userspace writer
     * spin forever without making progress.
     */
    if (to_copy == 0 && count > 0) {
        mutex_unlock(&dev->lock);
        return -ENOSPC;
    }

    /* Copy from user (can sleep - OK because we hold a mutex, not a spinlock) */
    if (copy_from_user(dev->buffer + dev->buffer_len, buf, to_copy)) {
        mutex_unlock(&dev->lock);
        return -EFAULT;
    }

    dev->buffer_len += to_copy;

    mutex_unlock(&dev->lock);
    return to_copy;
}

/*
 * Example 2: Interruptible locking
 */
static ssize_t device_read_interruptible(struct file *filp, char __user *buf,
                                        size_t count, loff_t *f_pos)
{
    int ret;
    
    /*
     * Use interruptible lock so user can Ctrl+C if blocked
     */
    ret = mutex_lock_interruptible(&dev->lock);
    if (ret)
        return -ERESTARTSYS;  /* Interrupted by signal */
    
    /* ... perform read ... */
    
    mutex_unlock(&dev->lock);
    return 0;
}

/*
 * Example 3: Try-lock pattern
 */
static int try_operation(void)
{
    if (!mutex_trylock(&dev->lock)) {
        pr_info("Device busy, operation skipped\n");
        return -EBUSY;
    }
    
    /* Got lock, perform operation */
    
    mutex_unlock(&dev->lock);
    return 0;
}

/*
 * Example 4: RAII-style locking with goto
 */
static int complex_operation(void)
{
    int ret = 0;
    void *buffer = NULL;
    
    mutex_lock(&dev->lock);
    
    /* Allocate resources */
    buffer = kmalloc(4096, GFP_KERNEL);
    if (!buffer) {
        ret = -ENOMEM;
        goto out_unlock;
    }
    
    /* Do work */
    ret = do_something(buffer);
    if (ret < 0)
        goto out_free;
    
    /* Success path */
    ret = 0;
    
out_free:
    kfree(buffer);
out_unlock:
    mutex_unlock(&dev->lock);
    return ret;
}

Spinlock vs Mutex Comparison

/*
 * When to use Spinlock vs Mutex
 */

/* Use SPINLOCK when:
 * - Critical section is very short (< few microseconds)
 * - In interrupt context
 * - Cannot sleep (atomic context)
 * - Need lowest latency
 * 
 * Example: Incrementing a counter, adding to list
 */
spinlock_t fast_lock;

void quick_operation(void)
{
    spin_lock(&fast_lock);
    quick_counter++;
    spin_unlock(&fast_lock);
}

/* Use MUTEX when:
 * - Critical section is longer
 * - In process context
 * - May need to sleep (copy_to_user, kmalloc with GFP_KERNEL)
 * - Want fair queuing of waiters
 * 
 * Example: File I/O, complex operations
 */
struct mutex slow_lock;

void slow_operation(void)
{
    mutex_lock(&slow_lock);
    
    /* Can call functions that sleep */
    buffer = kmalloc(4096, GFP_KERNEL);
    copy_from_user(buffer, user_buf, size);
    perform_io_operation();
    
    mutex_unlock(&slow_lock);
    kfree(buffer);
}

Semaphores

Theory: Semaphores

Semaphores are counting locks:

#include <linux/semaphore.h>

/*
 * struct semaphore - Semaphore type
 */
struct semaphore {
    /* ... implementation details ... */
};

/*
 * Initialization
 */

/* Initialize with count */
void sema_init(struct semaphore *sem, int val);

/* Common: Binary semaphore (like mutex)
 * NOTE: since kernel 6.4, DEFINE_SEMAPHORE() takes an explicit count,
 * i.e. DEFINE_SEMAPHORE(my_sem, 1); older kernels implied a count of 1. */
DEFINE_SEMAPHORE(my_sem);  /* Initializes to 1 */

/*
 * Operations
 */

/* Acquire semaphore (decrement count, sleep if 0; not interruptible) */
void down(struct semaphore *sem);

/* Acquire, interruptible (returns 0 if acquired, -EINTR on signal) */
int down_interruptible(struct semaphore *sem);

/* Acquire, interruptible only by fatal signals */
int down_killable(struct semaphore *sem);

/* Try to acquire (non-blocking) */
int down_trylock(struct semaphore *sem);  /* Returns 0 if acquired, 1 if busy */

/* Acquire with timeout (returns 0 if acquired, -ETIME on timeout) */
int down_timeout(struct semaphore *sem, long jiffies);

/* Release semaphore (increment count) */
void up(struct semaphore *sem);

Semaphore Examples

/*
 * Example 1: Resource pool (N resources)
 */
#define MAX_DEVICES 4

struct resource_pool {
    struct semaphore available;
    struct device *devices[MAX_DEVICES];
    spinlock_t lock;
};

static struct resource_pool pool;

static int pool_init(void)
{
    int i;
    
    /* Initialize semaphore to MAX_DEVICES */
    sema_init(&pool.available, MAX_DEVICES);
    spin_lock_init(&pool.lock);
    
    for (i = 0; i < MAX_DEVICES; i++) {
        pool.devices[i] = allocate_device(i);
        if (!pool.devices[i])
            return -ENOMEM;
    }
    
    return 0;
}

static struct device *acquire_device(void)
{
    struct device *dev = NULL;
    int i;
    
    /* Wait for available device (decrements count) */
    if (down_interruptible(&pool.available))
        return NULL;  /* Interrupted */
    
    /* Find and mark device as used */
    spin_lock(&pool.lock);
    for (i = 0; i < MAX_DEVICES; i++) {
        if (pool.devices[i] && !device_is_busy(pool.devices[i])) {
            dev = pool.devices[i];
            mark_device_busy(dev);
            break;
        }
    }
    spin_unlock(&pool.lock);
    
    return dev;
}

static void release_device(struct device *dev)
{
    spin_lock(&pool.lock);
    mark_device_idle(dev);
    spin_unlock(&pool.lock);
    
    /* Release device back to pool (increments count) */
    up(&pool.available);
}

/*
 * Example 2: Producer-consumer with bounded buffer
 */
#define BUFFER_SIZE 10

struct bounded_buffer {
    struct semaphore full;      /* Counts full slots */
    struct semaphore empty;     /* Counts empty slots */
    spinlock_t lock;            /* Protects buffer access */
    int buffer[BUFFER_SIZE];
    int head, tail;
};

static struct bounded_buffer bb;

static void buffer_init(void)
{
    sema_init(&bb.full, 0);             /* Initially no full slots */
    sema_init(&bb.empty, BUFFER_SIZE);  /* Initially all empty */
    spin_lock_init(&bb.lock);
    bb.head = bb.tail = 0;
}

static int produce(int item)
{
    /* Wait for empty slot */
    if (down_interruptible(&bb.empty))
        return -ERESTARTSYS;
    
    /* Add item to buffer */
    spin_lock(&bb.lock);
    bb.buffer[bb.tail] = item;
    bb.tail = (bb.tail + 1) % BUFFER_SIZE;
    spin_unlock(&bb.lock);
    
    /* Signal that there's a full slot */
    up(&bb.full);
    
    return 0;
}

static int consume(int *item)
{
    /* Wait for full slot */
    if (down_interruptible(&bb.full))
        return -ERESTARTSYS;
    
    /* Remove item from buffer */
    spin_lock(&bb.lock);
    *item = bb.buffer[bb.head];
    bb.head = (bb.head + 1) % BUFFER_SIZE;
    spin_unlock(&bb.lock);
    
    /* Signal that there's an empty slot */
    up(&bb.empty);
    
    return 0;
}

Read-Write Locks

Theory: Reader-Writer Locks

Allow multiple concurrent readers OR single writer:

#include <linux/rwlock.h>
#include <linux/rwsem.h>

/*
 * Spinlock-based reader-writer lock (rwlock_t)
 * - Fast, atomic context
 * - Busy-waiting
 */
typedef struct {
    /* ... implementation ... */
} rwlock_t;

/* Initialization */
DEFINE_RWLOCK(my_rwlock);

rwlock_t my_rwlock;
rwlock_init(&my_rwlock);

/* Reader operations */
void read_lock(rwlock_t *lock);
void read_unlock(rwlock_t *lock);
void read_lock_irq(rwlock_t *lock);
void read_unlock_irq(rwlock_t *lock);
void read_lock_irqsave(rwlock_t *lock, unsigned long flags);
void read_unlock_irqrestore(rwlock_t *lock, unsigned long flags);

/* Writer operations */
void write_lock(rwlock_t *lock);
void write_unlock(rwlock_t *lock);
void write_lock_irq(rwlock_t *lock);
void write_unlock_irq(rwlock_t *lock);
void write_lock_irqsave(rwlock_t *lock, unsigned long flags);
void write_unlock_irqrestore(rwlock_t *lock, unsigned long flags);

/*
 * Semaphore-based reader-writer lock (rw_semaphore)
 * - Sleeping, process context
 * - More scalable
 */
struct rw_semaphore {
    /* ... implementation ... */
};

/* Initialization */
DECLARE_RWSEM(my_rwsem);

struct rw_semaphore my_rwsem;
init_rwsem(&my_rwsem);

/* Reader operations */
void down_read(struct rw_semaphore *sem);
void up_read(struct rw_semaphore *sem);
int down_read_trylock(struct rw_semaphore *sem);        /* 1 if acquired */
int down_read_interruptible(struct rw_semaphore *sem);  /* since v5.9 */

/* Writer operations */
void down_write(struct rw_semaphore *sem);
void up_write(struct rw_semaphore *sem);
int down_write_trylock(struct rw_semaphore *sem);       /* 1 if acquired */
/* NOTE: there is no down_write_interruptible(); the kernel provides
 * only the fatal-signal variant for writers: */
int down_write_killable(struct rw_semaphore *sem);

Read-Write Lock Examples

/*
 * Example 1: Read-heavy data structure
 */
struct data_cache {
    rwlock_t lock;
    struct list_head entries;
    int count;
};

static struct data_cache cache;

/* Readers (can run concurrently) */
static int lookup_entry(int id)
{
    struct entry *e;
    int found = 0;
    
    /* Multiple readers can hold lock */
    read_lock(&cache.lock);
    
    list_for_each_entry(e, &cache.entries, list) {
        if (e->id == id) {
            found = 1;
            break;
        }
    }
    
    read_unlock(&cache.lock);
    return found;
}

/* Writer (exclusive access) */
static void add_entry(struct entry *new_entry)
{
    /* Only one writer, no readers */
    write_lock(&cache.lock);
    
    list_add_tail(&new_entry->list, &cache.entries);
    cache.count++;
    
    write_unlock(&cache.lock);
}

/*
 * Example 2: Configuration data (rw_semaphore)
 */
struct config {
    struct rw_semaphore rwsem;
    char settings[256];
    int enabled;
};

static struct config cfg;

/* Many threads can read config simultaneously */
/* Many threads can read config simultaneously */
static int read_config(char *buf, size_t len)
{
    int ret;

    down_read(&cfg.rwsem);

    if (!cfg.enabled) {
        ret = -EINVAL;
        goto out;
    }

    /*
     * strncpy() does NOT NUL-terminate when the source is at least
     * 'len' bytes long - terminate explicitly so the caller always
     * receives a valid C string.
     */
    strncpy(buf, cfg.settings, len);
    if (len)
        buf[len - 1] = '\0';
    ret = 0;

out:
    up_read(&cfg.rwsem);
    return ret;
}

/* Writer has exclusive access */
static int update_config(const char *new_settings)
{
    down_write(&cfg.rwsem);
    
    strncpy(cfg.settings, new_settings, sizeof(cfg.settings));
    cfg.settings[sizeof(cfg.settings) - 1] = '\0';
    
    up_write(&cfg.rwsem);
    
    pr_info("Config updated\n");
    return 0;
}

Completion

Theory: Completion

Completions are for signaling event completion:

#include <linux/completion.h>

struct completion {
    /* ... implementation ... */
};

/* Initialization */
DECLARE_COMPLETION(my_comp);

struct completion my_comp;
init_completion(&my_comp);

/* Reinitialize for reuse */
void reinit_completion(struct completion *x);

/* Wait for completion */
void wait_for_completion(struct completion *x);

/* Wait, interruptible */
int wait_for_completion_interruptible(struct completion *x);

/* Wait with timeout (returns 0 if timeout, >0 if completed) */
unsigned long wait_for_completion_timeout(struct completion *x,
                                         unsigned long timeout);

/* Signal completion */
void complete(struct completion *x);      /* Wake one waiter */
void complete_all(struct completion *x);  /* Wake all waiters */

Completion Examples

/*
 * Example 1: Waiting for initialization
 */
static struct completion init_done;
static struct task_struct *init_thread;

static int initialization_thread(void *data)
{
    pr_info("Init thread: Starting initialization...\n");
    
    /* Perform long initialization */
    ssleep(2);
    
    pr_info("Init thread: Initialization complete!\n");
    
    /* Signal completion */
    complete(&init_done);
    
    return 0;
}

static int start_device(void)
{
    init_completion(&init_done);
    
    /* Start initialization thread */
    init_thread = kthread_run(initialization_thread, NULL, "init_thread");
    if (IS_ERR(init_thread))
        return PTR_ERR(init_thread);
    
    pr_info("Main: Waiting for initialization...\n");
    
    /* Wait for initialization to complete */
    wait_for_completion(&init_done);
    
    pr_info("Main: Initialization done, proceeding\n");
    
    return 0;
}

/*
 * Example 2: Waiting for I/O with timeout
 */
struct io_request {
    struct completion done;
    int result;
    void *data;
};

static void start_io(struct io_request *req)
{
    init_completion(&req->done);
    
    /* Initiate hardware I/O operation */
    hardware_start_io(req);
}

/* Called by interrupt handler when I/O completes */
static void io_complete_handler(struct io_request *req, int result)
{
    req->result = result;
    
    /* Signal completion */
    complete(&req->done);
}

static int wait_for_io(struct io_request *req)
{
    unsigned long timeout = msecs_to_jiffies(5000);  /* 5 seconds */
    unsigned long ret;
    
    /* Wait for I/O with timeout */
    ret = wait_for_completion_timeout(&req->done, timeout);
    
    if (ret == 0) {
        pr_err("I/O timeout!\n");
        return -ETIMEDOUT;
    }
    
    if (req->result < 0) {
        pr_err("I/O error: %d\n", req->result);
        return req->result;
    }
    
    pr_info("I/O completed successfully\n");
    return 0;
}

/*
 * Example 3: Multiple waiters
 */
static struct completion shutdown_comp;

static void notify_shutdown(void)
{
    pr_info("Notifying all threads of shutdown\n");
    
    /* Wake all waiting threads */
    complete_all(&shutdown_comp);
}

static int worker_thread(void *data)
{
    int id = (long)data;
    
    pr_info("Thread %d: Waiting for shutdown signal\n", id);
    
    /* Wait for shutdown */
    wait_for_completion(&shutdown_comp);
    
    pr_info("Thread %d: Shutdown signal received, exiting\n", id);
    
    return 0;
}

RCU (Read-Copy-Update)

Theory: RCU

RCU is an advanced lock-free synchronization mechanism:

#include <linux/rcupdate.h>

/*
 * RCU operations
 */

/* Reader operations */
rcu_read_lock();          /* Enter RCU read-side critical section */
rcu_read_unlock();        /* Exit RCU read-side critical section */
rcu_dereference(ptr);     /* Dereference RCU-protected pointer */

/* Writer operations */
rcu_assign_pointer(ptr, new_ptr);  /* Publish new pointer */
synchronize_rcu();        /* Wait for all readers to finish */
call_rcu(head, func);     /* Schedule callback after grace period */

/*
 * Example: RCU-protected configuration
 */
struct config {
    int value1;
    int value2;
    char name[32];
};

static struct config __rcu *global_config;

/* Reader (lock-free!) */
static int read_config_value(void)
{
    struct config *cfg;
    int val;
    
    /* Enter RCU read-side critical section */
    rcu_read_lock();
    
    /* Dereference RCU-protected pointer */
    cfg = rcu_dereference(global_config);
    if (cfg)
        val = cfg->value1;
    else
        val = -1;
    
    /* Exit RCU read-side critical section */
    rcu_read_unlock();
    
    return val;
}

/* Writer (rare) */
static int update_config(int new_val1, int new_val2)
{
    struct config *new_cfg, *old_cfg;

    /* Allocate and fill the replacement config */
    new_cfg = kmalloc(sizeof(*new_cfg), GFP_KERNEL);
    if (!new_cfg)
        return -ENOMEM;

    new_cfg->value1 = new_val1;
    new_cfg->value2 = new_val2;
    strscpy(new_cfg->name, "updated", sizeof(new_cfg->name));

    /*
     * Fetch the old pointer. global_config is __rcu-annotated, so a
     * plain load (as in the original) triggers a sparse warning and
     * hides the fact that this is an updater-side access. In real code
     * the second argument would be a lockdep expression proving the
     * writers' lock is held.
     */
    old_cfg = rcu_dereference_protected(global_config, 1);

    /* Publish new config (atomic pointer update with release ordering) */
    rcu_assign_pointer(global_config, new_cfg);

    /* Wait for all pre-existing readers to leave their critical sections */
    synchronize_rcu();

    /* No reader can still hold a reference to the old config - free it */
    kfree(old_cfg);

    pr_info("Config updated\n");
    return 0;
}

Summary

In this chapter, you learned:

Atomic Operations: Lock-free counters and flags
Spinlocks: Fast locking for short critical sections
Mutexes: Sleeping locks for longer operations
Semaphores: Counting locks for resource pools
Read-Write Locks: Optimize for read-heavy workloads
Completions: Event synchronization
RCU: Advanced lock-free reads

Key Takeaways

  1. Choose the right primitive based on context and duration
  2. Keep critical sections short, especially with spinlocks
  3. Never sleep while holding spinlock
  4. Prevent deadlocks with consistent lock ordering
  5. Use atomic operations when possible (no locks needed)

Next Steps

Proceed to 06-interrupts.md to learn about handling hardware interrupts, bottom-halves, and deferred work.


Quick Reference

/* Atomic operations */
atomic_t counter = ATOMIC_INIT(0);
atomic_inc(&counter);
atomic_dec_and_test(&counter);

/* Spinlocks (short, atomic context) */
DEFINE_SPINLOCK(lock);
spin_lock_irqsave(&lock, flags);
/* Critical section */
spin_unlock_irqrestore(&lock, flags);

/* Mutexes (long, process context) */
DEFINE_MUTEX(mutex);
mutex_lock(&mutex);
/* Critical section (can sleep) */
mutex_unlock(&mutex);

/* Read-write locks */
DECLARE_RWSEM(rwsem);
down_read(&rwsem);    /* Reader */
up_read(&rwsem);
down_write(&rwsem);   /* Writer */
up_write(&rwsem);

/* Completions */
DECLARE_COMPLETION(done);
wait_for_completion(&done);
complete(&done);