Chapter 3: Advanced File Operations
Table of Contents
- The file_operations Structure in Depth
- ioctl - Device Control
- poll and select
- llseek - Seeking in Device Files
- mmap - Memory Mapping
- Asynchronous I/O
- fsync - Synchronization
- Advanced Examples
The file_operations Structure in Depth
Complete Structure Overview
#include <linux/fs.h>
/*
* file_operations - Table of callbacks invoked on an open file
* (abridged listing, kernel 5.x/6.x)
*
* Not all operations need to be implemented.
* Set unimplemented operations to NULL.
*
* NOTE(review): the member list differs between kernel releases; confirm
* against include/linux/fs.h of the kernel you are building for.
*/
struct file_operations {
struct module *owner; /* Module providing the callbacks (THIS_MODULE) */
/* Position operations */
loff_t (*llseek) (struct file *, loff_t, int);
/* Read/Write operations */
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
/* Directory operations (not used for device drivers) */
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
/* Polling */
__poll_t (*poll) (struct file *, struct poll_table_struct *);
/* Device control */
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
/* Memory mapping */
int (*mmap) (struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
/* Open/Release */
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
/* Synchronization */
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
/* Async operations */
int (*fasync) (int, struct file *, int);
/* Locking */
int (*lock) (struct file *, int, struct file_lock *);
int (*flock) (struct file *, int, struct file_lock *);
/* Splice operations (data transfer between a pipe and a file) */
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
/* Misc */
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
};
Important Data Structures
/*
* struct file - Represents an open file (abridged listing)
*
* Created when file is opened, passed to most file operations.
* Drivers in this chapter mainly read f_flags (e.g. O_NONBLOCK) and
* f_pos from it.
*
* NOTE(review): field set and order vary by kernel version; confirm
* against include/linux/fs.h.
*/
struct file {
struct path f_path; /* File path */
struct inode *f_inode; /* Cached inode */
const struct file_operations *f_op; /* File operations */
unsigned int f_flags; /* Open flags (O_RDONLY, O_NONBLOCK, etc.) */
fmode_t f_mode; /* File mode (FMODE_READ, FMODE_WRITE) */
loff_t f_pos; /* Current position */
struct fown_struct f_owner; /* Owner for async notification */
const struct cred *f_cred; /* File credentials */
void *private_data; /* Driver-specific data */
/* ... many more fields ... */
};
/*
* struct inode - Represents a file on disk (abridged listing)
*
* Contains metadata about the file
*
* NOTE(review): several members shown here (for example i_bdev and the
* i_atime/i_mtime/i_ctime timestamps) are believed to have been removed
* or reworked in newer kernels; confirm against include/linux/fs.h.
*/
struct inode {
umode_t i_mode; /* File type and permissions */
unsigned short i_opflags;
kuid_t i_uid; /* Owner user ID */
kgid_t i_gid; /* Owner group ID */
unsigned int i_flags;
const struct inode_operations *i_op;
struct super_block *i_sb;
/* Only one member of this union is meaningful for a given inode */
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev; /* Character device */
char *i_link;
};
dev_t i_rdev; /* Device number */
loff_t i_size; /* File size */
struct timespec64 i_atime; /* Access time */
struct timespec64 i_mtime; /* Modification time */
struct timespec64 i_ctime; /* Change time */
/* ... many more fields ... */
};
ioctl - Device Control
Theory: ioctl Overview
ioctl (input/output control) provides a mechanism for device-specific operations that don’t fit into the standard read/write model.
Common uses:
- Setting hardware parameters (baud rate, resolution, etc.)
- Querying device capabilities
- Triggering special operations
- Configuration and control
ioctl Command Encoding
#include <linux/ioctl.h>
/*
* ioctl command structure (32-bit value):
*
* Bits:
* 31-30: Direction (read/write)
* 29-16: Size of argument (14 bits)
* 15-8: Type (magic number, usually ASCII character)
* 7-0: Command number (0-255)
*
* NOTE(review): this is a simplified rendering of the generic layout;
* some architectures are believed to use different field widths, so
* always use the macros rather than hand-rolled shifts.
*/
/*
* Direction bits
*
* The direction is named from the point of view of userspace.
*/
#define _IOC_NONE 0U /* No data transfer */
#define _IOC_WRITE 1U /* User writing to kernel */
#define _IOC_READ 2U /* Kernel writing to user */
/*
* Helper macros to create ioctl commands
*
* The third parameter of _IOW/_IOR/_IOWR is a TYPE (e.g. int or
* struct foo), not a byte count: the macros apply sizeof() to it.
* _IOC() itself is not shown in this listing.
*/
/* No argument */
#define _IO(type, nr) _IOC(_IOC_NONE, (type), (nr), 0)
/* Write parameter */
#define _IOW(type, nr, size) _IOC(_IOC_WRITE, (type), (nr), sizeof(size))
/* Read parameter */
#define _IOR(type, nr, size) _IOC(_IOC_READ, (type), (nr), sizeof(size))
/* Read and write parameter */
#define _IOWR(type, nr, size) _IOC(_IOC_READ|_IOC_WRITE, (type), (nr), sizeof(size))
/*
* Macros to extract information from ioctl command
*
* Each one shifts the field down to bit 0 and masks it to its width
* (2, 8, 8 and 14 bits respectively).
*/
#define _IOC_DIR(nr) (((nr) >> 30) & 0x03)
#define _IOC_TYPE(nr) (((nr) >> 8) & 0xFF)
#define _IOC_NR(nr) (((nr) >> 0) & 0xFF)
#define _IOC_SIZE(nr) (((nr) >> 16) & 0x3FFF)
Implementing ioctl
mydev_ioctl.h - Shared header for kernel and userspace:
/*
* mydev_ioctl.h - ioctl definitions for mydevice
*
* This file is included by both kernel driver and userspace applications,
* so everything in it is part of the driver's userspace ABI.
*/
#ifndef MYDEV_IOCTL_H
#define MYDEV_IOCTL_H
#include <linux/ioctl.h>
/*
* Magic number for this driver
* Choose an unused number from the kernel's ioctl-number.rst list
* (NOTE(review): located under Documentation/userspace-api/ioctl/ in
* recent kernels; confirm the path for your tree).
* Use ASCII character for readability
*/
#define MYDEV_IOC_MAGIC 'M'
/*
* Data structure for complex ioctl operations
*
* Passed by pointer as the ioctl argument of the *CONFIG commands.
*/
struct mydev_config {
unsigned int speed; /* Speed in Hz */
unsigned int mode; /* Operating mode */
unsigned int flags; /* Configuration flags */
char name[32]; /* Device name */
};
/*
* Define ioctl commands
*
* Naming: G = get (kernel -> user), S = set (user -> kernel),
* X = exchange (both directions in one call).
*/
/* Reset device - no argument */
#define MYDEV_IOCRESET _IO(MYDEV_IOC_MAGIC, 0)
/* Get device speed - read int */
#define MYDEV_IOCGSPEED _IOR(MYDEV_IOC_MAGIC, 1, int)
/* Set device speed - write int */
#define MYDEV_IOCSSPEED _IOW(MYDEV_IOC_MAGIC, 2, int)
/* Get configuration - read struct */
#define MYDEV_IOCGCONFIG _IOR(MYDEV_IOC_MAGIC, 3, struct mydev_config)
/* Set configuration - write struct */
#define MYDEV_IOCSCONFIG _IOW(MYDEV_IOC_MAGIC, 4, struct mydev_config)
/* Exchange configuration - read/write struct */
#define MYDEV_IOCXCONFIG _IOWR(MYDEV_IOC_MAGIC, 5, struct mydev_config)
/* Maximum command number; keep in sync when adding commands above */
#define MYDEV_IOC_MAXNR 5
#endif /* MYDEV_IOCTL_H */
Kernel driver implementation:
/*
* mydev_driver.c - Driver with ioctl implementation
*/
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include "mydev_ioctl.h"
/*
* Device data structure
*
* A single instance is allocated at module init and reached through the
* global 'mydev' pointer, so its contents are shared by every open file.
*/
struct mydev_data {
struct cdev cdev; /* Character device registered with cdev_add() */
int speed; /* Current speed (set via ioctl) */
int mode; /* Current operating mode */
int flags; /* Current configuration flags */
char name[32]; /* Device name; same length as mydev_config.name */
};
static struct mydev_data *mydev; /* The one device instance */
static dev_t dev_num; /* Device number from alloc_chrdev_region() */
static struct class *dev_class; /* Class used to create the device node */
/*
* ioctl implementation
*
* @filp: File pointer
* @cmd: ioctl command
* @arg: Command argument (can be int or pointer to struct)
*
* Return: 0 on success, negative error code on failure
*/
static long mydev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int retval = 0;
int tmp;
struct mydev_config config;
/*
* Extract command components
*/
unsigned int dir = _IOC_DIR(cmd);
unsigned int type = _IOC_TYPE(cmd);
unsigned int nr = _IOC_NR(cmd);
unsigned int size = _IOC_SIZE(cmd);
pr_info("ioctl: cmd=0x%x, dir=%u, type=%c, nr=%u, size=%u\n",
cmd, dir, type, nr, size);
/*
* Verify ioctl command validity
*/
/* Check magic number */
if (type != MYDEV_IOC_MAGIC) {
pr_err("ioctl: Invalid magic number\n");
return -ENOTTY; /* Inappropriate ioctl for device */
}
/* Check command number range */
if (nr > MYDEV_IOC_MAXNR) {
pr_err("ioctl: Invalid command number\n");
return -ENOTTY;
}
/*
* Verify user space pointer validity
*
* access_ok() checks if user space address is valid
* (replaced with just checking in newer kernels as copy_* does this)
*/
if (dir & _IOC_READ)
retval = !access_ok((void __user *)arg, size);
else if (dir & _IOC_WRITE)
retval = !access_ok((void __user *)arg, size);
if (retval)
return -EFAULT;
/*
* Handle each ioctl command
*/
switch (cmd) {
case MYDEV_IOCRESET:
/*
* Reset device to default state
* No argument
*/
pr_info("ioctl: RESET command\n");
mydev->speed = 1000;
mydev->mode = 0;
mydev->flags = 0;
strcpy(mydev->name, "default");
break;
case MYDEV_IOCGSPEED:
/*
* Get speed - copy to userspace
*/
pr_info("ioctl: GET SPEED command\n");
tmp = mydev->speed;
if (copy_to_user((int __user *)arg, &tmp, sizeof(tmp)))
return -EFAULT;
break;
case MYDEV_IOCSSPEED:
/*
* Set speed - copy from userspace
*/
pr_info("ioctl: SET SPEED command\n");
if (copy_from_user(&tmp, (int __user *)arg, sizeof(tmp)))
return -EFAULT;
/* Validate input */
if (tmp < 0 || tmp > 100000) {
pr_err("ioctl: Invalid speed value: %d\n", tmp);
return -EINVAL;
}
mydev->speed = tmp;
pr_info("ioctl: Speed set to %d\n", mydev->speed);
break;
case MYDEV_IOCGCONFIG:
/*
* Get configuration - copy struct to userspace
*/
pr_info("ioctl: GET CONFIG command\n");
config.speed = mydev->speed;
config.mode = mydev->mode;
config.flags = mydev->flags;
strncpy(config.name, mydev->name, sizeof(config.name));
if (copy_to_user((struct mydev_config __user *)arg,
&config, sizeof(config)))
return -EFAULT;
break;
case MYDEV_IOCSCONFIG:
/*
* Set configuration - copy struct from userspace
*/
pr_info("ioctl: SET CONFIG command\n");
if (copy_from_user(&config, (struct mydev_config __user *)arg,
sizeof(config)))
return -EFAULT;
/* Validate and apply configuration */
mydev->speed = config.speed;
mydev->mode = config.mode;
mydev->flags = config.flags;
strncpy(mydev->name, config.name, sizeof(mydev->name));
mydev->name[sizeof(mydev->name) - 1] = '\0'; /* Ensure null termination */
pr_info("ioctl: Config applied\n");
break;
case MYDEV_IOCXCONFIG:
/*
* Exchange configuration
* Get current config, set new config, return old config
*/
pr_info("ioctl: EXCHANGE CONFIG command\n");
/* Read new config from user */
if (copy_from_user(&config, (struct mydev_config __user *)arg,
sizeof(config)))
return -EFAULT;
/* Save old config */
struct mydev_config old_config;
old_config.speed = mydev->speed;
old_config.mode = mydev->mode;
old_config.flags = mydev->flags;
strncpy(old_config.name, mydev->name, sizeof(old_config.name));
/* Apply new config */
mydev->speed = config.speed;
mydev->mode = config.mode;
mydev->flags = config.flags;
strncpy(mydev->name, config.name, sizeof(mydev->name));
/* Return old config to user */
if (copy_to_user((struct mydev_config __user *)arg,
&old_config, sizeof(old_config)))
return -EFAULT;
break;
default:
pr_err("ioctl: Unknown command: 0x%x\n", cmd);
return -ENOTTY;
}
return retval;
}
/*
* mydev_open - open() handler; only logs the event.
*
* @inode: Inode of the device node (unused)
* @filp: File being opened (unused)
*
* Return: always 0.
*/
static int mydev_open(struct inode *inode, struct file *filp)
{
pr_info("Device opened\n");
return 0;
}
/*
* mydev_release - release() handler; only logs the event.
*
* @inode: Inode of the device node (unused)
* @filp: File being released (unused)
*
* Return: always 0.
*/
static int mydev_release(struct inode *inode, struct file *filp)
{
pr_info("Device closed\n");
return 0;
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = mydev_open,
.release = mydev_release,
.unlocked_ioctl = mydev_ioctl, /* ioctl handler */
};
/*
 * mydev_init - module entry point
 *
 * Allocates the device state, reserves a device number, creates the
 * class, registers the cdev and finally creates the device node. Each
 * failure unwinds exactly the steps that already succeeded, in reverse
 * order.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int __init mydev_init(void)
{
	struct device *dev;
	int ret;

	/* Allocate device data (zero-filled) */
	mydev = kzalloc(sizeof(*mydev), GFP_KERNEL);
	if (!mydev)
		return -ENOMEM;

	/* Initialize device data */
	mydev->speed = 1000;
	mydev->mode = 0;
	mydev->flags = 0;
	strscpy(mydev->name, "default", sizeof(mydev->name));

	/* Allocate device number */
	ret = alloc_chrdev_region(&dev_num, 0, 1, "mydev");
	if (ret < 0)
		goto err_alloc;

	/* Create class */
	dev_class = class_create(THIS_MODULE, "mydev_class");
	if (IS_ERR(dev_class)) {
		ret = PTR_ERR(dev_class);
		goto err_class;
	}

	/* Initialize and add cdev */
	cdev_init(&mydev->cdev, &fops);
	mydev->cdev.owner = THIS_MODULE;
	ret = cdev_add(&mydev->cdev, dev_num, 1);
	if (ret < 0)
		goto err_cdev;

	/* Create device; propagate the real error instead of a fixed code */
	dev = device_create(dev_class, NULL, dev_num, NULL, "mydev");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	pr_info("mydev: Device initialized\n");
	return 0;

err_device:
	cdev_del(&mydev->cdev);
err_cdev:
	class_destroy(dev_class);
err_class:
	unregister_chrdev_region(dev_num, 1);
err_alloc:
	kfree(mydev);
	mydev = NULL; /* do not leave a dangling global behind */
	return ret;
}
/*
* mydev_exit - module exit point
*
* Tears everything down in the reverse order of mydev_init(): device
* node, cdev, class, device number, then the device data itself.
*/
static void __exit mydev_exit(void)
{
device_destroy(dev_class, dev_num);
cdev_del(&mydev->cdev);
class_destroy(dev_class);
unregister_chrdev_region(dev_num, 1);
kfree(mydev);
pr_info("mydev: Device removed\n");
}
module_init(mydev_init);
module_exit(mydev_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tutorial Author");
MODULE_DESCRIPTION("Device driver with ioctl support");
Userspace test program:
/*
* test_ioctl.c - Userspace program to test ioctl
*
* Compile: gcc -o test_ioctl test_ioctl.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <string.h>
#include "mydev_ioctl.h"
int main(int argc, char *argv[])
{
int fd;
int speed;
struct mydev_config config;
/* Open device */
fd = open("/dev/mydev", O_RDWR);
if (fd < 0) {
perror("Failed to open device");
return EXIT_FAILURE;
}
printf("Device opened successfully\n");
/* Test 1: Reset device */
printf("\n--- Test 1: Reset device ---\n");
if (ioctl(fd, MYDEV_IOCRESET, NULL) < 0) {
perror("IOCRESET failed");
} else {
printf("Device reset successfully\n");
}
/* Test 2: Get speed */
printf("\n--- Test 2: Get speed ---\n");
if (ioctl(fd, MYDEV_IOCGSPEED, &speed) < 0) {
perror("IOCGSPEED failed");
} else {
printf("Current speed: %d\n", speed);
}
/* Test 3: Set speed */
printf("\n--- Test 3: Set speed ---\n");
speed = 5000;
if (ioctl(fd, MYDEV_IOCSSPEED, &speed) < 0) {
perror("IOCSSPEED failed");
} else {
printf("Speed set to: %d\n", speed);
}
/* Test 4: Get configuration */
printf("\n--- Test 4: Get configuration ---\n");
if (ioctl(fd, MYDEV_IOCGCONFIG, &config) < 0) {
perror("IOCGCONFIG failed");
} else {
printf("Configuration:\n");
printf(" Speed: %u\n", config.speed);
printf(" Mode: %u\n", config.mode);
printf(" Flags: %u\n", config.flags);
printf(" Name: %s\n", config.name);
}
/* Test 5: Set configuration */
printf("\n--- Test 5: Set configuration ---\n");
config.speed = 10000;
config.mode = 2;
config.flags = 0x0F;
strcpy(config.name, "custom_config");
if (ioctl(fd, MYDEV_IOCSCONFIG, &config) < 0) {
perror("IOCSCONFIG failed");
} else {
printf("Configuration set successfully\n");
}
/* Test 6: Exchange configuration */
printf("\n--- Test 6: Exchange configuration ---\n");
config.speed = 20000;
config.mode = 3;
config.flags = 0xFF;
strcpy(config.name, "exchanged");
if (ioctl(fd, MYDEV_IOCXCONFIG, &config) < 0) {
perror("IOCXCONFIG failed");
} else {
printf("Old configuration (returned):\n");
printf(" Speed: %u\n", config.speed);
printf(" Mode: %u\n", config.mode);
printf(" Flags: %u\n", config.flags);
printf(" Name: %s\n", config.name);
}
/* Close device */
close(fd);
printf("\nDevice closed\n");
return EXIT_SUCCESS;
}
poll and select
Theory: Blocking I/O vs Non-blocking I/O
poll and select allow userspace programs to monitor multiple file descriptors for I/O readiness:
- Check if data is available for reading (without blocking)
- Check if device is ready for writing (without blocking)
- Wait with timeout for I/O to become ready
Implementing poll
/*
* poll_driver.c - Driver with poll support
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
/*
* struct poll_dev - state of the poll example device
*
* Holds a single message buffer. The readable/writable flags drive
* both the blocking read/write paths and the mask reported by poll.
* NOTE(review): no lock protects these fields in the code shown here.
*/
struct poll_dev {
struct cdev cdev;
wait_queue_head_t read_queue; /* Wait queue for readers */
wait_queue_head_t write_queue; /* Wait queue for writers */
char buffer[256];
size_t data_size; /* Number of valid bytes in buffer */
int readable; /* Flag: data available for reading */
int writable; /* Flag: space available for writing */
};
static struct poll_dev *pdev; /* The one device instance */
static dev_t dev_num; /* Device number from alloc_chrdev_region() */
static struct class *dev_class; /* Class used to create the device node */
/*
 * poll_dev_poll - report which I/O directions are currently ready
 *
 * @filp: File pointer
 * @wait: Poll table supplied by the kernel
 *
 * Both wait queues are registered with the poll table first; this does
 * not sleep, it only tells the caller which queues to wait on when the
 * returned mask is empty. The mask is then built from the device flags.
 *
 * Other flags a driver may report: EPOLLERR (error condition),
 * EPOLLHUP (hang up), EPOLLPRI (urgent data available).
 *
 * Return: Mask of ready events (EPOLLIN, EPOLLOUT, etc.)
 */
static __poll_t poll_dev_poll(struct file *filp, poll_table *wait)
{
	__poll_t events = 0;

	pr_info("poll: Called\n");

	poll_wait(filp, &pdev->read_queue, wait);
	poll_wait(filp, &pdev->write_queue, wait);

	/* Data waiting to be read */
	if (pdev->readable) {
		events |= EPOLLIN | EPOLLRDNORM;
		pr_info("poll: Device is readable\n");
	}

	/* Room for a new message */
	if (pdev->writable) {
		events |= EPOLLOUT | EPOLLWRNORM;
		pr_info("poll: Device is writable\n");
	}

	return events;
}
/*
 * poll_dev_read - hand the buffered message to userspace
 *
 * @filp: File pointer (O_NONBLOCK in f_flags selects non-blocking mode)
 * @buf: Destination buffer in userspace
 * @count: Maximum number of bytes to return
 * @f_pos: File position (unused)
 *
 * With no data available the call fails with -EAGAIN in non-blocking
 * mode, otherwise it sleeps on the read queue. After a successful copy
 * the whole buffer is considered consumed, the device becomes writable
 * again and sleeping writers are woken.
 *
 * Return: number of bytes copied, -EAGAIN, -ERESTARTSYS if interrupted
 * by a signal, or -EFAULT on a bad user buffer.
 */
static ssize_t poll_dev_read(struct file *filp, char __user *buf,
			     size_t count, loff_t *f_pos)
{
	if (!pdev->readable) {
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		pr_info("read: Waiting for data...\n");
		if (wait_event_interruptible(pdev->read_queue, pdev->readable))
			return -ERESTARTSYS; /* Interrupted by signal */
	}

	/* Never return more than what is stored */
	if (pdev->data_size < count)
		count = pdev->data_size;

	if (copy_to_user(buf, pdev->buffer, count))
		return -EFAULT;

	/* Buffer is now empty: flip the state flags */
	pdev->data_size = 0;
	pdev->readable = 0;
	pdev->writable = 1;

	/* Wake up writers waiting for space */
	wake_up_interruptible(&pdev->write_queue);

	pr_info("read: Read %zu bytes\n", count);
	return count;
}
/*
 * poll_dev_write - store one message in the device buffer
 *
 * @filp: File pointer (O_NONBLOCK in f_flags selects non-blocking mode)
 * @buf: Source buffer in userspace
 * @count: Number of bytes offered; at most sizeof(pdev->buffer) are taken
 * @f_pos: File position (unused)
 *
 * A zero-length write is a no-op. Without that guard it would mark the
 * device readable with no data in it, so poll would report EPOLLIN, the
 * next read would return 0, and writers would be blocked meanwhile.
 *
 * Return: number of bytes stored, -EAGAIN if the buffer is full in
 * non-blocking mode, -ERESTARTSYS if interrupted by a signal, or
 * -EFAULT on a bad user buffer.
 */
static ssize_t poll_dev_write(struct file *filp, const char __user *buf,
			      size_t count, loff_t *f_pos)
{
	/* Nothing to store: leave the device state untouched */
	if (count == 0)
		return 0;

	if (!pdev->writable) {
		/* If no space and non-blocking, return immediately */
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		/* If no space and blocking, wait for space */
		pr_info("write: Waiting for space...\n");
		if (wait_event_interruptible(pdev->write_queue, pdev->writable))
			return -ERESTARTSYS;
	}

	/* Accept at most one buffer's worth; the caller sees a short write */
	if (count > sizeof(pdev->buffer))
		count = sizeof(pdev->buffer);

	if (copy_from_user(pdev->buffer, buf, count))
		return -EFAULT;

	pdev->data_size = count;
	pdev->readable = 1;
	pdev->writable = 0;

	/* Wake up readers waiting for data */
	wake_up_interruptible(&pdev->read_queue);

	pr_info("write: Wrote %zu bytes\n", count);
	return count;
}
/*
* poll_dev_open - open() handler; only logs the event.
*
* Return: always 0.
*/
static int poll_dev_open(struct inode *inode, struct file *filp)
{
pr_info("Device opened\n");
return 0;
}
/*
* poll_dev_release - release() handler; only logs the event.
*
* Return: always 0.
*/
static int poll_dev_release(struct inode *inode, struct file *filp)
{
pr_info("Device closed\n");
return 0;
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = poll_dev_open,
.release = poll_dev_release,
.read = poll_dev_read,
.write = poll_dev_write,
.poll = poll_dev_poll, /* poll handler */
};
/*
 * poll_dev_init - module entry point
 *
 * Allocates the device state, initialises the wait queues and flags,
 * then performs the usual character-device registration. Every failure,
 * including failure to create the device node, unwinds the steps that
 * already succeeded.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int __init poll_dev_init(void)
{
	struct device *dev;
	int ret;

	pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
	if (!pdev)
		return -ENOMEM;

	/* Initialize wait queues */
	init_waitqueue_head(&pdev->read_queue);
	init_waitqueue_head(&pdev->write_queue);

	/* Initially writable (empty buffer) */
	pdev->readable = 0;
	pdev->writable = 1;
	pdev->data_size = 0;

	/* Standard device registration... */
	ret = alloc_chrdev_region(&dev_num, 0, 1, "polldev");
	if (ret < 0)
		goto err;

	dev_class = class_create(THIS_MODULE, "polldev_class");
	if (IS_ERR(dev_class)) {
		ret = PTR_ERR(dev_class);
		goto err_class;
	}

	cdev_init(&pdev->cdev, &fops);
	ret = cdev_add(&pdev->cdev, dev_num, 1);
	if (ret < 0)
		goto err_cdev;

	/* Without the device node the driver is unreachable: treat as fatal */
	dev = device_create(dev_class, NULL, dev_num, NULL, "polldev");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	pr_info("polldev: Initialized\n");
	return 0;

err_device:
	cdev_del(&pdev->cdev);
err_cdev:
	class_destroy(dev_class);
err_class:
	unregister_chrdev_region(dev_num, 1);
err:
	kfree(pdev);
	return ret;
}
/*
* poll_dev_exit - module exit point
*
* Removes the device node, the cdev, the class and the device number,
* then frees the device state.
*/
static void __exit poll_dev_exit(void)
{
device_destroy(dev_class, dev_num);
cdev_del(&pdev->cdev);
class_destroy(dev_class);
unregister_chrdev_region(dev_num, 1);
kfree(pdev);
pr_info("polldev: Removed\n");
}
module_init(poll_dev_init);
module_exit(poll_dev_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Driver with poll support");
Userspace test with poll:
/*
* test_poll.c - Test poll functionality
*
* Compile: gcc -o test_poll test_poll.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>
#include <string.h>
/*
 * Exercise poll(), blocking I/O and non-blocking I/O on /dev/polldev.
 *
 * Return: EXIT_SUCCESS if every step behaved as expected, EXIT_FAILURE
 * if the device could not be opened or a system call failed
 * unexpectedly. The non-blocking read in test 3 is expected to fail.
 */
int main(void)
{
    static const char msg[] = "Hello, poll!";
    struct pollfd fds[1];
    char buffer[256];
    int status = EXIT_SUCCESS;
    ssize_t n;
    int flags;
    int ret;
    int fd;

    /* Open device */
    fd = open("/dev/polldev", O_RDWR);
    if (fd < 0) {
        perror("open");
        return EXIT_FAILURE;
    }

    /* Setup poll structure */
    fds[0].fd = fd;
    fds[0].events = POLLIN | POLLOUT; /* Monitor read and write */
    fds[0].revents = 0;
    printf("Testing poll...\n");

    /* Test 1: Check if writable (should be) */
    printf("\nTest 1: Check if device is writable\n");
    ret = poll(fds, 1, 5000); /* 5 second timeout */
    if (ret < 0) {
        perror("poll");
        status = EXIT_FAILURE;
    } else if (ret == 0) {
        printf("Timeout - no events\n");
    } else {
        if (fds[0].revents & POLLOUT)
            printf("Device is writable!\n");
        if (fds[0].revents & POLLIN)
            printf("Device is readable!\n");
    }

    /* Test 2: Write data, then check if readable */
    printf("\nTest 2: Write data and check readability\n");
    n = write(fd, msg, strlen(msg));
    if (n < 0) {
        perror("write");
        status = EXIT_FAILURE;
    } else {
        printf("Data written\n");
        fds[0].events = POLLIN; /* Only monitor read */
        ret = poll(fds, 1, 5000);
        if (ret < 0) {
            perror("poll");
            status = EXIT_FAILURE;
        } else if (ret > 0 && (fds[0].revents & POLLIN)) {
            printf("Device is readable!\n");
            n = read(fd, buffer, sizeof(buffer) - 1);
            if (n < 0) {
                /* Must not index buffer with a negative length */
                perror("read");
                status = EXIT_FAILURE;
            } else {
                buffer[n] = '\0';
                printf("Read: %s\n", buffer);
            }
        }
    }

    /* Test 3: Non-blocking read when no data */
    printf("\nTest 3: Non-blocking read with no data\n");
    /* Add O_NONBLOCK to the existing status flags instead of replacing them */
    flags = fcntl(fd, F_GETFL);
    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
        perror("fcntl");
        status = EXIT_FAILURE;
    } else {
        n = read(fd, buffer, sizeof(buffer));
        if (n < 0)
            perror("Non-blocking read returned");
    }

    close(fd);
    return status;
}
llseek - Seeking in Device Files
Theory: File Position Management
Some devices support seeking (changing file position). For devices with addressable storage, implementing llseek allows random access.
Implementing llseek
/*
* llseek implementation
*/
#include <linux/fs.h>
#define DEVICE_SIZE 4096 /* Size of our "device" */
/*
* struct seekable_dev - device backed by a fixed-size in-memory buffer
*
* DEVICE_SIZE is the end-of-device position used by the llseek handler.
* NOTE(review): struct cdev needs <linux/cdev.h>, which this snippet
* does not include.
*/
struct seekable_dev {
char buffer[DEVICE_SIZE];
struct cdev cdev;
};
/*
 * seekable_llseek - reposition the file offset within the device
 *
 * @filp: File pointer
 * @offset: Offset to seek to/by
 * @whence: Seek mode (SEEK_SET, SEEK_CUR, SEEK_END)
 *
 * The new position is a base chosen by @whence (0, the current
 * position, or DEVICE_SIZE) plus @offset. Positions outside
 * [0, DEVICE_SIZE] are rejected; seeking exactly to the end is allowed.
 *
 * Return: New file position, or -EINVAL for an unknown @whence or an
 * out-of-range result.
 */
static loff_t seekable_llseek(struct file *filp, loff_t offset, int whence)
{
	loff_t base;
	loff_t target;

	pr_info("llseek: offset=%lld, whence=%d\n", offset, whence);

	switch (whence) {
	case SEEK_SET: /* Absolute position */
		base = 0;
		break;
	case SEEK_CUR: /* Relative to current position */
		base = filp->f_pos;
		break;
	case SEEK_END: /* Relative to end */
		base = DEVICE_SIZE;
		break;
	default:
		return -EINVAL;
	}

	target = base + offset;
	if (target < 0 || target > DEVICE_SIZE) {
		pr_err("llseek: Invalid position: %lld\n", target);
		return -EINVAL;
	}

	filp->f_pos = target;
	pr_info("llseek: New position: %lld\n", target);
	return target;
}
/*
* For devices that don't support seeking
*/
static loff_t no_llseek(struct file *filp, loff_t offset, int whence)
{
return -ESPIPE; /* Illegal seek (like pipe) */
}
/*
* Use default llseek (works for most cases)
*/
static struct file_operations fops = {
.llseek = default_llseek, /* Provided by kernel */
/* ... other operations ... */
};
/*
* Disable seeking completely
*/
static struct file_operations no_seek_fops = {
.llseek = no_llseek,
/* ... other operations ... */
};
mmap - Memory Mapping
Theory: Memory Mapped I/O
mmap allows userspace to access device memory directly, without a system call for each access. It is very efficient for:
- Framebuffers (video memory)
- DMA buffers
- Memory-mapped hardware registers
- High-performance data transfer
mmap Architecture
User Space Kernel Space
┌────────────┐ ┌────────────┐
│ │ │ │
│ User │ mmap() │ Device │
│ Process ├──────────────┤ Driver │
│ │ │ │
│ ┌────────┐ │ │ ┌────────┐ │
│ │Virtual │ │ │ │Physical│ │
│ │Address │ ◄──────────────► │Memory │ │
│ │Space │ │ Page Tables │ │ │ │
│ └────────┘ │ │ └────────┘ │
└────────────┘ └────────────┘
Implementing mmap
/*
* mmap_driver.c - Device driver with mmap support
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#define MMAP_SIZE (4 * PAGE_SIZE) /* 4 pages */
/*
* struct mmap_dev - state of the mmap example device
*
* kmem points at the kernel buffer that is exposed to userspace and
* size is its length in bytes (set to MMAP_SIZE at init).
*/
struct mmap_dev {
struct cdev cdev;
void *kmem; /* Kernel memory buffer */
unsigned long size; /* Length of kmem in bytes */
};
static struct mmap_dev *mdev; /* The one device instance */
static dev_t dev_num; /* Device number from alloc_chrdev_region() */
static struct class *dev_class; /* Class used to create the device node */
/*
 * mmap_dev_fault - page-fault handler for the on-demand mapping
 *
 * @vmf: Fault description; pgoff is the faulting page's index
 *
 * Translates the page index into a byte offset inside the device
 * buffer, takes a reference on the backing page and returns it through
 * vmf->page. Offsets at or beyond the buffer size are refused.
 *
 * Return: 0 on success, VM_FAULT_SIGBUS if the offset is out of range.
 */
static vm_fault_t mmap_dev_fault(struct vm_fault *vmf)
{
	unsigned long byte_off;
	struct page *pg;

	pr_info("mmap: Page fault at offset %lu\n", vmf->pgoff);

	/* Page index -> byte offset in device memory */
	byte_off = vmf->pgoff << PAGE_SHIFT;
	if (byte_off >= mdev->size)
		return VM_FAULT_SIGBUS; /* Out of range */

	/* Look up the page and hold a reference for the new mapping */
	pg = virt_to_page(mdev->kmem + byte_off);
	get_page(pg);
	vmf->page = pg;

	pr_info("mmap: Mapped page at offset %lu\n", byte_off);
	return 0;
}

/* VM operations - .fault is called for page faults in the mapping */
static const struct vm_operations_struct mmap_vm_ops = {
	.fault = mmap_dev_fault,
};
/*
 * mmap implementation - Method 1: Using VM operations (on-demand)
 *
 * @filp: File pointer
 * @vma: Virtual memory area describing the requested mapping
 *
 * Only validates the request and installs the VM operations; no page is
 * mapped here. Flags set on the VMA:
 *   VM_IO         - This is I/O memory (not pageable)
 *   VM_DONTEXPAND - Don't expand with mremap()
 *   VM_DONTDUMP   - Don't include in core dump
 *
 * NOTE(review): writing vma->vm_flags directly may be rejected by newer
 * kernels that require a helper for this; confirm for your target.
 *
 * Return: 0 on success, -EINVAL if the request is larger than the
 * device buffer or uses a non-zero offset.
 */
static int mmap_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	const unsigned long req_len = vma->vm_end - vma->vm_start;

	pr_info("mmap: Called\n");
	pr_info("mmap: Start=0x%lx, End=0x%lx, Offset=%lu\n",
		vma->vm_start, vma->vm_end, vma->vm_pgoff);

	/* The mapping must fit inside the device buffer */
	if (req_len > mdev->size) {
		pr_err("mmap: Requested size too large\n");
		return -EINVAL;
	}

	/* Mappings must start at the beginning of the buffer */
	if (vma->vm_pgoff != 0) {
		pr_err("mmap: Non-zero offset not supported\n");
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &mmap_vm_ops;

	pr_info("mmap: Mapping %lu bytes\n", req_len);
	return 0;
}
/*
 * mmap implementation - Method 2: Direct mapping (all at once)
 *
 * @filp: File pointer
 * @vma: Virtual memory area describing the requested mapping
 *
 * Maps the whole requested range immediately with remap_pfn_range()
 * instead of waiting for page faults.
 *
 * A non-zero offset is rejected, matching method 1; previously it was
 * silently ignored and the mapping always started at offset 0.
 *
 * No explicit vm_flags update is done here: remap_pfn_range() marks the
 * VMA VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP itself.
 *
 * Return: 0 on success, -EINVAL for an oversized request or non-zero
 * offset, or the error returned by remap_pfn_range().
 */
static int mmap_dev_mmap_direct(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn;
	int ret;

	if (size > mdev->size)
		return -EINVAL;

	if (vma->vm_pgoff != 0) {
		pr_err("mmap: Non-zero offset not supported\n");
		return -EINVAL;
	}

	/* Physical page frame number of the start of the buffer */
	pfn = virt_to_phys(mdev->kmem) >> PAGE_SHIFT;

	ret = remap_pfn_range(vma,
			      vma->vm_start,		/* User virtual address */
			      pfn,			/* Physical page frame number */
			      size,			/* Size to map */
			      vma->vm_page_prot);	/* Protection */
	if (ret < 0) {
		pr_err("mmap: remap_pfn_range failed\n");
		return ret;
	}

	pr_info("mmap: Mapped %lu bytes directly\n", size);
	return 0;
}
/*
* mmap_dev_open - open() handler; only logs the event.
*
* Return: always 0.
*/
static int mmap_dev_open(struct inode *inode, struct file *filp)
{
pr_info("Device opened\n");
return 0;
}
/*
* mmap_dev_release - release() handler; only logs the event.
*
* Return: always 0.
*/
static int mmap_dev_release(struct inode *inode, struct file *filp)
{
pr_info("Device closed\n");
return 0;
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = mmap_dev_open,
.release = mmap_dev_release,
.mmap = mmap_dev_mmap, /* Use on-demand mapping */
/* .mmap = mmap_dev_mmap_direct, */ /* Or use direct mapping */
};
/*
 * mmap_dev_init - module entry point
 *
 * The buffer that userspace will map comes from alloc_pages_exact()
 * rather than kmalloc(): it is page-aligned and made of individually
 * reference-counted pages, which is what both the fault handler
 * (virt_to_page() + get_page()) and remap_pfn_range() need. It must be
 * released with free_pages_exact(), never kfree().
 *
 * Return: 0 on success, negative error code on failure.
 */
static int __init mmap_dev_init(void)
{
	struct device *dev;
	int ret;

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return -ENOMEM;

	/* Allocate whole pages for the mappable buffer */
	mdev->kmem = alloc_pages_exact(MMAP_SIZE, GFP_KERNEL);
	if (!mdev->kmem) {
		ret = -ENOMEM;
		goto err_free_dev;
	}
	mdev->size = MMAP_SIZE;

	/* Write test pattern to memory (also overwrites stale contents) */
	memset(mdev->kmem, 0xAA, MMAP_SIZE);

	/* Standard device registration */
	ret = alloc_chrdev_region(&dev_num, 0, 1, "mmapdev");
	if (ret < 0)
		goto err_free_mem;

	dev_class = class_create(THIS_MODULE, "mmapdev_class");
	if (IS_ERR(dev_class)) {
		ret = PTR_ERR(dev_class);
		goto err_class;
	}

	cdev_init(&mdev->cdev, &fops);
	ret = cdev_add(&mdev->cdev, dev_num, 1);
	if (ret < 0)
		goto err_cdev;

	/* Without the device node the driver is unreachable: treat as fatal */
	dev = device_create(dev_class, NULL, dev_num, NULL, "mmapdev");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	pr_info("mmapdev: Initialized with %lu bytes\n", mdev->size);
	return 0;

err_device:
	cdev_del(&mdev->cdev);
err_cdev:
	class_destroy(dev_class);
err_class:
	unregister_chrdev_region(dev_num, 1);
err_free_mem:
	free_pages_exact(mdev->kmem, MMAP_SIZE);
err_free_dev:
	kfree(mdev);
	return ret;
}

/*
 * mmap_dev_exit - module exit point
 *
 * Undoes mmap_dev_init() in reverse order; the mappable buffer is
 * returned with free_pages_exact() to match its allocator.
 */
static void __exit mmap_dev_exit(void)
{
	device_destroy(dev_class, dev_num);
	cdev_del(&mdev->cdev);
	class_destroy(dev_class);
	unregister_chrdev_region(dev_num, 1);
	free_pages_exact(mdev->kmem, mdev->size);
	kfree(mdev);
	pr_info("mmapdev: Removed\n");
}
module_init(mmap_dev_init);
module_exit(mmap_dev_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Device with mmap support");
Userspace test:
/*
* test_mmap.c - Test mmap functionality
*
* Compile: gcc -o test_mmap test_mmap.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#define MMAP_PAGES 4 /* Number of pages exported by the driver */

/*
 * Map the device buffer, dump its first bytes, overwrite it with a
 * counting pattern and read the pattern back.
 *
 * The mapping length is derived from the system's real page size, since
 * the driver sizes its buffer as a number of kernel pages and a fixed
 * 4096 would be wrong on systems with larger pages.
 *
 * Return: EXIT_SUCCESS if mapping and unmapping both succeeded,
 * EXIT_FAILURE otherwise.
 */
int main(void)
{
    unsigned char *ptr;
    int status = EXIT_SUCCESS;
    long page_size;
    size_t map_len;
    void *mapped;
    size_t i;
    int fd;

    page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        perror("sysconf");
        return EXIT_FAILURE;
    }
    map_len = (size_t)MMAP_PAGES * (size_t)page_size;

    /* Open device */
    fd = open("/dev/mmapdev", O_RDWR);
    if (fd < 0) {
        perror("open");
        return EXIT_FAILURE;
    }

    /* Map device memory to user space */
    mapped = mmap(NULL,                   /* Let kernel choose address */
                  map_len,                /* Size to map */
                  PROT_READ | PROT_WRITE, /* Read/write access */
                  MAP_SHARED,             /* Share mapping with kernel */
                  fd,                     /* File descriptor */
                  0);                     /* Offset in device */
    if (mapped == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return EXIT_FAILURE;
    }
    printf("Memory mapped successfully at %p\n", mapped);

    /* Access mapped memory */
    ptr = (unsigned char *)mapped;

    /* Read initial pattern */
    printf("\nInitial pattern (first 16 bytes):\n");
    for (i = 0; i < 16; i++) {
        printf("%02x ", ptr[i]);
    }
    printf("\n");

    /* Write new pattern */
    printf("\nWriting new pattern...\n");
    for (i = 0; i < map_len; i++) {
        ptr[i] = (unsigned char)(i & 0xFF);
    }

    /* Read back */
    printf("New pattern (first 16 bytes):\n");
    for (i = 0; i < 16; i++) {
        printf("%02x ", ptr[i]);
    }
    printf("\n");

    /* Unmap memory */
    if (munmap(mapped, map_len) < 0) {
        perror("munmap");
        status = EXIT_FAILURE;
    } else {
        printf("\nMemory unmapped successfully\n");
    }

    close(fd);
    return status;
}
Summary
In this chapter, you learned:
✅ ioctl: Device-specific control operations
✅ poll/select: Non-blocking I/O and multiplexing
✅ llseek: File position management
✅ mmap: Memory-mapped I/O for high performance
✅ Advanced file operations: Complete file_operations structure
Key Takeaways
- Use ioctl for device control, not for regular data transfer
- Implement poll for efficient I/O multiplexing
- mmap provides zero-copy access to device memory
- Always validate user input in all operations
- Use wait queues for blocking operations
Next Steps
Proceed to 04-memory.md to learn about kernel memory management, allocation strategies, and DMA.
Quick Reference
ioctl
/*
* Define commands
* All three share magic 'M'; the third macro argument is the TYPE of
* the data exchanged, not its size.
*/
#define MY_IOCRESET _IO('M', 0)
#define MY_IOCGVAL _IOR('M', 1, int)
#define MY_IOCSVAL _IOW('M', 2, int)
/*
* Implement handler
* Signature matches the unlocked_ioctl member of struct file_operations.
*/
long my_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
poll
/*
 * Implement poll: register the wait queue, then report readiness.
 * Returns the readable mask when data_ready is set, 0 otherwise.
 */
__poll_t my_poll(struct file *filp, poll_table *wait) {
	poll_wait(filp, &my_queue, wait);
	return data_ready ? (EPOLLIN | EPOLLRDNORM) : 0;
}
/* Wake up waiters */
wake_up_interruptible(&my_queue);
mmap
/*
 * Implement mmap: map the kernel buffer 'kmem' over the whole VMA in a
 * single call. Returns the result of remap_pfn_range().
 */
int my_mmap(struct file *filp, struct vm_area_struct *vma) {
	unsigned long map_len = vma->vm_end - vma->vm_start;
	unsigned long first_pfn = virt_to_phys(kmem) >> PAGE_SHIFT;

	return remap_pfn_range(vma, vma->vm_start, first_pfn,
			       map_len, vma->vm_page_prot);
}