Find the answer to your Linux question:
Results 1 to 2 of 2
Hi everyone, I decided to write a simple, minimal block driver based on sbull from LDD3 (hence a simple ramdisk). The block device model changed a lot since LDD3 and ...
Enjoy an ad free experience by logging in. Not a member yet? Register.
  1. #1
    Just Joined!
    Join Date
    Aug 2013
    Posts
    1

    Block driver failing on reads


    Hi everyone,

    I decided to write a simple, minimal block driver based on sbull from LDD3 (hence a simple ramdisk). The block device model changed a lot since LDD3 and I've tried my best to adapt my driver to the 3.x kernel (I'm compiling with 3.10), but my system freezes completely whenever I perform reads on my device. Sometimes it takes a few seconds before it freezes, sometimes it happens instantly.

    I inserted a few "printks" to debug my driver, but they aren't helping. I assume my problem is not within the driver itself, but some higher level layer, since in a few cases my system got stuck in the middle of one random printk (i.e., /var/log/messages shows part of the content of the printk and then the contents of the kernel initialization after I rebooted the VM, which makes me think there's concurrency happening during the issue).

    Thus I also assume it might be lock-related. I took a look at a few other block drivers, saw some mutexes and IRQ locks, and tried to play with them, but without success. I've tried setting the I/O scheduler to noop as well.

    Any help will be much appreciated :-)

    escsi.c:
    Code:
    // Based on sbull driver from the book 'Linux Device Drivers'
    
    #include <linux/module.h>
    #include <linux/blkdev.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/fs.h>
    #include <linux/errno.h>
    #include <linux/types.h>
    #include <linux/fcntl.h>
    #include <linux/hdreg.h>
    #include <linux/kdev_t.h>
    #include <linux/vmalloc.h>
    #include <linux/buffer_head.h>
    #include <linux/sched.h>
    #include "escsi.h"
    
    MODULE_LICENSE("Dual BSD/GPL");

    /* Module parameters (permission 0: not exposed in sysfs). */
    static int major = 0;		/* 0 asks register_blkdev() for a dynamic major */
    module_param(major, int, 0);
    static int hardsect_size = 512;	/* bytes per device sector */
    module_param(hardsect_size, int, 0);
    static int nsectors = 20480; // 10MB virtual disk
    module_param(nsectors, int, 0);
    static int ndevices = 2;	/* number of ramdisks to create */
    module_param(ndevices, int, 0);

    /* Each disk owns BLOCK_MINORS minor numbers (1 << MINOR_SHIFT). */
    #define BLOCK_MINORS	16
    #define MINOR_SHIFT	4
    /* Fixed: the original expansion was missing its closing parenthesis. */
    #define DEVNUM(kdevnum)	(MINOR(kdev_t_to_nr(kdevnum)) >> MINOR_SHIFT)

    /* Parenthesized so the expansion is safe inside larger expressions. */
    #define INVALIDATE_DELAY	(300*HZ)

    /* Array of ndevices device descriptors, allocated in escsi_init(). */
    static struct escsi_dev *Devices = NULL;
    
    int escsi_transfer(struct escsi_dev *dev, unsigned long sector,
    		unsigned long nsect, char *buffer, int write)
    {
    	unsigned long offset = sector*KERNEL_SECTOR_SIZE;
    	unsigned long nbytes = nsect*KERNEL_SECTOR_SIZE;
    	unsigned char currch;
    	unsigned char *response;
    	unsigned char *zeroes;
    	int i;
    
    	currch = 0;
    
        //spin_lock (&dev->lock);
    
    	printk("escsi_TRANSFER nsect=%lu, write=%d, sector=%lu\n",nsect,write,sector);
    	printk("dev->data FIM=%lu, write/read FIM=%lu\n",dev->data+dev->size,dev->data+offset+nbytes);
    	
    	if ((offset + nbytes) > dev->size) {
    		printk ("Beyond-end write (%ld %ld)\n", offset, nbytes);
    		return -EIO;
    	}
    
    	printk("escsi probe 1\n" );
    
    	if (write) {
    		printk("WRITING BEFORE: dev->data(FIM)=%lu, dev->data+offset=%lu, diff=%lu, nbytes=%lu\n", dev->data+dev->size, dev->data + offset, dev->size-offset, nbytes);
    		memcpy (dev->data + offset, buffer, nbytes);
    		printk("WRITING AFTER\n" );
    	}
    	else {
    		printk("READING BEFORE: dev->data(FIM)=%lu, dev->data+offset=%lu, diff=%lu, nbytes=%lu\n", dev->data+dev->size, dev->data + offset, dev->size-offset, nbytes);
    		//zeroes = kzalloc(nbytes, GFP_KERNEL);
    		//memcpy(buffer,zeroes,nbytes);
    		memcpy (buffer, dev->data + offset, nbytes);
    		printk("READING AFTER\n");
    	}
    
            printk("escsi probe 2\n" );
    
    	//spin_unlock(&dev->lock);
    
    	return 0;
    }
    
    void escsi_request(struct request_queue *q)
    {
    	struct request *req;
    	int err,i;
    
    	req = blk_fetch_request (q);
    	//printk("escsi received request!!! cmd=%s, type=%x\n",req->cmd,req->cmd_type);
    	while (req)
    	{
    		struct escsi_dev *dev = req->rq_disk->private_data;
    	
    		if(req->cmd_type != REQ_TYPE_FS)
    		{
    			//printk (KERN_NOTICE "Skip non-fs request\n");
    			continue;
    		}
    
    		escsi_transfer(dev, blk_rq_pos(req), blk_rq_sectors(req), req->buffer, rq_data_dir(req));
    		i = __blk_end_request_cur (req, err);
    		if (i == 0)
    			req = blk_fetch_request (q);
    	}
    }
    
    /*
     * escsi_ioctl - block device ioctl handler.
     * @bdev: block device the ioctl was issued on
     * @mode: open mode (unused)
     * @cmd:  ioctl command; only HDIO_GETGEO is supported
     * @arg:  user pointer to a struct hd_geometry for HDIO_GETGEO
     *
     * Returns 0 on success, -EFAULT if the result cannot be copied to user
     * space, and -ENOTTY for any other command.
     *
     * The original allocated a byte with kzalloc() on every call and never
     * freed it; that allocation and the unused locals are removed.
     */
    int escsi_ioctl (struct block_device *bdev, fmode_t mode,
    		unsigned int cmd, unsigned long arg)
    {
    	struct escsi_dev *dev = bdev->bd_disk->private_data;
    	struct hd_geometry geo;
    	long size;

    	switch (cmd) {
    	case HDIO_GETGEO:
    		/* Zero the struct, padding included, before it goes to user space. */
    		memset(&geo, 0, sizeof(geo));

    		// Bogus info: a ramdisk has no geometry, but tools ask for one.
    		size = dev->size*(hardsect_size/KERNEL_SECTOR_SIZE);
    		geo.cylinders = (size & ~0x3f) >> 6;
    		geo.heads = 4;
    		geo.sectors = 16;
    		geo.start = 4;
    		if (copy_to_user((void __user *) arg, &geo, sizeof(geo)))
    			return -EFAULT;
    		return 0;
    	default:
    		return -ENOTTY;
    	}
    }
    
    static struct block_device_operations escsi_ops = {
    	.owner           = THIS_MODULE,
    	//.open 	         = escsi_open,
    	//.release         = escsi_release,
    	//.media_changed   = escsi_media_changed,
    	//.revalidate_disk = escsi_revalidate,
    	.ioctl	         = escsi_ioctl
    };
    
    static void setup_device (struct escsi_dev *dev, int which)
    {
    	memset (dev, 0, sizeof (struct escsi_dev));
    	dev->size = nsectors*hardsect_size;
    	dev->data = vmalloc (dev->size);
    	if (dev->data == NULL)
    	{
    		//printk (KERN_NOTICE "vmalloc failure.\n");
    		return;
    	}
    	spin_lock_init (&dev->lock);
    
    	//init_timer (&dev->timer);
    	//dev->timer.data = (unsigned long) dev;
    	//dev->timer.function = escsi_invalidate;
    	
    	// Replaced the line below with the two next lines
    	dev->queue = blk_init_queue (escsi_request, &dev->lock);
    
    	//dev->queue = blk_alloc_queue(GFP_KERNEL);
    	//blk_queue_make_request(dev->queue, escsi_request);
    	//dev->queue = blk_init_queue (escsi_request, &dev->lock);
            //blk_queue_max_hw_sectors(dev->queue, 1024);
            //blk_queue_bounce_limit(dev->queue, BLK_BOUNCE_ANY);
            //dev->queue->limits.discard_granularity = PAGE_SIZE;
            //dev->queue->limits.max_discard_sectors = UINT_MAX;
            //dev->queue->limits.discard_zeroes_data = 1;
    
    	if (dev->queue == NULL)
    		goto out_vfree;
    
    	dev->queue->queuedata = dev;
    	dev->gd = alloc_disk (BLOCK_MINORS);
    
    	if (! dev->gd)
    	{
    		//printk (KERN_NOTICE "alloc_disk failure\n");
    		goto out_vfree;
    	}
    
    	dev->gd->major = major;
    	dev->gd->first_minor = which*BLOCK_MINORS;
    	dev->gd->fops = &escsi_ops;
    	dev->gd->queue = dev->queue;
    	dev->gd->private_data = dev;
    
    	snprintf (dev->gd->disk_name, 32, "esd%c", which + 'a');
    	set_capacity (dev->gd, nsectors*(hardsect_size/KERNEL_SECTOR_SIZE));
    	add_disk (dev->gd);
    	return;
    
    out_vfree:
    	if (dev->data)
    		vfree (dev->data);
    }
    
    static int escsi_init (void)
    {
    	int i;
    
    	major = register_blkdev (major, "esd");
    	if (major <= 0)
    	{
    		//printk (KERN_WARNING "block: unable to get major number\n");
    		return -EBUSY;
    	}
    	Devices = kzalloc (ndevices*sizeof (struct escsi_dev), GFP_KERNEL);
    	if (Devices == NULL)
    		goto out_unregister;
    	for (i = 0; i < ndevices; i++) 
    		setup_device (Devices + i, i);
        
    	return 0;
    
    out_unregister:
    	unregister_blkdev (major, "block");
    	return -ENOMEM;
    }
    
    /*
     * escsi_exit - module exit point; tears down every device and frees the
     * device array.
     *
     * The original called del_timer_sync() on dev->timer, but no code in this
     * driver ever initialises that timer, so the call operated on a zeroed
     * timer_list. It is removed. The queue is cleaned up after the disk is
     * unregistered and before the final reference to the disk is dropped.
     */
    static void escsi_exit (void)
    {
    	int i;

    	for (i = 0; i < ndevices; i++) {
    		struct escsi_dev *dev = Devices + i;

    		if (dev->gd)
    			del_gendisk(dev->gd);
    		if (dev->queue)
    			blk_cleanup_queue(dev->queue);
    		if (dev->gd)
    			put_disk(dev->gd);
    		/* vfree(NULL) is a no-op, so no guard is needed. */
    		vfree(dev->data);
    	}
    	unregister_blkdev(major, "esd");
    	kfree(Devices);
    }
    	
    module_init(escsi_init);
    module_exit(escsi_exit);
    escsi.h:
    Code:
    #include <linux/blkdev.h>
    
    #define ESCSI_INQUIRY_CMD 0x2285
    #define CDB_LENGTH_IN_BITS 40
    #define KERNEL_SECTOR_SIZE  512
    #define DEFAULT_INQUIRY_RESP_LENGTH 96
    
    /* Per-device state for one escsi ramdisk. */
    struct escsi_dev {
            int size; // device size in BYTES (nsectors * hardsect_size), not sectors
            u8 *data; // backing store, 'size' bytes long
            short users; // not referenced by the driver code shown here
            short media_change; // flag; not referenced by the driver code shown here
            spinlock_t lock; // handed to blk_init_queue() as the queue lock
            struct request_queue *queue; // request queue served by escsi_request()
            struct gendisk *gd; // disk registered with add_disk()
            struct timer_list timer; // NOTE(review): never initialised by the setup code - confirm before use
    };
    
    /*
     * Copy nsect 512-byte sectors, starting at 'sector', between 'buffer' and
     * the device's backing store; 'write' selects the direction (non-zero
     * copies buffer -> device). Returns 0, or -EIO if the range is out of bounds.
     */
    int escsi_transfer (struct escsi_dev *dev, unsigned long sector,
            unsigned long nsect, char *buffer, int write);
    /* Request function installed on the device's request queue. */
    void escsi_request (struct request_queue *q);
    Makefile:
    Code:
    # Out-of-tree kbuild Makefile for the escsi module.
    obj-m += escsi.o

    # Neither target produces a file of that name.
    .PHONY: all clean

    # $(MAKE) instead of a literal "make" so that -j, -n and other flags
    # propagate to the kernel build.
    all:
    	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

    clean:
    	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
    I created a text file larger than 10MB and I'm testing the writes with "dd if=input_file of=/dev/esda", and that seems to be working.

    I'm testing the reads with "dd if=/dev/esda of=read_test.out count=1". My system freezes instantly half the time, hangs after the first read 40% of the time, and hangs after the second read the rest of the time. When it doesn't freeze instantly, I can see exactly the first 512 bytes (1 block) of input_file in read_test.out :-( If I try the same command again (which will try to get the very same block!), it will likely fail.

    After I reboot the system and I check /var/log/messages, I either hit a soft lockup or see something like this:

    2013-08-13T18:42:41.842892-03:00 linux-xjwl kernel: [ 271.924294] READING BEFORE: dev->data(FIM)=18446683600590282752, dev->data+offset=18446683600584888320, diff=5394432, nbytes=4096
    2013-08-13T18:42:41.842900-03:00 linux-xjwl kernel: [ 271.924301] READING AFTER
    2013-08-13T18:42:41.842908-03:00 linux-xjwl kernel: [ 271.924308] escsi probe 2
    2013-08-13T18:42:41.842916-03:00 linux-xjwl kernel: [ 271.924527] escsi_TRANSFER nsect=8, write=0, sector=9952
    2013-08-13T18:42:41.842925-03:00 linux-xjwl kernel: [ 271.924535] dev->data FIM=18446683600590282752, write/read FIM=18446683600584896512
    2013-08-13T18:42:41.842934-03:00 linux-xjwl kernel: [ 271.924541] escsi probe 1
    2013-08-13T18:42:41.842943-03:00 linux-xjwl kernel: [ 271.924548] READING BEFORE: dev->data(FIM)=18446683600590282752, dev->data+offset=18446683600584892416, diff=5390336, nbytes=4096
    2013-08-13T18:42:41.842952-03:00 linux-xjwl kernel: [ 271.924557] READING AFTER
    2013-08-13T18:42:41.842959-03:00 linux-xjwl kernel: [ 271.924563] escsi probe 2
    2013-08-13T18:42:51.389091-03:00 linux-xjwl systemd[1]: systemd-journald.service: main process exited, code=exited, status=1/FAILURE
    2013-08-13T18:42:51.389662-03:00 linux-xjwl systemd[1]: Started Trigger Flushing of Journal to Persistent Storage.
    2013-08-13T18:42:51.390061-03:00 linux-xjwl systemd[1]: systemd-journald.service: main process exited, code=killed, status=10/USR1
linux-xjwl rsyslogd: [origin software="rsyslogd" swVersion="7.2.5" x-pid="445" x-info="rsyslog"] start
    2013-08-13T02:34:32.461188-03:00 linux-xjwl kernel: [ 0.000000] Initializing cgroup subsys cpuset
    2013-08-13T02:34:32.461347-03:00 linux-xjwl kernel: [ 0.000000] Initializing cgroup subsys cpu
    2013-08-13T02:34:32.461354-03:00 linux-xjwl kernel: [ 0.000000] Initializing cgroup subsys cpuacct

    The ^# are 0x0 and the first entry after them is the first message that is written by the kernel to the log.

    Could anyone advise, please?

    Thanks in advance,
    P.L.V.

  2. #2
    Linux Guru Rubberman's Avatar
    Join Date
    Apr 2009
    Location
    I can be found either 40 miles west of Chicago, in Chicago, or in a galaxy far, far away.
    Posts
    11,664
    Two suggestions.

    1. Go to www.kernel.org and read the docs.
    2. Look at how other block device drivers are doing it in your kernel. The source is freely available.
    Sometimes, real fast is almost as good as real time.
    Just remember, Semper Gumbi - always be flexible!

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •