/*
 * virtio block device adapted for MIC.
 * copied from drivers/block/virtio_blk.c of Linux kernel
 * It is initially committed by
 * Rusty Russell <rusty@rustcorp.com.au> 2007-10-21 18:03:38
 * with SHA1 ID, e467cde238184d1b0923db2cd61ae1c5a6dc15aa
 * drivers/block/virtio_blk.c of Linux kernel does not have a copyright notice.
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Caz Yokoyama <Caz.Yokoyama@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 */
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include "mic/micveth_dma.h"
#include "mic/micscif_intr.h"
#include "mic/mic_virtio.h"
/* Size of the SBOX MMIO window mapped in virtblk_probe(). */
#define SBOX_MMIO_LENGTH (64 * 1024)
/* Number of descriptors in the single request virtqueue. */
#define VIRTQUEUE_LENGTH 128
/* Vring alignment; host and card must agree on this value. */
#define MIC_VRING_ALIGN PAGE_SIZE
#define INTERRUPT_ID_FOR_VIRTBLK 3
/* Provided by MIC platform code: maps an SBOX interrupt index to a Linux IRQ. */
extern int get_sbox_irq(int index
);
/* Block major number, assigned in init() by register_blkdev().
 * 'index' numbers the disk (vda, vdb, ...); only one device is created here. */
static int major
, index
= 0;
/* Physical address of the host-shared virtio structure; set by the host
 * through the module_param() declared at the bottom of this file. */
static long virtio_addr
= 0;
/* Driver-wide bookkeeping; dd_bi[0] holds the single board_info in use. */
static mic_data_t virtblk_mic_data
;
struct virtio_device
*vdev
;
/* NOTE(review): the members below appear to belong to struct declarations
 * (struct virtio_blk / struct virtblk_req in the upstream driver) whose
 * headers are not visible in this view -- confirm against the full file. */
/* The disk structure for the kernel. */
/* virtual address of blk_config */
/* What host tells us, plus 2 for header & tailer. */
/* Scatterlist: can be too big for stack. */
struct scatterlist sg
[/*sg_elems*/];
struct virtio_blk_outhdr out_hdr
;
struct virtio_scsi_inhdr in_hdr
;
/* True when @rq is a SCSI packet command rather than a filesystem request. */
#define blk_pc_request(rq) (REQ_TYPE_BLOCK_PC == ((rq)->cmd_type))
/* The following vring_virtqueue and to_vvq() are copied from virtio_ring.c. Please make sure you have the same structure
   as in virtio_ring.c. The reason why they are copied is that I don't want to change virtio_ring.c which is a symbolic link. */
/* NOTE(review): member comments/declarations of the duplicated
 * struct vring_virtqueue; the struct header itself is not visible here. */
/* Actual memory layout for this queue */
/* Other side has made a mess, don't try any more. */
/* Host supports indirect buffers */
/* Number of free buffers */
/* Head of free buffer list. */
/* Number we've added since last sync. */
/* Last used index we've seen. */
/* How to notify other side. FIXME: commonalize hcalls! */
void (*notify
)(struct virtqueue
*vq
);
/* They're supposed to lock for us. */
struct _mic_ctx_t
*mic_ctx
;
/* Tokens for callbacks. */
/* Recover the containing vring_virtqueue from an embedded struct virtqueue. */
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
 * Virtqueue completion callback: reap finished requests from the ring,
 * copy SCSI status back for packet commands, complete each request to the
 * block layer, and restart the queue in case it was stopped by do_req().
 * NOTE(review): local declarations (vbr, flags, len), the switch on
 * vbr->status and several braces appear truncated in this view.
 */
static void blk_done(struct virtqueue
*vq
)
struct virtio_blk
*vblk
= vq
->vdev
->priv
;
/* Runs in interrupt context; the lock protects the vq and request list. */
spin_lock_irqsave(&vblk
->lock
, flags
);
while ((vbr
= virtqueue_get_buf(vblk
->vq
, &len
)) != NULL
) {
case VIRTIO_BLK_S_UNSUPP
:
/* Packet (SCSI) commands: propagate residual, sense and error info. */
if (blk_pc_request(vbr
->req
)) {
vbr
->req
->resid_len
= vbr
->in_hdr
.residual
;
vbr
->req
->sense_len
= vbr
->in_hdr
.sense_len
;
vbr
->req
->errors
= vbr
->in_hdr
.errors
;
__blk_end_request_all(vbr
->req
, error
);
/* Return the request wrapper to the mempool allocated in probe. */
mempool_free(vbr
, vblk
->pool
);
/* In case queue is stopped waiting for more buffers. */
blk_start_queue(vblk
->disk
->queue
);
spin_unlock_irqrestore(&vblk
->lock
, flags
);
/*
 * Translate one block-layer request into a virtio request and post it on
 * the virtqueue.  Returns false when resources are exhausted (mempool or
 * ring descriptors) so the caller can stop the queue and retry later.
 * NOTE(review): the request-type switch, error paths, and several braces
 * appear truncated in this view.
 */
static bool do_req(struct request_queue
*q
, struct virtio_blk
*vblk
,
unsigned long num
, out
= 0, in
= 0;
/* GFP_ATOMIC: called with the queue lock held from the request_fn. */
vbr
= mempool_alloc(vblk
->pool
, GFP_ATOMIC
);
/* When another request finishes we'll try again. */
if (req
->cmd_flags
& REQ_FLUSH
) {
vbr
->out_hdr
.type
= VIRTIO_BLK_T_FLUSH
;
vbr
->out_hdr
.ioprio
= req_get_ioprio(vbr
->req
);
vbr
->out_hdr
.sector
= blk_rq_pos(vbr
->req
);
vbr
->out_hdr
.ioprio
= req_get_ioprio(vbr
->req
);
vbr
->out_hdr
.type
= VIRTIO_BLK_T_SCSI_CMD
;
vbr
->out_hdr
.ioprio
= req_get_ioprio(vbr
->req
);
vbr
->out_hdr
.type
= VIRTIO_BLK_T_GET_ID
;
vbr
->out_hdr
.ioprio
= req_get_ioprio(vbr
->req
);
/* We don't put anything else in the queue. */
/* First sg element is always the fixed-size request header. */
sg_set_buf(&vblk
->sg
[out
++], &vbr
->out_hdr
, sizeof(vbr
->out_hdr
));
* If this is a packet command we need a couple of additional headers.
* Behind the normal outhdr we put a segment with the scsi command
* block, and before the normal inhdr we put the sense data and the
* inhdr with additional status information before the normal inhdr.
if (blk_pc_request(vbr
->req
))
sg_set_buf(&vblk
->sg
[out
++], vbr
->req
->cmd
, vbr
->req
->cmd_len
);
/* Map the data payload after the header segment(s). */
num
= blk_rq_map_sg(q
, vbr
->req
, vblk
->sg
+ out
);
if (blk_pc_request(vbr
->req
)) {
sg_set_buf(&vblk
->sg
[num
+ out
+ in
++], vbr
->req
->sense
, 96);
sg_set_buf(&vblk
->sg
[num
+ out
+ in
++], &vbr
->in_hdr
,
sg_set_buf(&vblk
->sg
[num
+ out
+ in
++], &vbr
->status
,
/* Data direction decides which side owns the payload buffers. */
if (rq_data_dir(vbr
->req
) == WRITE
) {
vbr
->out_hdr
.type
|= VIRTIO_BLK_T_OUT
;
vbr
->out_hdr
.type
|= VIRTIO_BLK_T_IN
;
/* Ring full: undo the allocation and report failure to the caller. */
if (virtqueue_add_buf(vblk
->vq
, vblk
->sg
, out
, in
, vbr
) < 0) {
mempool_free(vbr
, vblk
->pool
);
/* Track the in-flight request; blk_done() completes and removes it. */
list_add_tail(&vbr
->list
, &vblk
->reqs
);
/*
 * request_fn for the block queue: drain peeked requests into the
 * virtqueue via do_req(), then kick the host once at the end.
 * NOTE(review): the blk_stop_queue()/blk_start_request() lines and some
 * braces appear truncated in this view.
 */
static void do_virtblk_request(struct request_queue
*q
)
struct virtio_blk
*vblk
= q
->queuedata
;
while ((req
= blk_peek_request(q
)) != NULL
) {
/* +2 accounts for the header and status/tailer sg elements. */
BUG_ON(req
->nr_phys_segments
+ 2 > vblk
->sg_elems
);
/* If this request fails, stop queue and wait for something to
if (!do_req(q
, vblk
, req
)) {
virtqueue_kick(vblk
->vq
);
/*
 * Read the device capacity from the host-side config space and apply it
 * to the gendisk, truncating with a warning if it exceeds sector_t.
 * NOTE(review): return type, the capacity local ('cap'), and the error
 * path around the printk appear truncated in this view.
 */
set_capacity_from_host(struct virtio_blk
*vblk
)
struct virtio_device
*vdev
= vblk
->vdev
;
/* Host must always specify the capacity. */
vdev
->config
->get(vdev
, offsetof(struct virtio_blk_config
, capacity
),
printk(KERN_ERR
"Have you set virtblk file?\n");
/* If capacity is too big, truncate with warning. */
if ((sector_t
)cap
!= cap
) {
dev_warn(&vdev
->dev
, "Capacity %llu too large: truncating\n",
(unsigned long long)cap
);
set_capacity(vblk
->disk
, cap
);
/*
 * block_device_operations .open: refresh capacity from the host on every
 * open, so a host-side resize is observed by the card.
 */
virtblk_open(struct block_device
*bdev
, fmode_t mode
)
struct gendisk
*disk
= bdev
->bd_disk
;
struct virtio_blk
*vblk
= disk
->private_data
;
return set_capacity_from_host(vblk
);
/*
 * block_device_operations .ioctl: forward generic SCSI ioctls to
 * scsi_cmd_ioctl(), but only when the host advertises SCSI support.
 * NOTE(review): the non-SCSI return path appears truncated in this view.
 */
static int virtblk_ioctl(struct block_device
*bdev
, fmode_t mode
,
unsigned cmd
, unsigned long data
)
struct gendisk
*disk
= bdev
->bd_disk
;
struct virtio_blk
*vblk
= disk
->private_data
;
* Only allow the generic SCSI ioctls if the host can support it.
if (!virtio_has_feature(vblk
->vdev
, VIRTIO_BLK_F_SCSI
))
return scsi_cmd_ioctl(disk
->queue
, disk
, mode
, cmd
,
/* We provide getgeo only to please some old bootloader/partitioning tools */
/*
 * Report disk geometry: use the host-provided geometry config when the
 * VIRTIO_BLK_F_GEOMETRY feature is present, otherwise synthesize
 * sd-style values from the capacity.
 * NOTE(review): the heads/sectors fallback assignments and return appear
 * truncated in this view.
 */
static int virtblk_getgeo(struct block_device
*bd
, struct hd_geometry
*geo
)
struct virtio_blk
*vblk
= bd
->bd_disk
->private_data
;
struct virtio_blk_geometry vgeo
;
/* see if the host passed in geometry config */
err
= virtio_config_val(vblk
->vdev
, VIRTIO_BLK_F_GEOMETRY
,
offsetof(struct virtio_blk_config
, geometry
),
geo
->sectors
= vgeo
.sectors
;
geo
->cylinders
= vgeo
.cylinders
;
/* some standard values, similar to sd */
/* capacity >> 11 assumes 16 heads * 63 sectors-ish layout (sd style). */
geo
->cylinders
= get_capacity(bd
->bd_disk
) >> 11;
/* Disk operations exposed to the block layer.
 * NOTE(review): .open/.ioctl/.owner initializers appear truncated here. */
static const struct block_device_operations virtblk_fops
= {
.getgeo
= virtblk_getgeo
,
static int index_to_minor(int index
)
return index
<< PART_BITS
;
static inline bool more_used(const struct vring_virtqueue
*vq
)
return vq
->last_used_idx
!= vq
->vring
.used
->idx
;
/*
 * IRQ handler for the SBOX doorbell: dispatch to the virtqueue callback
 * (blk_done) unless the queue is broken or there is no new work.
 * NOTE(review): return type (irqreturn_t), the more_used() check, and the
 * IRQ_HANDLED/IRQ_NONE returns appear truncated in this view.
 */
mic_virtblk_intr_handler(int irq
, void *_vq
)
struct vring_virtqueue
*vq
= to_vvq(_vq
);
pr_debug("virtqueue interrupt with no work for %p\n", vq
);
if (unlikely(vq
->broken
))
pr_debug("virtqueue callback for %p (%p)\n", vq
, vq
->vq
.callback
);
vq
->vq
.callback(&vq
->vq
);
/*
 * Probe: read host features and config from the shared area, allocate the
 * virtio_blk state (with trailing scatterlist), map the SBOX, set up the
 * virtqueue + IRQ + mempool, create and configure the gendisk and its
 * request queue (flush, read-only, segment/sector limits, topology), and
 * register the disk.  The tail lines are the unwind/error path.
 * NOTE(review): many lines (error checks, add_disk(), labels, braces)
 * appear truncated in this view.
 */
static int __devinit
virtblk_probe(struct virtio_device
*vdev
)
u32 v
, blk_size
, sg_elems
, opt_io_size
;
u8 physical_block_exp
, alignment_offset
;
struct board_info
*bd_info
= virtblk_mic_data
.dd_bi
[0];
struct vb_shared
*vb_shared
;
/* Refuse to create a disk whose minor range would overflow MINORBITS. */
if (index_to_minor(index
) >= 1 << MINORBITS
)
vb_shared
= ((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
;
/* Feature bits are published by the host in the shared MMIO area. */
vdev
->features
[0] = readl(&vb_shared
->host_features
);
/* We need to know how many segments before we allocate. */
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_SEG_MAX
,
offsetof(struct virtio_blk_config
, seg_max
),
/* We need an extra sg elements at head and tail. */
vdev
->priv
= vblk
= kmalloc(sizeof(*vblk
) +
sizeof(vblk
->sg
[0]) * sg_elems
, GFP_KERNEL
);
INIT_LIST_HEAD(&vblk
->reqs
);
spin_lock_init(&vblk
->lock
);
vblk
->sg_elems
= sg_elems
;
sg_init_table(vblk
->sg
, vblk
->sg_elems
);
/* Map the SBOX registers used to ring the host doorbell. */
vblk
->sbox
= ioremap_nocache(SBOX_BASE
, SBOX_MMIO_LENGTH
);
printk(KERN_ERR
"%s: NULL SBOX ptr\n", __func__
);
/* We expect one virtqueue, for output. */
vblk
->vq
= virtio_find_single_vq(vdev
, blk_done
, "requests");
if ((err
= request_irq(get_sbox_irq(VIRTIO_SBOX_INT_IDX
),
mic_virtblk_intr_handler
, IRQF_DISABLED
,
"virtio intr", vblk
->vq
))) {
printk(KERN_ERR
"%s: can't register interrupt: %d\n", __func__
, err
);
vblk
->pool
= mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req
));
/* FIXME: How many partitions? How long is a piece of string? */
vblk
->disk
= alloc_disk(1 << PART_BITS
);
q
= vblk
->disk
->queue
= blk_init_queue(do_virtblk_request
, &vblk
->lock
);
/* Name the disk vda..vdz, vdaa.., vdaaa.. depending on index. */
sprintf(vblk
->disk
->disk_name
, "vd%c", 'a' + index
% 26);
} else if (index
< (26 + 1) * 26) {
sprintf(vblk
->disk
->disk_name
, "vd%c%c",
'a' + index
/ 26 - 1, 'a' + index
% 26);
const unsigned int m1
= (index
/ 26 - 1) / 26 - 1;
const unsigned int m2
= (index
/ 26 - 1) % 26;
const unsigned int m3
= index
% 26;
sprintf(vblk
->disk
->disk_name
, "vd%c%c%c",
'a' + m1
, 'a' + m2
, 'a' + m3
);
vblk
->disk
->major
= major
;
vblk
->disk
->first_minor
= index_to_minor(index
);
vblk
->disk
->private_data
= vblk
;
vblk
->disk
->fops
= &virtblk_fops
;
vblk
->disk
->driverfs_dev
= NULL
; // There is no parent device.
/* configure queue flush support */
if (virtio_has_feature(vdev
, VIRTIO_BLK_F_FLUSH
))
blk_queue_flush(q
, REQ_FLUSH
);
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev
, VIRTIO_BLK_F_RO
)) {
if (vdev
->config
->get_features(vdev
) & (1U << VIRTIO_BLK_F_RO
)) {
set_disk_ro(vblk
->disk
, 1);
err
= set_capacity_from_host(vblk
);
/* We can handle whatever the host told us to handle. */
/* -2: reserve the header and status sg elements (see do_req()). */
blk_queue_max_segments(q
, vblk
->sg_elems
-2);
/* No need to bounce any requests */
blk_queue_bounce_limit(q
, BLK_BOUNCE_ANY
);
/* No real sector limit. */
blk_queue_max_hw_sectors(q
, -1U);
/* Host can optionally specify maximum segment size and number of
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_SIZE_MAX
,
offsetof(struct virtio_blk_config
, size_max
),
blk_queue_max_segment_size(q
, v
);
blk_queue_max_segment_size(q
, -1U);
/* Host can optionally specify the block size of the device */
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_BLK_SIZE
,
offsetof(struct virtio_blk_config
, blk_size
),
blk_queue_logical_block_size(q
, blk_size
);
blk_size
= queue_logical_block_size(q
);
/* Use topology information if available */
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
offsetof(struct virtio_blk_config
, physical_block_exp
),
if (!err
&& physical_block_exp
)
blk_queue_physical_block_size(q
,
blk_size
* (1 << physical_block_exp
));
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
offsetof(struct virtio_blk_config
, alignment_offset
),
if (!err
&& alignment_offset
)
blk_queue_alignment_offset(q
, blk_size
* alignment_offset
);
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
offsetof(struct virtio_blk_config
, min_io_size
),
blk_queue_io_min(q
, blk_size
* min_io_size
);
err
= virtio_config_val(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
offsetof(struct virtio_blk_config
, opt_io_size
),
blk_queue_io_opt(q
, blk_size
* opt_io_size
);
/* Error unwind: release resources in reverse order of acquisition. */
mempool_destroy(vblk
->pool
);
free_irq(get_sbox_irq(VIRTIO_SBOX_INT_IDX
), vblk
->vq
);
vdev
->config
->del_vqs(vdev
);
/*
 * Remove: tear down in reverse order of probe -- IRQ, device reset,
 * request queue, mempool, virtqueue.  Assumes no requests are in flight.
 * NOTE(review): del_gendisk()/put_disk() and kfree(vblk) lines appear
 * truncated in this view.
 */
static void __devexit
virtblk_remove(struct virtio_device
*vdev
)
struct virtio_blk
*vblk
= vdev
->priv
;
/* Nothing should be pending. */
BUG_ON(!list_empty(&vblk
->reqs
));
free_irq(get_sbox_irq(VIRTIO_SBOX_INT_IDX
), vblk
->vq
);
/* Stop all the virtqueues. */
vdev
->config
->reset(vdev
);
blk_cleanup_queue(vblk
->disk
->queue
);
mempool_destroy(vblk
->pool
);
vdev
->config
->del_vqs(vdev
);
/* config->get_features() implementation */
/*
 * Read the 32-bit host feature mask from the shared MMIO area published
 * by the host (same location virtblk_probe() reads at startup).
 */
static u32
virtblk_get_features(struct virtio_device
*vdev
)
/* When someone needs more than 32 feature bits, we'll need to
 * steal a bit to indicate that the rest are somewhere else. */
struct board_info
*bd_info
= virtblk_mic_data
.dd_bi
[0];
struct vb_shared
*vb_shared
;
vb_shared
= ((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
;
return readl(&vb_shared
->host_features
);
/* virtio config->finalize_features() implementation */
/*
 * Publish the negotiated (accepted) feature bits back to the host via the
 * shared MMIO area, after letting the vring transport filter them.
 */
static void virtblk_finalize_features(struct virtio_device
*vdev
)
struct board_info
*bd_info
= virtblk_mic_data
.dd_bi
[0];
struct vb_shared
*vb_shared
;
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev
);
/* We only support 32 feature bits. */
BUILD_BUG_ON(ARRAY_SIZE(vdev
->features
) != 1);
vb_shared
= ((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
;
writel(vdev
->features
[0], &vb_shared
->client_features
);
/* config->get() implementation */
/*
 * Copy @len bytes of device config space, starting at @offset within the
 * shared blk_config structure, one byte at a time via readb() since the
 * source is MMIO.
 * NOTE(review): the buffer parameter ('ptr') and locals ('ioaddr', 'i',
 * 'len') are declared on lines not visible in this view.
 */
static void virtblk_get(struct virtio_device
*vdev
, unsigned offset
,
struct board_info
*bd_info
= virtblk_mic_data
.dd_bi
[0];
struct vb_shared
*vb_shared
;
vb_shared
= ((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
;
ioaddr
= (void *)&vb_shared
->blk_config
+ offset
;
for (i
= 0; i
< len
; i
++)
ptr
[i
] = readb(ioaddr
+ i
);
/* config->reset() implementation.
 * NOTE(review): body not visible in this view -- presumably a no-op or a
 * doorbell to the host; confirm against the full file. */
static void virtblk_reset(struct virtio_device
*vdev
)
/* the notify function used when creating a virt queue */
/*
 * Notify the host of new ring entries by ringing the SDBIC0 doorbell
 * register in the mapped SBOX window.
 * NOTE(review): declarations of 'db_reg' and 'doorbell' and the statement
 * terminator after the readl() appear truncated in this view.
 */
static void virtblk_notify(struct virtqueue
*vq
)
struct virtio_blk
*vblk
= vq
->vdev
->priv
;
/* Ring host doorbell interrupt */
db_reg
= readl(vblk
->sbox
+ (SBOX_SDBIC0
+ (4 * doorbell
)))
writel(db_reg
, vblk
->sbox
+ (SBOX_SDBIC0
+ (4 * doorbell
)));
/* the config->del_vqs() implementation */
/*
 * Free the ring pages allocated in virtblk_find_vqs() (stashed in
 * vq->priv) and delete the virtqueue.
 */
static void virtblk_del_vqs(struct virtio_device
*vdev
)
struct virtio_blk
*vblk
= vdev
->priv
;
size
= PAGE_ALIGN(vring_size(VIRTQUEUE_LENGTH
, MIC_VRING_ALIGN
));
free_pages_exact(vblk
->vq
->priv
, size
);
vring_del_virtqueue(vblk
->vq
);
/* the config->find_vqs() implementation */
/*
 * Allocate the (single) request virtqueue: get zeroed pages for the ring,
 * create the vring virtqueue with virtblk_notify as the kick hook, then
 * publish the ring's size and physical addresses (desc/avail/used) to the
 * host through the shared vb_shared->vring so the host can map it.
 * The trailing free_pages_exact() is the error-unwind path.
 * NOTE(review): error checks, 'names' parameter, 'vq'/'vring'/'size'
 * declarations and the success return appear truncated in this view.
 */
static int virtblk_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
vq_callback_t
*callbacks
[],
struct virtio_blk
*vblk
= vdev
->priv
;
void *queue
; /* the virtual address of the ring queue */
struct vring_virtqueue
*vvq
;
struct board_info
*bd_info
= virtblk_mic_data
.dd_bi
[0];
size
= PAGE_ALIGN(vring_size(VIRTQUEUE_LENGTH
, MIC_VRING_ALIGN
));
queue
= alloc_pages_exact(size
, GFP_KERNEL
|__GFP_ZERO
);
vq
= vring_new_virtqueue(VIRTQUEUE_LENGTH
, MIC_VRING_ALIGN
,
vdev
, queue
, virtblk_notify
, callbacks
[0], names
[0]);
vring
= &((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
->vring
;
writel(vvq
->vring
.num
, &vring
->num
);
writeq(virt_to_phys(vvq
->vring
.desc
), &vring
->desc
);
writeq(virt_to_phys(vvq
->vring
.avail
), &vring
->avail
);
writeq(virt_to_phys(vvq
->vring
.used
), &vring
->used
);
free_pages_exact(queue
, size
);
/* virtio config operations backed by the shared MMIO area (no PCI).
 * NOTE(review): .get/.reset initializers appear truncated in this view. */
static struct virtio_config_ops virtio_blk_config_ops
= {
// .get_status = vp_get_status,
// .set_status = vp_set_status,
.find_vqs
= virtblk_find_vqs
,
.del_vqs
= virtblk_del_vqs
,
.get_features
= virtblk_get_features
,
.finalize_features
= virtblk_finalize_features
,
/* Feature bits this driver is willing to negotiate with the host. */
static unsigned int features
[] = {
VIRTIO_BLK_F_SEG_MAX
, VIRTIO_BLK_F_SIZE_MAX
, VIRTIO_BLK_F_GEOMETRY
,
VIRTIO_BLK_F_RO
, VIRTIO_BLK_F_BLK_SIZE
, VIRTIO_BLK_F_SCSI
,
VIRTIO_BLK_F_FLUSH
, VIRTIO_BLK_F_TOPOLOGY
/*
 * virtio_blk causes spurious section mismatch warning by
 * simultaneously referring to a __devinit and a __devexit function.
 * Use __refdata to avoid this warning.
 */
/* Driver descriptor; init() wires vdev->dev.driver to this by hand since
 * there is no virtio bus registration on the card side. */
static struct virtio_driver __refdata virtio_blk
= {
.feature_table
= features
,
.feature_table_size
= ARRAY_SIZE(features
),
.driver
.name
= KBUILD_MODNAME
,
.driver
.owner
= THIS_MODULE
,
/* Minimal class/device_type stand-ins used to initialize vdev->dev in
 * init() without a real bus.
 * NOTE(review): initializer members appear truncated in this view. */
struct class block_class
= {
static struct device_type disk_type
= {
.groups = disk_attr_groups,
.devnode = block_devnode,
/*
 * Module init: register the block major, allocate board_info and
 * mic_virtblk bookkeeping, map the host-shared vb_shared area at
 * virtio_addr, hand-construct a virtio_device (no virtio bus on the
 * card), and run virtblk_probe() on it.  The trailing kfree() is part of
 * the error-unwind path.
 * NOTE(review): several error checks, gotos and braces appear truncated
 * in this view.
 */
static int __init
init(void)
struct virtio_device
*vdev
;
struct mic_virtblk
*mic_virtblk
;
struct vb_shared
*vb_shared
;
printk(KERN_ERR
"virtio block device is not available on KNF\n");
/* 0 => kernel assigns a free major; remembered for unregistration. */
major
= register_blkdev(0, "virtblk");
bd_info
= kmalloc(sizeof(bd_info_t
), GFP_KERNEL
);
memset(bd_info
, 0, sizeof(*bd_info
));
virtblk_mic_data
.dd_numdevs
= 1;
virtblk_mic_data
.dd_bi
[0] = bd_info
;
bd_info
->bi_ctx
.bi_id
= 0;
mic_virtblk
= kmalloc(sizeof(*mic_virtblk
), GFP_KERNEL
);
if (mic_virtblk
== NULL
) {
memset(mic_virtblk
, 0, sizeof(*mic_virtblk
));
bd_info
->bi_virtio
= (void *)mic_virtblk
;
printk(KERN_ERR
"virtio address is not passed from host\n");
/* Map the host-shared control structure given by the module parameter. */
vb_shared
= ioremap_nocache(virtio_addr
, sizeof(*vb_shared
));
vb_shared
->update
= true;
mic_virtblk
->vb_shared
= vb_shared
;
/* Build a virtio_device by hand; there is no virtio bus to enumerate. */
vdev
= kmalloc(sizeof(*vdev
), GFP_KERNEL
);
memset(vdev
, 0, sizeof(*vdev
));
vdev
->config
= &virtio_blk_config_ops
;
INIT_LIST_HEAD(&vdev
->vqs
);
vdev
->dev
.driver
= &virtio_blk
.driver
;
vdev
->dev
.class = &block_class
;
vdev
->dev
.type
= &disk_type
;
device_initialize(&vdev
->dev
);
mic_virtblk
->vdev
= (void *)vdev
;
return virtblk_probe(vdev
);
kfree(bd_info
->bi_virtio
);
/*
 * Module exit: unregister the major, remove the virtio block device, and
 * release the shared-area mapping and allocations made in init().
 * NOTE(review): kfree(bd_info) itself appears truncated in this view --
 * confirm bd_info is freed in the full file.
 */
static void __exit
fini(void)
bd_info_t
*bd_info
= virtblk_mic_data
.dd_bi
[0];
struct mic_virtblk
*mic_virtblk
= (struct mic_virtblk
*)bd_info
->bi_virtio
;
unregister_blkdev(major
, "virtblk");
virtblk_remove(mic_virtblk
->vdev
);
iounmap(mic_virtblk
->vb_shared
);
kfree(mic_virtblk
->vdev
);
kfree(bd_info
->bi_virtio
);
MODULE_DESCRIPTION("Virtio block driver");
/* virtio_addr: physical address of the host-shared vb_shared structure,
 * supplied by the host on the kernel command line (read-only in sysfs). */
MODULE_PARM_DESC(virtio_addr
, "address of virtio related structure");
module_param(virtio_addr
, long, S_IRUGO
);