* Copyright (C) 2009 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
* This work is licensed under the terms of the GNU GPL, version 2.
* (C) Badari Pulavarty pbadari@us.ibm.com 2010 with the following comment.
* He posted on http://lwn.net/Articles/382543/
* virtio-block server in host kernel.
* Inspired by vhost-net and shamelessly ripped code from it :)
* (C) Copyright 2012 Intel Corporation
* Author: Caz Yokoyama <Caz.Yokoyama@intel.com>
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34) || \
defined(RHEL_RELEASE_CODE)
#include <linux/compat.h>
#include <linux/eventfd.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_net.h>
#include <linux/virtio_blk.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/rcupdate.h>
#include <linux/fdtable.h>
#ifndef VIRTIO_RING_F_EVENT_IDX /* virtio_ring.h of rhel6.0 does not define */
#define VIRTIO_RING_F_EVENT_IDX 29
#include "mic/micveth_dma.h"
#include "mic/mic_virtio.h"
/* Sector geometry and virtqueue sizing constants used by this backend. */
/* log2 of the sector size; sector numbers are shifted by this to get byte offsets. */
#define MIC_SECTOR_SHIFT 9
/* Sector size in bytes (1 << 9 = 512); written to blk_config.blk_size. */
#define MIC_SECTOR_SIZE (1UL << MIC_SECTOR_SHIFT)
#define VIRTIO_BLK_QUEUE_SIZE 128
/*
 * Value written to blk_config.seg_max. Two less than the queue size -
 * presumably leaving room for the request header and status descriptors
 * (TODO confirm).
 */
#define DISK_SEG_MAX (VIRTIO_BLK_QUEUE_SIZE - 2)
/* Number of virtqueues; sizes the vqs[] and poll[] arrays. */
#define VHOST_BLK_VQ_MAX 1
/*
 * Per-device state members. Elsewhere in this file they are reached through
 * a struct vhost_blk pointer (vblk->vqs, vblk->poll, vblk->vb_wq,
 * vblk->vblk_workqueue, vblk->bd_info, vblk->virtblk_file, ...).
 * NOTE(review): the enclosing struct declaration is not visible here.
 */
/* Virtqueues handed to vhost_dev_init() in mic_vhost_blk_probe(). */
struct vhost_virtqueue vqs
[VHOST_BLK_VQ_MAX
];
/* Initialised by vhost_poll_init(); poll[0].work is the handle_rq_blk() anchor. */
struct vhost_poll poll
[VHOST_BLK_VQ_MAX
];
/* Workqueue created in vhost_blk_set_backend(); the doorbell handler queues vb_ws_bh on it. */
struct workqueue_struct
*vb_wq
;
/* Name buffer for vb_wq, filled with snprintf() in vhost_blk_set_backend(). */
char vb_wqname
[WQNAME_SIZE
];
/* Work item initialised with handle_blk_kick(). */
struct work_struct vb_ws_bh
;
/* Workqueue created in mic_vhost_blk_probe(); request work items are queued on it. */
struct workqueue_struct
*vblk_workqueue
;
/* Board this device belongs to. */
struct board_info
*bd_info
;
/* Backing file opened by store_virtblk_file(); NULL when none is assigned. */
struct file
*virtblk_file
;
/*
 * Form a host pointer by adding 'pa' (taken as a byte offset) to the base
 * 'va'. In this file 'va' is always the aperture base (bi_ctx.aper.va) and
 * 'pa' an address read from the card's descriptors.
 */
#define mic_addr_in_host(va, pa) ((u8 *)(va) + (u64)(pa))
/*
 * Pending request lists. handoff_io() places VIRTIO_BLK_T_OUT requests on
 * write_queue and the remaining non-flush requests on read_queue via
 * insert_to_queue().
 */
static LIST_HEAD(write_queue
);
static LIST_HEAD(read_queue
);
/*
 * cleanup_vblk_workqueue - clean-up path for a request and its virtqueue.
 * @vbio: request being cleaned up
 * @vq:   virtqueue whose mutex is released at the end
 *
 * Called from handle_io_work() when the ring size reads as 0 or
 * micpm_get_reference() returns non-zero.
 * NOTE(review): the per-entry work inside the loop, the assignment of
 * 'head' and the matching mutex_lock() are not visible in this listing -
 * confirm against the full file.
 */
cleanup_vblk_workqueue(struct vhost_blk_io
*vbio
, struct vhost_virtqueue
*vq
)
struct list_head single
, *head
, *node
, *tmp
;
struct vhost_blk_io
*entry
;
/* Link vbio's list node to the local list head 'single'. */
list_add(&vbio
->list
, &single
);
/* The _safe iterator keeps 'tmp' so the current node may be removed while walking. */
list_for_each_safe(node
, tmp
, head
) {
entry
= list_entry(node
, struct vhost_blk_io
, list
);
mutex_unlock(&vq
->mutex
);
/*
 * handle_io_work - workqueue callback that performs the file I/O for one
 * request and reports completion to the card.
 * @work: the work_struct embedded in a struct vhost_blk_io
 *
 * Visible flow: compute the byte offset from the sector number, bail out
 * through cleanup_vblk_workqueue() if the ring is down or the power
 * management reference cannot be taken, then perform a flush
 * (vfs_fsync), a write (vfs_iter_write) or a read (vfs_iter_read) on
 * vbio->file. A one-byte virtio status is copied back for every request on
 * the list and each is completed with vhost_add_used_and_signal().
 * NOTE(review): several declarations, braces and #else/#endif lines are
 * not visible in this listing.
 */
static void handle_io_work(struct work_struct
*work
)
struct vhost_blk_io
*vbio
, *entry
;
struct vhost_virtqueue
*vq
;
struct list_head single
, *head
, *node
, *tmp
;
/* Recover the request that embeds this work item. */
vbio
= container_of(work
, struct vhost_blk_io
, work
);
/* Convert the starting sector into a byte offset in the backing file. */
pos
= vbio
->sector
<< MIC_SECTOR_SHIFT
;
/* Aperture base used to turn card addresses into host pointers. */
aper_va
= blk
->bd_info
->bi_ctx
.aper
.va
;
/* The ring description lives in the shared area hanging off bi_virtio. */
vring
= &((struct mic_virtblk
*)blk
->bd_info
->bi_virtio
)->vb_shared
.vring
;
num
= readl(&vring
->num
);
/*
 * A ring size of 0 (mic_vhost_blk_stop() writes 0 there) or a non-zero
 * return from micpm_get_reference() sends the request to the clean-up path.
 */
if (num
== 0 || micpm_get_reference(&blk
->bd_info
->bi_ctx
, true)) {
cleanup_vblk_workqueue(vbio
, vq
);
if (atomic64_read(&vbio
->file
->f_count
) == 0) { /* file is closed */
} else if (vbio
->type
& VIRTIO_BLK_T_FLUSH
) {
/*
 * Flush request. Three vfs_fsync() call forms appear below, selected by
 * kernel version; the remaining preprocessor lines are not visible here.
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
ret
= vfs_fsync(vbio
->file
, 1);
ret
= vfs_fsync(vbio
->file
, vbio
->file
->f_path
.dentry
, 1);
ret
= vfs_fsync(vbio
->file
, 1);
} else if (vbio
->type
& VIRTIO_BLK_T_OUT
) {
/* Write request: rewrite every iov_base into a host pointer first. */
for (iov
= vbio
->iov
; iov
< &vbio
->iov
[vbio
->nvecs
]; iov
++) {
iov
->iov_base
= mic_addr_in_host(aper_va
, iov
->iov_base
);
/* Write the whole vector to the file at 'pos'. */
iov_iter_init(&iter
, WRITE
, vbio
->iov
, vbio
->nvecs
, iov_length(vbio
->iov
, vbio
->nvecs
));
ret
= vfs_iter_write(vbio
->file
, &iter
, &pos
, 0);
/* Read path (presumably the remaining branch): same address translation. */
for (iov
= vbio
->iov
; iov
< &vbio
->iov
[vbio
->nvecs
]; iov
++) {
iov
->iov_base
= mic_addr_in_host(aper_va
, iov
->iov_base
);
/* Read from the file at 'pos' into the translated buffers. */
iov_iter_init(&iter
, READ
, vbio
->iov
, vbio
->nvecs
, iov_length(vbio
->iov
, vbio
->nvecs
));
ret
= vfs_iter_read(vbio
->file
, &iter
, &pos
, 0);
/* A negative return from the VFS call is reported to the card as an I/O error. */
status
= (ret
< 0) ? VIRTIO_BLK_S_IOERR
: VIRTIO_BLK_S_OK
;
list_add(&vbio
->list
, &single
);
/*
 * Copy the status into the buffer described by iov[nvecs] of each entry;
 * handoff_io() copies nvecs + 1 iovecs so that slot exists.
 */
list_for_each_entry(entry
, head
, list
) {
memcpy_toio(mic_addr_in_host(aper_va
, entry
->iov
[entry
->nvecs
].iov_base
), &status
, sizeof(status
));
/* Complete every entry on the list, identified by its descriptor head. */
list_for_each_safe(node
, tmp
, head
) {
entry
= list_entry(node
, struct vhost_blk_io
, list
);
vhost_add_used_and_signal(&blk
->dev
, vq
, entry
->head
, ret
);
mutex_unlock(&vq
->mutex
);
/* Drop the reference taken by micpm_get_reference() above. */
micpm_put_reference(&blk
->bd_info
->bi_ctx
);
/*
 * allocate_vbio - allocate a request with room for @nvecs iovecs.
 * @nvecs: number of struct iovec slots placed after the request header
 *
 * The allocation is sizeof(struct vhost_blk_io) plus nvecs iovecs, made
 * with GFP_KERNEL. The work item is bound to handle_io_work() and the list
 * node is initialised empty. Callers compare the result to failure
 * ("Unable to allocate memory" path in merge_and_handoff_work()).
 * NOTE(review): no NULL check on the kmalloc() result and no return
 * statement are visible in this listing - confirm against the full file.
 */
static struct vhost_blk_io
*allocate_vbio(int nvecs
)
struct vhost_blk_io
*vbio
;
int size
= sizeof(struct vhost_blk_io
) + nvecs
* sizeof(struct iovec
);
vbio
= kmalloc(size
, GFP_KERNEL
);
INIT_WORK(&vbio
->work
, handle_io_work
);
INIT_LIST_HEAD(&vbio
->list
);
/*
 * merge_and_handoff_work - submit the requests on @queue to the workqueue,
 * combining them into one request when possible.
 * @queue: list of struct vhost_blk_io linked through their 'list' member
 *
 * Visible flow: walk the queue, allocate one combined request with
 * allocate_vbio(nvecs), and if that fails queue every entry on its own.
 * Otherwise the combined request takes file, type and starting sector from
 * the first entry, receives a copy of every entry's iovecs, takes over the
 * list of original entries and is queued on vblk_workqueue.
 * NOTE(review): how 'nvecs' is accumulated and the conditions guarding the
 * early queue_work() calls are not visible in this listing.
 */
static void merge_and_handoff_work(struct list_head
*queue
)
struct vhost_blk_io
*vbio
, *entry
;
list_for_each_entry(entry
, queue
, list
) {
/* Submit the first entry as-is (guarding condition not visible here). */
vbio
= list_first_entry(queue
, struct vhost_blk_io
, list
);
queue_work(vbio
->blk
->vblk_workqueue
, &vbio
->work
);
/* Request that will carry the iovecs of all entries. */
vbio
= allocate_vbio(nvecs
);
/* Unable to allocate memory - submit IOs individually */
list_for_each_entry(vbio
, queue
, list
) {
queue_work(vbio
->blk
->vblk_workqueue
, &vbio
->work
);
/* The combined request inherits file, type and start sector from the first entry. */
entry
= list_first_entry(queue
, struct vhost_blk_io
, list
);
vbio
->file
= entry
->file
;
vbio
->type
= entry
->type
;
vbio
->sector
= entry
->sector
;
/* Copy each entry's iovecs into the combined array at index 'nvecs'. */
list_for_each_entry(entry
, queue
, list
) {
memcpy(&vbio
->iov
[nvecs
], entry
->iov
, entry
->nvecs
* sizeof(struct iovec
));
/* Move the original entries from 'queue' onto vbio->list and reinitialise 'queue'. */
list_replace_init(queue
, &vbio
->list
);
queue_work(vbio
->blk
->vblk_workqueue
, &vbio
->work
);
/*
 * start_io - split @queue into runs of requests and submit each run.
 * @queue: list of struct vhost_blk_io linked through 'list'
 *
 * The visible test checks whether 'entry' begins exactly at the sector
 * where 'vbio' ends (vbio->sector plus its length in sectors). A run is cut
 * off the front of the queue with list_cut_position() and passed to
 * merge_and_handoff_work(); whatever remains on the queue is submitted by
 * the final call.
 * NOTE(review): 'vbio' is initialised to NULL and dereferenced in the
 * visible condition; the lines that would guard or update it, and the
 * declaration of 'start', are not visible here - confirm against the full file.
 */
static void start_io(struct list_head
*queue
)
struct vhost_blk_io
*vbio
= NULL
, *entry
;
list_for_each_entry(entry
, queue
, list
) {
/* len is in bytes; shifting by MIC_SECTOR_SHIFT converts it to sectors. */
if (vbio
->sector
+ (vbio
->len
>> MIC_SECTOR_SHIFT
) == entry
->sector
) {
/* Detach everything up to and including 'vbio' into 'start'. */
list_cut_position(&start
, queue
, &vbio
->list
);
merge_and_handoff_work(&start
);
/* Submit what is left on the queue. */
merge_and_handoff_work(queue
);
/*
 * calculate_len - compute a length for @nvecs entries of @iov.
 * handoff_io() stores the result in vbio->len, which start_io() converts to
 * sectors with MIC_SECTOR_SHIFT.
 * NOTE(review): the function body is not visible in this listing.
 */
static uint64_t calculate_len(struct iovec
*iov
, int nvecs
)
/*
 * insert_to_queue - place @vbio on a request queue.
 * @vbio: request to insert
 *
 * Walks the queue and, at an entry whose sector is greater than vbio's,
 * links vbio immediately before that entry (list_add_tail() on the entry's
 * node). This presumably keeps the queue in ascending sector order - TODO
 * confirm.
 * NOTE(review): the 'queue' parameter (callers pass &write_queue or
 * &read_queue) and the handling when no larger entry exists are not
 * visible in this listing.
 */
static void insert_to_queue(struct vhost_blk_io
*vbio
,
struct vhost_blk_io
*entry
;
list_for_each_entry(entry
, queue
, list
) {
if (entry
->sector
> vbio
->sector
)
list_add_tail(&vbio
->list
, &entry
->list
);
/*
 * handoff_io - wrap one virtio block request in a vhost_blk_io and route it.
 * @blk:    device the request arrived on
 * @head:   descriptor head index of the request
 * @type:   virtio block request type (VIRTIO_BLK_T_*)
 * @sector: starting sector
 * @iov:    data iovecs; entry @nvecs is copied as well
 * @nvecs:  number of data iovecs
 *
 * Flush requests are queued on the workqueue at once; VIRTIO_BLK_T_OUT
 * requests go to write_queue and the rest to read_queue. handle_blk()
 * treats a negative return as failure.
 * NOTE(review): the allocation-failure check, the assignments of the
 * remaining vbio fields and the return statements are not visible in this
 * listing.
 */
static int handoff_io(struct vhost_blk
*blk
, int head
,
uint32_t type
, uint64_t sector
,
struct iovec
*iov
, int nvecs
)
struct vhost_virtqueue
*vq
= &blk
->dev
.vqs
[0];
struct vhost_blk_io
*vbio
;
/* One extra iovec slot: handle_io_work() writes the status through iov[nvecs]. */
vbio
= allocate_vbio(nvecs
+1);
/* The backing file is kept in the virtqueue's private_data. */
vbio
->file
= vq
->private_data
;
vbio
->len
= calculate_len(iov
, nvecs
);
memcpy(vbio
->iov
, iov
, (nvecs
+ 1) * sizeof(struct iovec
));
if (vbio
->type
& VIRTIO_BLK_T_FLUSH
) {
/* Sync called - do I need to submit IOs in the queue ? */
queue_work(blk
->vblk_workqueue
, &vbio
->work
);
} else if (vbio
->type
& VIRTIO_BLK_T_OUT
) {
insert_to_queue(vbio
, &write_queue
);
insert_to_queue(vbio
, &read_queue
);
/*
 * handle_blk - pull requests off the virtqueue and hand them to handoff_io().
 * @blk: device to service
 *
 * Visible flow: ignore the kick if the card-side ring is not set up;
 * (re)load the ring addresses from shared memory the first time or when
 * the card sets the 'update' flag; take a power-management reference;
 * then fetch descriptors with vhost_get_vq_desc(), read the 16-byte
 * request header through the aperture and pass the request on. The mutex
 * and the reference are released at the end.
 * NOTE(review): the loop construct, several declarations and the
 * matching mutex_lock() are not visible in this listing.
 */
static void handle_blk(struct vhost_blk
*blk
)
struct vhost_virtqueue
*vq
= &blk
->dev
.vqs
[0];
struct virtio_blk_outhdr hdr
;
struct board_info
*bd_info
= blk
->bd_info
;
vring
= &((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
.vring
;
/* A ring size of 0 means the card side has not published its ring. */
if (vring
== 0 || readl(&vring
->num
) == 0) {
printk("request comes in while card side driver is not loaded yet. Ignore\n");
/* the first time since the card side driver becomes ready */
if (vq
->desc
== NULL
|| readb(&((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
.update
)) {
/* Copy ring size and the desc/avail/used addresses from shared memory. */
vq
->num
= readl(&vring
->num
);
vq
->desc
= (struct vring_desc
*)readq(&vring
->desc
);
vq
->avail
= (struct vring_avail
*)readq(&vring
->avail
);
vq
->used
= (struct vring_used
*)readq(&vring
->used
);
vq
->signalled_used_valid
= false;
/* Clear the flag so the ring is not reloaded on the next kick. */
writeb(false, &((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
.update
);
if (micpm_get_reference(&blk
->bd_info
->bi_ctx
, true))
vhost_disable_notify(&blk
->dev
, vq
);
head
= vhost_get_vq_desc(&blk
->dev
, vq
, vq
->iov
,
/* head == vq->num or an error code: nothing (valid) to process. */
if ((head
== vq
->num
) || (head
== -EFAULT
) || (head
== -EINVAL
)) {
/*
 * A non-zero return from vhost_enable_notify() is followed by disabling
 * notifications again - presumably to re-poll for late arrivals (TODO confirm).
 */
if (unlikely(vhost_enable_notify(&blk
->dev
, vq
))) {
vhost_disable_notify(&blk
->dev
, vq
);
/* The first iovec must be exactly the 16-byte request header. */
BUG_ON(vq
->iov
[0].iov_len
!= 16);
memcpy_fromio(&hdr
, mic_addr_in_host(bd_info
->bi_ctx
.aper
.va
, vq
->iov
[0].iov_base
),
if (hdr
.type
== VIRTIO_BLK_T_IN
)
/* The iovec after the data must be the one-byte status. */
BUG_ON(vq
->iov
[nvecs
+1].iov_len
!= 1);
/* Data iovecs start at index 1, after the header. */
if (handoff_io(blk
, head
, hdr
.type
, hdr
.sector
, &vq
->iov
[1], nvecs
) < 0) {
/* Hand-off failed: return the descriptor to the ring. */
vhost_discard_vq_desc(vq
, 1);
mutex_unlock(&vq
->mutex
);
micpm_put_reference(&blk
->bd_info
->bi_ctx
);
/*
 * handle_blk_kick - work handler bound to vb_ws_bh; the doorbell interrupt
 * handler queues that work item.
 * @work: the vb_ws_bh member embedded in a struct vhost_blk
 *
 * NOTE(review): only the container_of() lookup is visible in this listing.
 */
static void handle_blk_kick(struct work_struct
*work
)
vblk
= container_of(work
, struct vhost_blk
, vb_ws_bh
);
/*
 * handle_rq_blk - vhost poll callback registered via vhost_poll_init().
 * @work: the poll[0].work member embedded in a struct vhost_blk
 *
 * NOTE(review): only the container_of() lookup is visible in this listing.
 */
static void handle_rq_blk(struct vhost_work
*work
)
blk
= container_of(work
, struct vhost_blk
, poll
[0].work
);
/*
 * vhost_doorbell_intr_handler - doorbell interrupt handler registered for
 * MIC_IRQ_DB2 in vhost_blk_set_backend().
 * @mic_ctx:  context embedded in the board's struct board_info
 * @doorbell: doorbell number (not used in the visible lines)
 *
 * Defers the work to the vb_wq workqueue by queueing vb_ws_bh.
 */
vhost_doorbell_intr_handler(mic_ctx_t
*mic_ctx
, int doorbell
)
/* Walk from the context to the board, then to its vhost block device. */
bi
= container_of(mic_ctx
, struct board_info
, bi_ctx
);
vblk
= ((struct mic_virtblk
*)bi
->bi_virtio
)->vblk
;
queue_work(vblk
->vb_wq
, &vblk
->vb_ws_bh
);
/*
 * vhost_blk_set_backend - attach the opened backing file to the device and
 * publish the block configuration to the card.
 * @vblk: device whose virtblk_file has been set by store_virtblk_file()
 *
 * Visible flow: validate the board index and the file pointer, publish
 * the file as the virtqueue's private_data, create the kick workqueue,
 * write feature bits, seg_max, blk_size and capacity into the shared
 * area, and register the doorbell interrupt handler.
 * NOTE(review): error returns, the assignment of 'vq', the snprintf()
 * arguments and the #endif/else lines are not visible in this listing.
 */
static long vhost_blk_set_backend(struct vhost_blk
*vblk
)
struct vhost_virtqueue
*vq
;
struct board_info
*bd_info
= vblk
->bd_info
;
unsigned index
= bd_info
->bi_ctx
.bi_id
;
struct vb_shared
*vb_shared
;
/* Features always offered: maximum segment count and block size. */
unsigned int virtio_blk_features
= (1U << VIRTIO_BLK_F_SEG_MAX
) |
(1U << VIRTIO_BLK_F_BLK_SIZE
);
if (index
>= MAX_BOARD_SUPPORTED
) {
if (vblk
->virtblk_file
== NULL
) {
/* handoff_io() later reads the file back from vq->private_data. */
rcu_assign_pointer(vq
->private_data
, vblk
->virtblk_file
);
mutex_unlock(&vq
->mutex
);
snprintf(vblk
->vb_wqname
, sizeof(vblk
->vb_wqname
),
vblk
->vb_wq
= __mic_create_singlethread_workqueue(vblk
->vb_wqname
);
if (vblk
->vb_wq
== NULL
) {
INIT_WORK(&vblk
->vb_ws_bh
, handle_blk_kick
);
/* They have to be accessed from "struct vhost_virtqueue *vq" in mic_vhost.c.
They are not used in vhost block. I don't modify vhost.h. */
/* log_base carries the board context pointer, log_addr the aperture base. */
vq
->log_base
= (void __user
*)&bd_info
->bi_ctx
;
vq
->log_addr
= (u64
)bd_info
->bi_ctx
.aper
.va
;
vb_shared
= &((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
;
/* The flush feature is added under this kernel-version guard. */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0))
virtio_blk_features
|= (1U << VIRTIO_BLK_F_FLUSH
);
writel(virtio_blk_features
, &vb_shared
->host_features
);
writel(DISK_SEG_MAX
, &vb_shared
->blk_config
.seg_max
);
writel(MIC_SECTOR_SIZE
, &vb_shared
->blk_config
.blk_size
);
ret
= vfs_getattr(&vblk
->virtblk_file
->f_path
, &stat
, STATX_BASIC_STATS
, AT_STATX_SYNC_AS_STAT
);
/* Capacity is written in sectors: device size for block devices, stat size otherwise. */
if (S_ISBLK(stat
.mode
)) {
writel(i_size_read(I_BDEV(vblk
->virtblk_file
->f_mapping
->host
)->bd_inode
) / MIC_SECTOR_SIZE
,
&vb_shared
->blk_config
.capacity
);
writel(stat
.size
/ MIC_SECTOR_SIZE
, &vb_shared
->blk_config
.capacity
);
/* From here on the card can kick the host through doorbell 2. */
ret
= mic_reg_irqhandler(&bd_info
->bi_ctx
, MIC_IRQ_DB2
, "Host DoorBell 2",
vhost_doorbell_intr_handler
);
/*
 * mic_vhost_blk_stop - stop accepting block requests from the card.
 * @bd_info: board whose shared vring is being shut down
 *
 * Writes 0 into the shared ring size; handle_blk() and handle_io_work()
 * both treat a size of 0 as "ring not available".
 */
mic_vhost_blk_stop(bd_info_t
*bd_info
)
struct vring
*vring
= &((struct mic_virtblk
*)bd_info
->bi_virtio
)->vb_shared
.vring
;
writel(0, &vring
->num
); /* reject subsequent request from MIC card */
/*
 * Defined elsewhere; used by the sysfs show/store handlers below to obtain
 * the board info for a struct device.
 */
extern bd_info_t
*dev_to_bdi(struct device
*dev
);
/*
 * show_virtblk_file - sysfs "show" handler reporting the backing file name.
 * @dev:  device the attribute belongs to
 * @attr: attribute descriptor (not used in the visible lines)
 * @buf:  output buffer, at most PAGE_SIZE bytes
 *
 * When a file name is set, prints it followed by a newline and returns
 * the snprintf() result.
 * NOTE(review): the return type and the path taken when file_name is NULL
 * are not visible in this listing.
 */
show_virtblk_file(struct device
*dev
, struct device_attribute
*attr
, char *buf
)
struct board_info
*bd_info
= dev_to_bdi(dev
);
struct mic_virtblk
*mic_virtblk
;
mic_virtblk
= bd_info
->bi_virtio
;
/* bi_virtio is assigned in mic_vhost_blk_probe(). */
BUG_ON(mic_virtblk
== NULL
);
vblk
= mic_virtblk
->vblk
;
if (vblk
->file_name
!= NULL
)
return snprintf(buf
, PAGE_SIZE
, "%s\n", vblk
->file_name
);
/*
 * store_virtblk_file - sysfs "store" handler selecting the backing file.
 * @dev:   device the attribute belongs to
 * @attr:  attribute descriptor (not used in the visible lines)
 * @buf:   path of the file to use
 * @count: number of bytes in @buf
 *
 * Visible flow: close any file already assigned (with an alert message),
 * copy the new name, open it read/write with O_LARGEFILE, record it in
 * vblk->virtblk_file and call vhost_blk_set_backend(). The file is closed
 * again with filp_close() - presumably on the set_backend failure path
 * (TODO confirm).
 * NOTE(review): no NULL check of the kmalloc() result is visible before
 * strcpy(), and no release of a previous file_name is visible; return
 * statements, the assignment of 'vq' and the mutex_lock() calls are also
 * missing from this listing - confirm against the full file.
 */
store_virtblk_file(struct device
*dev
, struct device_attribute
*attr
,
const char *buf
, size_t count
)
struct board_info
*bd_info
= dev_to_bdi(dev
);
struct mic_virtblk
*mic_virtblk
;
struct vhost_virtqueue
*vq
;
struct file
*virtblk_file
;
mic_virtblk
= bd_info
->bi_virtio
;
BUG_ON(mic_virtblk
== NULL
);
vblk
= mic_virtblk
->vblk
;
if (vblk
->virtblk_file
!= NULL
) { /* if virtblk file is already assigned */
printk(KERN_ALERT
"you are changing virtblk file: %s -> %s.\n", vblk
->file_name
, buf
);
filp_close(vblk
->virtblk_file
, current
->files
);
vblk
->virtblk_file
= NULL
;
/* count + 1 leaves room for the terminating NUL copied by strcpy(). */
vblk
->file_name
= kmalloc(count
+ 1, GFP_KERNEL
);
strcpy(vblk
->file_name
, buf
);
virtblk_file
= filp_open(vblk
->file_name
, O_RDWR
|O_LARGEFILE
, 0);
/* filp_open() reports failure through an ERR_PTR value. */
if (IS_ERR(virtblk_file
)) {
ret
= PTR_ERR(virtblk_file
);
mutex_unlock(&vq
->mutex
);
vblk
->virtblk_file
= virtblk_file
;
mutex_unlock(&vq
->mutex
);
ret
= vhost_blk_set_backend(vblk
);
filp_close(vblk
->virtblk_file
, current
->files
);
/*
 * mic_vhost_blk_probe - allocate and initialise the vhost block state for
 * a board.
 * @bd_info: board being probed
 *
 * Visible flow: allocate a zeroed struct mic_virtblk and hang it off
 * bd_info->bi_virtio, allocate a zeroed struct vhost_blk, initialise the
 * vhost device and poll structure, and create the per-board request
 * workqueue named "vblkNNN" from the board id.
 * NOTE(review): error handling bodies, the declarations of 'vblk', 'ret'
 * and 'wq_name', and the return statements are not visible in this listing.
 */
int mic_vhost_blk_probe(bd_info_t
*bd_info
)
struct mic_virtblk
*mic_virtblk
;
mic_virtblk
= kzalloc(sizeof(*mic_virtblk
), GFP_KERNEL
);
if (mic_virtblk
== NULL
) {
bd_info
->bi_virtio
= mic_virtblk
;
vblk
= kzalloc(sizeof *vblk
, GFP_KERNEL
);
mic_virtblk
->vblk
= vblk
;
ret
= vhost_dev_init(&vblk
->dev
, vblk
->vqs
, VHOST_BLK_VQ_MAX
);
/* handle_rq_blk() runs when the poll reports POLLOUT or POLLIN. */
vhost_poll_init(vblk
->poll
, handle_rq_blk
, POLLOUT
|POLLIN
, &vblk
->dev
);
/*
 * The id is formatted with %03d below; ids of 1000 or more would need a
 * fourth digit - presumably more than wq_name holds (TODO confirm).
 */
BUG_ON(bd_info
->bi_ctx
.bi_id
>= 1000);
snprintf(wq_name
, ARRAY_SIZE(wq_name
), "vblk%03d", bd_info
->bi_ctx
.bi_id
);
vblk
->vblk_workqueue
= __mic_create_singlethread_workqueue(wq_name
);
if (vblk
->vblk_workqueue
== NULL
) {
/*
 * mic_vhost_blk_remove - tear down the vhost block state of a board.
 * @bd_info: board being removed
 *
 * Visible flow: when a backing file is assigned, unregister the doorbell
 * handler, zero the shared block configuration, destroy the kick
 * workqueue, drop the file reference held in vqs[0].private_data and
 * close the file. Then clean up the vhost device and destroy the request
 * workqueue.
 * NOTE(review): closing braces and any kfree() calls are not visible in
 * this listing, so the exact extent of the "file assigned" branch is
 * inferred - confirm against the full file.
 */
void mic_vhost_blk_remove(bd_info_t
*bd_info
)
struct mic_virtblk
*mic_virtblk
= bd_info
->bi_virtio
;
struct vhost_blk
*vblk
= mic_virtblk
->vblk
;
struct vb_shared
*vb_shared
= &mic_virtblk
->vb_shared
;
if (vblk
->virtblk_file
!= NULL
) {
/* Mirrors the mic_reg_irqhandler() call in vhost_blk_set_backend(). */
mic_unreg_irqhandler(&bd_info
->bi_ctx
, MIC_IRQ_DB2
, "Host DoorBell 2");
memset(&vb_shared
->blk_config
, 0, sizeof(vb_shared
->blk_config
));
destroy_workqueue(vblk
->vb_wq
);
if (vblk
->vqs
[0].private_data
!= NULL
)
fput(vblk
->vqs
[0].private_data
);
filp_close(vblk
->virtblk_file
, current
->files
);
vhost_dev_cleanup(&vblk
->dev
);
destroy_workqueue(vblk
->vblk_workqueue
);