/* Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * (C) Badari Pulavarty pbadari@us.ibm.com 2010, with the following comment:
 * Inspiration, some code, and most witty comments come from
 * Documentation/lguest/lguest.c, by Rusty Russell
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Adapted for MIC:
 * (C) Copyright 2012 Intel Corporation
 * Author: Caz Yokoyama <Caz.Yokoyama@intel.com>
 *
 * Generic code for the virtio server in the host kernel.
 */
18 | ||
19 | #include <linux/version.h> | |
20 | #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) || \ | |
21 | defined(RHEL_RELEASE_CODE) | |
22 | ||
23 | #include <linux/eventfd.h> | |
24 | #ifdef RHEL_RELEASE_CODE | |
25 | #include <linux/vhost.h> | |
26 | #else | |
27 | #include "./linux/vhost.h" | |
28 | #endif | |
29 | #include <linux/virtio_net.h> | |
30 | #include <linux/mm.h> | |
31 | #include <linux/mmu_context.h> | |
32 | #include <linux/miscdevice.h> | |
33 | #include <linux/mutex.h> | |
34 | #include <linux/rcupdate.h> | |
35 | #include <linux/poll.h> | |
36 | #include <linux/file.h> | |
37 | #include <linux/highmem.h> | |
38 | #include <linux/slab.h> | |
39 | #include <linux/kthread.h> | |
40 | #include <linux/cgroup.h> | |
41 | ||
42 | #include <linux/net.h> | |
43 | #include <linux/if_packet.h> | |
44 | #include <linux/if_arp.h> | |
45 | ||
46 | #include <net/sock.h> | |
47 | ||
#ifndef VIRTIO_RING_F_EVENT_IDX /* rhel6.0's virtio_ring.h does not define this */
#define VIRTIO_RING_F_EVENT_IDX 29
#endif
#include "vhost.h"
#include "mic/micveth_dma.h"

/* va is the host-side virtual base of the mapped card memory; pa is a
 * card-side address used as a byte offset into that mapping. */
#define mic_addr_in_host(va, pa) ((u8 *)(va) + (u64)(pa))

enum {
	VHOST_MEMORY_MAX_NREGIONS = 64,
	VHOST_MEMORY_F_LOG = 0x1,
};

#if 0
static unsigned vhost_zcopy_mask __read_mostly;
#endif

static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
			    poll_table *pt)
{
	struct vhost_poll *poll;

	poll = container_of(pt, struct vhost_poll, table);
	poll->wqh = wqh;
	add_wait_queue(wqh, &poll->wait);
}

static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
			     void *key)
{
	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);

	if (!((unsigned long)key & poll->mask))
		return 0;

	vhost_poll_queue(poll);
	return 0;
}

static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
	INIT_LIST_HEAD(&work->node);
	work->fn = fn;
	init_waitqueue_head(&work->done);
	work->flushing = 0;
	work->queue_seq = work->done_seq = 0;
}

/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
		     unsigned long mask, struct vhost_dev *dev)
{
	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
	init_poll_funcptr(&poll->table, vhost_poll_func);
	poll->mask = mask;
	poll->dev = dev;

	vhost_work_init(&poll->work, fn);
}
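
#if 0
/* Illustrative sketch, not part of this driver (names are hypothetical):
 * a backend can drive its own vhost_poll directly, the way vhost-net polls
 * its socket. The handler runs on the device's worker thread whenever
 * vhost_poll_queue() fires for this poll. */
static void example_backend_poll(struct vhost_work *work)
{
	struct vhost_poll *poll = container_of(work, struct vhost_poll, work);

	/* ... service poll->dev here ... */
}

static void example_backend_setup(struct vhost_dev *dev,
				  struct vhost_poll *poll)
{
	vhost_poll_init(poll, example_backend_poll, POLLIN, dev);
}
#endif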
107 | ||
108 | #if 0 | |
109 | /* Start polling a file. We add ourselves to file's wait queue. The caller must | |
110 | * keep a reference to a file until after vhost_poll_stop is called. */ | |
111 | void vhost_poll_start(struct vhost_poll *poll, struct file *file) | |
112 | { | |
113 | unsigned long mask; | |
114 | mask = file->f_op->poll(file, &poll->table); | |
115 | if (mask) | |
116 | vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); | |
117 | } | |
118 | #endif | |
119 | ||
120 | /* Stop polling a file. After this function returns, it becomes safe to drop the | |
121 | * file reference. You must also flush afterwards. */ | |
122 | void vhost_poll_stop(struct vhost_poll *poll) | |
123 | { | |
124 | remove_wait_queue(poll->wqh, &poll->wait); | |
125 | } | |
126 | ||
127 | static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, | |
128 | unsigned seq) | |
129 | { | |
130 | int left; | |
131 | spin_lock_irq(&dev->work_lock); | |
132 | left = seq - work->done_seq; | |
133 | spin_unlock_irq(&dev->work_lock); | |
134 | return left <= 0; | |
135 | } | |
136 | ||
137 | static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) | |
138 | { | |
139 | unsigned seq; | |
140 | int flushing; | |
141 | ||
142 | spin_lock_irq(&dev->work_lock); | |
143 | seq = work->queue_seq; | |
144 | work->flushing++; | |
145 | spin_unlock_irq(&dev->work_lock); | |
146 | wait_event(work->done, vhost_work_seq_done(dev, work, seq)); | |
147 | spin_lock_irq(&dev->work_lock); | |
148 | flushing = --work->flushing; | |
149 | spin_unlock_irq(&dev->work_lock); | |
150 | BUG_ON(flushing < 0); | |
151 | } | |
152 | ||
153 | /* Flush any work that has been scheduled. When calling this, don't hold any | |
154 | * locks that are also used by the callback. */ | |
155 | void vhost_poll_flush(struct vhost_poll *poll) | |
156 | { | |
157 | vhost_work_flush(poll->dev, &poll->work); | |
158 | } | |
159 | ||
160 | static inline void vhost_work_queue(struct vhost_dev *dev, | |
161 | struct vhost_work *work) | |
162 | { | |
163 | unsigned long flags; | |
164 | ||
165 | spin_lock_irqsave(&dev->work_lock, flags); | |
166 | if (list_empty(&work->node)) { | |
167 | list_add_tail(&work->node, &dev->work_list); | |
168 | work->queue_seq++; | |
169 | wake_up_process(dev->worker); | |
170 | } | |
171 | spin_unlock_irqrestore(&dev->work_lock, flags); | |
172 | } | |
173 | ||
174 | void vhost_poll_queue(struct vhost_poll *poll) | |
175 | { | |
176 | vhost_work_queue(poll->dev, &poll->work); | |
177 | } | |
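
#if 0
/* Sketch of the worker loop that pairs with vhost_work_queue() and
 * vhost_work_flush() above. The kthread itself (dev->worker) is created in
 * the device-ownership path, outside this file; this sketch is modeled on
 * the upstream vhost worker and assumes the same work_lock/queue_seq/
 * done_seq fields. */
static int example_vhost_worker(void *data)
{
	struct vhost_dev *dev = data;
	struct vhost_work *work = NULL;
	unsigned seq = 0;

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);

		spin_lock_irq(&dev->work_lock);
		if (work) {
			/* Publish completion so vhost_work_flush() sees
			 * done_seq catch up with the queue_seq it sampled. */
			work->done_seq = seq;
			if (work->flushing)
				wake_up_all(&work->done);
		}
		if (kthread_should_stop()) {
			spin_unlock_irq(&dev->work_lock);
			__set_current_state(TASK_RUNNING);
			return 0;
		}
		if (!list_empty(&dev->work_list)) {
			work = list_first_entry(&dev->work_list,
						struct vhost_work, node);
			list_del_init(&work->node);
			seq = work->queue_seq;
		} else {
			work = NULL;
		}
		spin_unlock_irq(&dev->work_lock);

		if (work) {
			__set_current_state(TASK_RUNNING);
			work->fn(work);
		} else {
			schedule();
		}
	}
}
#endif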
178 | ||
179 | static void vhost_vq_reset(struct vhost_dev *dev, | |
180 | struct vhost_virtqueue *vq) | |
181 | { | |
182 | vq->num = 1; | |
183 | vq->desc = NULL; | |
184 | vq->avail = NULL; | |
185 | vq->used = NULL; | |
186 | vq->last_avail_idx = 0; | |
187 | vq->avail_idx = 0; | |
188 | vq->last_used_idx = 0; | |
189 | vq->signalled_used = 0; | |
190 | vq->signalled_used_valid = false; | |
191 | vq->used_flags = 0; | |
192 | vq->log_used = false; | |
193 | vq->log_addr = -1ull; | |
194 | vq->vhost_hlen = 0; | |
195 | vq->sock_hlen = 0; | |
196 | vq->private_data = NULL; | |
197 | vq->log_base = NULL; | |
198 | vq->error_ctx = NULL; | |
199 | vq->error = NULL; | |
200 | vq->kick = NULL; | |
201 | vq->call_ctx = NULL; | |
202 | vq->call = NULL; | |
203 | vq->log_ctx = NULL; | |
204 | vq->upend_idx = 0; | |
205 | vq->done_idx = 0; | |
206 | vq->ubufs = NULL; | |
207 | } | |
208 | ||
209 | static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) | |
210 | { | |
211 | kfree(vq->indirect); | |
212 | vq->indirect = NULL; | |
213 | kfree(vq->log); | |
214 | vq->log = NULL; | |
215 | kfree(vq->heads); | |
216 | vq->heads = NULL; | |
217 | kfree(vq->ubuf_info); | |
218 | vq->ubuf_info = NULL; | |
219 | } | |
220 | ||
221 | #if 0 | |
222 | void vhost_enable_zcopy(int vq) | |
223 | { | |
224 | vhost_zcopy_mask |= 0x1 << vq; | |
225 | } | |
226 | #endif | |
227 | ||
228 | static void vhost_dev_free_iovecs(struct vhost_dev *dev) | |
229 | { | |
230 | int i; | |
231 | for (i = 0; i < dev->nvqs; ++i) | |
232 | vhost_vq_free_iovecs(&dev->vqs[i]); | |
233 | } | |
234 | ||
235 | long vhost_dev_init(struct vhost_dev *dev, | |
236 | struct vhost_virtqueue *vqs, int nvqs) | |
237 | { | |
238 | int i; | |
239 | ||
240 | dev->vqs = vqs; | |
241 | dev->nvqs = nvqs; | |
242 | mutex_init(&dev->mutex); | |
243 | dev->log_ctx = NULL; | |
244 | dev->log_file = NULL; | |
245 | dev->memory = NULL; | |
246 | dev->mm = NULL; | |
247 | spin_lock_init(&dev->work_lock); | |
248 | INIT_LIST_HEAD(&dev->work_list); | |
249 | dev->worker = NULL; | |
250 | ||
251 | for (i = 0; i < dev->nvqs; ++i) { | |
252 | dev->vqs[i].log = NULL; | |
253 | dev->vqs[i].indirect = NULL; | |
254 | dev->vqs[i].heads = NULL; | |
255 | dev->vqs[i].ubuf_info = NULL; | |
256 | dev->vqs[i].dev = dev; | |
257 | mutex_init(&dev->vqs[i].mutex); | |
258 | vhost_vq_reset(dev, dev->vqs + i); | |
259 | if (dev->vqs[i].handle_kick) | |
260 | vhost_poll_init(&dev->vqs[i].poll, | |
261 | dev->vqs[i].handle_kick, POLLIN, dev); | |
262 | } | |
263 | ||
264 | return 0; | |
265 | } | |
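
#if 0
/* Illustrative sketch (hypothetical names): a device embeds its virtqueues
 * and hands them to vhost_dev_init() once, with handle_kick set for each
 * queue that should be serviced from the worker thread; vhost_dev_init()
 * then wires each kick handler up via vhost_poll_init() above. */
struct example_virtio_dev {
	struct vhost_dev dev;
	struct vhost_virtqueue vqs[2];
};

static void example_rx_kick(struct vhost_work *work) { /* drain RX vq */ }
static void example_tx_kick(struct vhost_work *work) { /* drain TX vq */ }

static long example_open(struct example_virtio_dev *n)
{
	n->vqs[0].handle_kick = example_rx_kick;
	n->vqs[1].handle_kick = example_tx_kick;
	return vhost_dev_init(&n->dev, n->vqs, ARRAY_SIZE(n->vqs));
}
#endif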
266 | ||
267 | #if 0 | |
268 | /* Caller should have device mutex */ | |
269 | long vhost_dev_check_owner(struct vhost_dev *dev) | |
270 | { | |
271 | /* Are you the owner? If not, I don't think you mean to do that */ | |
272 | return dev->mm == current->mm ? 0 : -EPERM; | |
273 | } | |
274 | #endif | |
275 | ||
276 | struct vhost_attach_cgroups_struct { | |
277 | struct vhost_work work; | |
278 | struct task_struct *owner; | |
279 | int ret; | |
280 | }; | |
281 | ||
282 | #if 0 | |
283 | /* Caller should have device mutex */ | |
284 | long vhost_dev_reset_owner(struct vhost_dev *dev) | |
285 | { | |
286 | struct vhost_memory *memory; | |
287 | ||
288 | /* Restore memory to default empty mapping. */ | |
289 | memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); | |
290 | if (!memory) | |
291 | return -ENOMEM; | |
292 | ||
293 | vhost_dev_cleanup(dev); | |
294 | ||
295 | memory->nregions = 0; | |
296 | dev->memory = memory; | |
297 | return 0; | |
298 | } | |
299 | #endif | |
300 | ||
/* The lower device driver may complete DMA out of order. upend_idx tracks
 * the end of the used idx, done_idx tracks the head. Once the lower device
 * has completed DMA contiguously, we signal the used idx to the KVM guest.
 */
int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
{
	int i;
	int j = 0;

	for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
		if (vq->heads[i].len == VHOST_DMA_DONE_LEN) {
			vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
			vhost_add_used_and_signal(vq->dev, vq,
						  vq->heads[i].id, 0);
			++j;
		} else
			break;
	}
	if (j)
		vq->done_idx = i;
	return j;
}
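
/* Worked example for the loop above: suppose done_idx == 3, upend_idx == 7,
 * and the lower device has completed DMA for slots 3, 4 and 6 (len ==
 * VHOST_DMA_DONE_LEN) but not slot 5. The loop signals slots 3 and 4, stops
 * at 5, and leaves done_idx == 5; slot 6 is reported only after 5 completes,
 * so the guest always sees used entries in order. */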
324 | ||
325 | /* Caller should have device mutex */ | |
326 | void vhost_dev_cleanup(struct vhost_dev *dev) | |
327 | { | |
328 | int i; | |
329 | for (i = 0; i < dev->nvqs; ++i) { | |
330 | if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { | |
331 | vhost_poll_stop(&dev->vqs[i].poll); | |
332 | vhost_poll_flush(&dev->vqs[i].poll); | |
333 | } | |
334 | BUG_ON(dev->vqs[i].ubufs != NULL); | |
335 | ||
336 | /* Signal guest as appropriate. */ | |
337 | vhost_zerocopy_signal_used(&dev->vqs[i]); | |
338 | ||
339 | if (dev->vqs[i].error_ctx) | |
340 | eventfd_ctx_put(dev->vqs[i].error_ctx); | |
341 | if (dev->vqs[i].error) | |
342 | fput(dev->vqs[i].error); | |
343 | if (dev->vqs[i].kick) | |
344 | fput(dev->vqs[i].kick); | |
345 | if (dev->vqs[i].call_ctx) | |
346 | eventfd_ctx_put(dev->vqs[i].call_ctx); | |
347 | if (dev->vqs[i].call) | |
348 | fput(dev->vqs[i].call); | |
349 | vhost_vq_reset(dev, dev->vqs + i); | |
350 | } | |
351 | vhost_dev_free_iovecs(dev); | |
352 | if (dev->log_ctx) | |
353 | eventfd_ctx_put(dev->log_ctx); | |
354 | dev->log_ctx = NULL; | |
355 | if (dev->log_file) | |
356 | fput(dev->log_file); | |
357 | dev->log_file = NULL; | |
358 | /* No one will access memory at this point */ | |
359 | kfree(dev->memory); | |
360 | dev->memory = NULL; | |
361 | WARN_ON(!list_empty(&dev->work_list)); | |
362 | if (dev->worker) { | |
363 | kthread_stop(dev->worker); | |
364 | dev->worker = NULL; | |
365 | } | |
366 | if (dev->mm) | |
367 | mmput(dev->mm); | |
368 | dev->mm = NULL; | |
369 | } | |
370 | ||
371 | #if 0 | |
372 | /* Caller must have device mutex */ | |
373 | long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) | |
374 | { | |
375 | return 0; | |
376 | } | |
377 | #endif | |
378 | ||
379 | static int vhost_update_used_flags(struct vhost_virtqueue *vq) | |
380 | { | |
381 | iowrite16(vq->used_flags, mic_addr_in_host(vq->log_addr, &vq->used->flags)); | |
382 | return 0; | |
383 | } | |
384 | ||
385 | #if 0 | |
386 | int vhost_init_used(struct vhost_virtqueue *vq) | |
387 | { | |
388 | int r; | |
389 | if (!vq->private_data) | |
390 | return 0; | |
391 | ||
392 | r = vhost_update_used_flags(vq); | |
393 | if (r) | |
394 | return r; | |
395 | vq->signalled_used_valid = false; | |
396 | vq->last_used_idx = ioread16(mic_addr_in_host(vq->log_addr, &vq->used->idx)); | |
397 | return 0; | |
398 | } | |
399 | #endif | |
400 | ||
401 | /* Each buffer in the virtqueues is actually a chain of descriptors. This | |
402 | * function returns the next descriptor in the chain, | |
403 | * or -1U if we're at the end. */ | |
404 | static unsigned next_desc(struct vring_desc *desc) | |
405 | { | |
406 | unsigned int next; | |
407 | ||
408 | /* If this descriptor says it doesn't chain, we're done. */ | |
409 | if (!(desc->flags & VRING_DESC_F_NEXT)) | |
410 | return -1U; | |
411 | ||
412 | /* Check they're not leading us off end of descriptors. */ | |
413 | next = desc->next; | |
414 | /* Make sure compiler knows to grab that: we don't want it changing! */ | |
415 | /* We will use the result as an index in an array, so most | |
416 | * architectures only need a compiler barrier here. */ | |
417 | read_barrier_depends(); | |
418 | ||
419 | return next; | |
420 | } | |
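
/* Example: a three-descriptor chain for a net packet might look like
 *   desc[0] = { addr=hdr,  len=12,   flags=NEXT,  next=1 }
 *   desc[1] = { addr=buf,  len=1500, flags=NEXT,  next=2 }
 *   desc[2] = { addr=stat, len=1,    flags=WRITE }
 * next_desc() yields 1, then 2, then -1U once VRING_DESC_F_NEXT is clear. */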
421 | ||
422 | /* This looks in the virtqueue and for the first available buffer, and converts | |
423 | * it to an iovec for convenient access. Since descriptors consist of some | |
424 | * number of output then some number of input descriptors, it's actually two | |
425 | * iovecs, but we pack them into one and note how many of each there were. | |
426 | * | |
427 | * This function returns the descriptor number found, or vq->num (which is | |
428 | * never a valid descriptor number) if none was found. A negative code is | |
429 | * returned on error. */ | |
430 | int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, | |
431 | struct iovec iov[], unsigned int iov_size, | |
432 | unsigned int *out_num, unsigned int *in_num, | |
433 | struct vhost_log *log, unsigned int *log_num) | |
434 | { | |
435 | struct vring_desc desc; | |
436 | unsigned int i, head, found = 0; | |
437 | u16 last_avail_idx; | |
438 | int ret; | |
439 | ||
440 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
441 | last_avail_idx = vq->last_avail_idx; | |
442 | vq->avail_idx = ioread16(mic_addr_in_host(vq->log_addr, &vq->avail->idx)); | |
443 | ||
444 | if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { | |
445 | vq_err(vq, "Guest moved used index from %u to %u", | |
446 | last_avail_idx, vq->avail_idx); | |
447 | return -EFAULT; | |
448 | } | |
449 | ||
450 | /* If there's nothing new since last we looked, return invalid. */ | |
451 | if (vq->avail_idx == last_avail_idx) | |
452 | return vq->num; | |
453 | ||
454 | /* Only get avail ring entries after they have been exposed by guest. */ | |
455 | smp_rmb(); | |
456 | ||
457 | /* Grab the next descriptor number they're advertising, and increment | |
458 | * the index we've seen. */ | |
459 | head = ioread16(mic_addr_in_host(vq->log_addr, | |
460 | &vq->avail->ring[last_avail_idx % vq->num])); | |
461 | ||
462 | /* If their number is silly, that's an error. */ | |
463 | if (unlikely(head >= vq->num)) { | |
464 | vq_err(vq, "Guest says index %u > %u is available", | |
465 | head, vq->num); | |
466 | return -EINVAL; | |
467 | } | |
468 | ||
469 | /* When we start there are none of either input nor output. */ | |
470 | *out_num = *in_num = 0; | |
471 | if (unlikely(log)) | |
472 | *log_num = 0; | |
473 | ||
474 | i = head; | |
475 | do { | |
476 | unsigned iov_count = *in_num + *out_num; | |
477 | if (unlikely(i >= vq->num)) { | |
478 | vq_err(vq, "Desc index is %u > %u, head = %u", | |
479 | i, vq->num, head); | |
480 | return -EINVAL; | |
481 | } | |
482 | if (unlikely(++found > vq->num)) { | |
483 | vq_err(vq, "Loop detected: last one at %u " | |
484 | "vq size %u head %u\n", | |
485 | i, vq->num, head); | |
486 | return -EINVAL; | |
487 | } | |
488 | memcpy_fromio(&desc, mic_addr_in_host(vq->log_addr, vq->desc + i), sizeof(desc)); | |
489 | ||
490 | (iov + iov_count)->iov_base = (void *)desc.addr; | |
491 | (iov + iov_count)->iov_len = desc.len; | |
492 | ret = 1; | |
493 | if (desc.flags & VRING_DESC_F_WRITE) { | |
494 | /* If this is an input descriptor, | |
495 | * increment that count. */ | |
496 | *in_num += ret; | |
497 | if (unlikely(log)) { | |
498 | log[*log_num].addr = desc.addr; | |
499 | log[*log_num].len = desc.len; | |
500 | ++*log_num; | |
501 | } | |
502 | } else { | |
503 | /* If it's an output descriptor, they're all supposed | |
504 | * to come before any input descriptors. */ | |
505 | if (unlikely(*in_num)) { | |
506 | vq_err(vq, "Descriptor has out after in: " | |
507 | "idx %d\n", i); | |
508 | return -EINVAL; | |
509 | } | |
510 | *out_num += ret; | |
511 | } | |
512 | } while ((i = next_desc(&desc)) != -1); | |
513 | ||
514 | /* On success, increment avail index. */ | |
515 | vq->last_avail_idx++; | |
516 | ||
517 | /* Assume notifications from guest are disabled at this point, | |
518 | * if they aren't we would need to update avail_event index. */ | |
519 | BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); | |
520 | return head; | |
521 | } | |
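
#if 0
/* Illustrative sketch of the usual calling pattern (hypothetical handler,
 * simplified error handling, and assuming the per-queue iov[] scratch array
 * from struct vhost_virtqueue): pull buffers with vhost_get_vq_desc(),
 * consume them, and complete each with vhost_add_used_and_signal(). */
static void example_handle_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	unsigned int out, in;
	int head;

	mutex_lock(&vq->mutex);
	for (;;) {
		head = vhost_get_vq_desc(vq->dev, vq, vq->iov, UIO_MAXIOV,
					 &out, &in, NULL, NULL);
		if (head < 0)		/* error */
			break;
		if (head == vq->num)	/* ring is empty */
			break;
		/* ... process the out/in iovecs here ... */
		vhost_add_used_and_signal(vq->dev, vq, head, 0);
	}
	mutex_unlock(&vq->mutex);
}
#endif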
522 | ||
523 | /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ | |
524 | void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) | |
525 | { | |
526 | vq->last_avail_idx -= n; | |
527 | } | |
528 | ||
529 | /* After we've used one of their buffers, we tell them about it. We'll then | |
530 | * want to notify the guest, using eventfd. */ | |
531 | int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) | |
532 | { | |
533 | struct vring_used_elem __user *used; | |
534 | ||
535 | /* The virtqueue contains a ring of used buffers. Get a pointer to the | |
536 | * next entry in that used ring. */ | |
537 | used = &vq->used->ring[vq->last_used_idx % vq->num]; | |
538 | iowrite16(head, mic_addr_in_host(vq->log_addr, &used->id)); | |
539 | iowrite16(len, mic_addr_in_host(vq->log_addr, &used->len)); | |
540 | /* Make sure buffer is written before we update index. */ | |
541 | smp_wmb(); | |
542 | ioread16(mic_addr_in_host(vq->log_addr, &used->id)); | |
543 | iowrite16(vq->last_used_idx + 1, mic_addr_in_host(vq->log_addr, &vq->used->idx)); | |
544 | ||
545 | vq->last_used_idx++; | |
546 | ||
547 | /* If the driver never bothers to signal in a very long while, | |
548 | * used index might wrap around. If that happens, invalidate | |
549 | * signalled_used index we stored. TODO: make sure driver | |
550 | * signals at least once in 2^16 and remove this. */ | |
551 | if (unlikely(vq->last_used_idx == vq->signalled_used)) | |
552 | vq->signalled_used_valid = false; | |
553 | return 0; | |
554 | } | |
555 | ||
556 | static int __vhost_add_used_n(struct vhost_virtqueue *vq, | |
557 | struct vring_used_elem *heads, | |
558 | unsigned count) | |
559 | { | |
560 | struct vring_used_elem __user *used; | |
561 | u16 old, new; | |
562 | int start; | |
563 | ||
564 | start = vq->last_used_idx % vq->num; | |
565 | used = vq->used->ring + start; | |
566 | memcpy_toio(mic_addr_in_host(vq->log_addr, used), heads, count * sizeof(*used)); | |
567 | old = vq->last_used_idx; | |
568 | new = (vq->last_used_idx += count); | |
569 | /* If the driver never bothers to signal in a very long while, | |
570 | * used index might wrap around. If that happens, invalidate | |
571 | * signalled_used index we stored. TODO: make sure driver | |
572 | * signals at least once in 2^16 and remove this. */ | |
573 | if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) | |
574 | vq->signalled_used_valid = false; | |
575 | return 0; | |
576 | } | |
577 | ||
578 | /* After we've used one of their buffers, we tell them about it. We'll then | |
579 | * want to notify the guest, using eventfd. */ | |
580 | int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, | |
581 | unsigned count) | |
582 | { | |
583 | int start, n, r; | |
584 | ||
585 | start = vq->last_used_idx % vq->num; | |
586 | n = vq->num - start; | |
587 | if (n < count) { | |
588 | r = __vhost_add_used_n(vq, heads, n); | |
589 | if (r < 0) | |
590 | return r; | |
591 | heads += n; | |
592 | count -= n; | |
593 | } | |
594 | r = __vhost_add_used_n(vq, heads, count); | |
595 | ||
596 | /* Make sure buffer is written before we update index. */ | |
597 | smp_wmb(); | |
598 | iowrite16(vq->last_used_idx, mic_addr_in_host(vq->log_addr, &vq->used->idx)); | |
599 | return r; | |
600 | } | |
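
/* Worked example for vhost_add_used_n(): with vq->num == 8,
 * last_used_idx == 6 and count == 4, n == 2, so the first
 * __vhost_add_used_n() fills ring slots 6 and 7 and the second wraps
 * around to slots 0 and 1; only then is the used index pushed to the
 * card in a single iowrite16(). */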
601 | ||
602 | static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |
603 | { | |
604 | __u16 old, new; | |
605 | bool v; | |
606 | /* Flush out used index updates. This is paired | |
607 | * with the barrier that the Guest executes when enabling | |
608 | * interrupts. */ | |
609 | smp_mb(); | |
610 | ||
611 | if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && | |
612 | unlikely(vq->avail_idx == vq->last_avail_idx)) | |
613 | return true; | |
614 | ||
615 | if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | |
616 | __u16 flags; | |
617 | flags = ioread16(mic_addr_in_host(vq->log_addr, &vq->avail->flags)); | |
618 | return !(flags & VRING_AVAIL_F_NO_INTERRUPT); | |
619 | } | |
620 | old = vq->signalled_used; | |
621 | v = vq->signalled_used_valid; | |
622 | new = vq->signalled_used = vq->last_used_idx; | |
623 | vq->signalled_used_valid = true; | |
624 | ||
625 | if (unlikely(!v)) | |
626 | return true; | |
627 | ||
628 | return false; | |
629 | } | |
630 | ||
631 | /* This actually signals the guest, using eventfd. */ | |
632 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |
633 | { | |
634 | /* Signal the Guest tell them we used something up. */ | |
635 | if (vq->log_base && vhost_notify(dev, vq)) | |
636 | mic_send_virtio_intr((struct _mic_ctx_t *)vq->log_base); | |
637 | } | |
638 | ||
639 | /* And here's the combo meal deal. Supersize me! */ | |
640 | void vhost_add_used_and_signal(struct vhost_dev *dev, | |
641 | struct vhost_virtqueue *vq, | |
642 | unsigned int head, int len) | |
643 | { | |
644 | vhost_add_used(vq, head, len); | |
645 | vhost_signal(dev, vq); | |
646 | } | |
647 | ||
648 | #if 0 | |
649 | /* multi-buffer version of vhost_add_used_and_signal */ | |
650 | void vhost_add_used_and_signal_n(struct vhost_dev *dev, | |
651 | struct vhost_virtqueue *vq, | |
652 | struct vring_used_elem *heads, unsigned count) | |
653 | { | |
654 | vhost_add_used_n(vq, heads, count); | |
655 | vhost_signal(dev, vq); | |
656 | } | |
657 | #endif | |
658 | ||
659 | /* OK, now we need to know about added descriptors. */ | |
660 | bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |
661 | { | |
662 | u16 avail_idx; | |
663 | int r; | |
664 | if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) | |
665 | return false; | |
666 | vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; | |
667 | if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | |
668 | r = vhost_update_used_flags(vq); | |
669 | if (r) { | |
670 | vq_err(vq, "Failed to enable notification at %p: %d\n", | |
671 | &vq->used->flags, r); | |
672 | return false; | |
673 | } | |
674 | } | |
675 | /* They could have slipped one in as we were doing that: make | |
676 | * sure it's written, then check again. */ | |
677 | smp_mb(); | |
678 | avail_idx = ioread16(mic_addr_in_host(vq->log_addr, &vq->avail->idx)); | |
679 | ||
680 | return avail_idx != vq->avail_idx; | |
681 | } | |
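
#if 0
/* Illustrative sketch (hypothetical loop): the canonical way to use the
 * notification API is to keep guest notifications off while there is work,
 * and to re-check the ring after re-enabling them, because
 * vhost_enable_notify() returning true means buffers slipped in during the
 * race window it guards against. */
static void example_service_loop(struct vhost_dev *dev,
				 struct vhost_virtqueue *vq)
{
	for (;;) {
		vhost_disable_notify(dev, vq);
		/* ... drain the available ring here ... */
		if (!vhost_enable_notify(dev, vq))
			break;	/* nothing slipped in; wait for a kick */
		/* new buffers arrived while re-enabling: go around again */
	}
}
#endif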
682 | ||
683 | /* We don't need to be notified again. */ | |
684 | void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |
685 | { | |
686 | int r; | |
687 | if (vq->used_flags & VRING_USED_F_NO_NOTIFY) | |
688 | return; | |
689 | vq->used_flags |= VRING_USED_F_NO_NOTIFY; | |
690 | if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | |
691 | r = vhost_update_used_flags(vq); | |
692 | if (r) | |
693 | vq_err(vq, "Failed to enable notification at %p: %d\n", | |
694 | &vq->used->flags, r); | |
695 | } | |
696 | } | |
697 | #endif |