OpenCores
Subversion repository: or1k_soc_on_altera_embedded_dev_kit
URL: https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

File: /or1k_soc_on_altera_embedded_dev_kit/trunk/linux-2.6/linux-2.6.24/drivers/kvm/kvm_main.c
Blame information for rev 3 (rev 3 committed by xianfeng)

/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86_emulate.h"
#include "segment_descriptor.h"
#include "irq.h"

#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>

#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/desc.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static DEFINE_SPINLOCK(kvm_lock);
static LIST_HEAD(vm_list);

static cpumask_t cpus_hardware_enabled;

struct kvm_x86_ops *kvm_x86_ops;
struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);

static __read_mostly struct preempt_ops kvm_preempt_ops;

#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
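
/*
 * Per-vcpu statistics exported through debugfs.  Each entry below maps a
 * human-readable name to the offset of a counter in the stat area of
 * struct kvm_vcpu.  (The debugfs files themselves are presumably created
 * by the module init code, which is not part of this excerpt.)
 */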
static struct kvm_stats_debugfs_item {
        const char *name;
        int offset;
        struct dentry *dentry;
} debugfs_entries[] = {
        { "pf_fixed", STAT_OFFSET(pf_fixed) },
        { "pf_guest", STAT_OFFSET(pf_guest) },
        { "tlb_flush", STAT_OFFSET(tlb_flush) },
        { "invlpg", STAT_OFFSET(invlpg) },
        { "exits", STAT_OFFSET(exits) },
        { "io_exits", STAT_OFFSET(io_exits) },
        { "mmio_exits", STAT_OFFSET(mmio_exits) },
        { "signal_exits", STAT_OFFSET(signal_exits) },
        { "irq_window", STAT_OFFSET(irq_window_exits) },
        { "halt_exits", STAT_OFFSET(halt_exits) },
        { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
        { "request_irq", STAT_OFFSET(request_irq_exits) },
        { "irq_exits", STAT_OFFSET(irq_exits) },
        { "light_exits", STAT_OFFSET(light_exits) },
        { "efer_reload", STAT_OFFSET(efer_reload) },
        { NULL }
};

static struct dentry *debugfs_dir;

#define MAX_IO_MSRS 256

#define CR0_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
                          | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
                          | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR4_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
#define EFER_RESERVED_BITS 0xfffffffffffff2fe

#ifdef CONFIG_X86_64
/* LDT or TSS descriptor in the GDT. 16 bytes. */
struct segment_descriptor_64 {
        struct segment_descriptor s;
        u32 base_higher;
        u32 pad_zero;
};

#endif

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                           unsigned long arg);
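
/*
 * Look up the base address of the segment named by @selector.  The
 * descriptor is read from the host GDT, or from the LDT when the
 * selector's TI bit (bit 2) is set; on x86-64 the extra base_higher word
 * of system descriptors (LDT/TSS types) is folded into the result.
 */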
unsigned long segment_base(u16 selector)
{
        struct descriptor_table gdt;
        struct segment_descriptor *d;
        unsigned long table_base;
        typedef unsigned long ul;
        unsigned long v;

        if (selector == 0)
                return 0;

        asm ("sgdt %0" : "=m"(gdt));
        table_base = gdt.base;

        if (selector & 4) {           /* from ldt */
                u16 ldt_selector;

                asm ("sldt %0" : "=g"(ldt_selector));
                table_base = segment_base(ldt_selector);
        }
        d = (struct segment_descriptor *)(table_base + (selector & ~7));
        v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24);
#ifdef CONFIG_X86_64
        if (d->system == 0
            && (d->type == 2 || d->type == 9 || d->type == 11))
                v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
        return v;
}
EXPORT_SYMBOL_GPL(segment_base);

static inline int valid_vcpu(int n)
{
        return likely(n >= 0 && n < KVM_MAX_VCPUS);
}
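
/*
 * kvm_load_guest_fpu()/kvm_put_guest_fpu() swap the FPU state between
 * host and guest around guest execution.  guest_fpu_loaded tracks
 * whether the guest image currently lives in the hardware registers,
 * so the switch is only performed when it is actually needed.
 */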
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
        if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
                return;

        vcpu->guest_fpu_loaded = 1;
        fx_save(&vcpu->host_fx_image);
        fx_restore(&vcpu->guest_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);

void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
        if (!vcpu->guest_fpu_loaded)
                return;

        vcpu->guest_fpu_loaded = 0;
        fx_save(&vcpu->guest_fx_image);
        fx_restore(&vcpu->host_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
static void vcpu_load(struct kvm_vcpu *vcpu)
{
        int cpu;

        mutex_lock(&vcpu->mutex);
        cpu = get_cpu();
        preempt_notifier_register(&vcpu->preempt_notifier);
        kvm_x86_ops->vcpu_load(vcpu, cpu);
        put_cpu();
}

static void vcpu_put(struct kvm_vcpu *vcpu)
{
        preempt_disable();
        kvm_x86_ops->vcpu_put(vcpu);
        preempt_notifier_unregister(&vcpu->preempt_notifier);
        preempt_enable();
        mutex_unlock(&vcpu->mutex);
}

static void ack_flush(void *_completed)
{
}
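
/*
 * Ask every vcpu of @kvm to flush its TLB: the KVM_TLB_FLUSH bit is set
 * in each vcpu's request bitmap, and an IPI (using the empty ack_flush
 * callback) kicks the cpus currently running a vcpu so the request is
 * noticed before the guest is re-entered.
 */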
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
        int i, cpu;
        cpumask_t cpus;
        struct kvm_vcpu *vcpu;

        cpus_clear(cpus);
        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                vcpu = kvm->vcpus[i];
                if (!vcpu)
                        continue;
                if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
                        continue;
                cpu = vcpu->cpu;
                if (cpu != -1 && cpu != raw_smp_processor_id())
                        cpu_set(cpu, cpus);
        }
        smp_call_function_mask(cpus, ack_flush, NULL, 1);
}
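
/*
 * Architecture-independent part of vcpu creation: set up the mutex and
 * mp_state, allocate the kvm_run page and the pio_data page, and create
 * the MMU context.  Everything allocated here is released again on the
 * error paths below and in kvm_vcpu_uninit().
 */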
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
        struct page *page;
        int r;

        mutex_init(&vcpu->mutex);
        vcpu->cpu = -1;
        vcpu->mmu.root_hpa = INVALID_PAGE;
        vcpu->kvm = kvm;
        vcpu->vcpu_id = id;
        if (!irqchip_in_kernel(kvm) || id == 0)
                vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
        else
                vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
        init_waitqueue_head(&vcpu->wq);

        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page) {
                r = -ENOMEM;
                goto fail;
        }
        vcpu->run = page_address(page);

        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page) {
                r = -ENOMEM;
                goto fail_free_run;
        }
        vcpu->pio_data = page_address(page);

        r = kvm_mmu_create(vcpu);
        if (r < 0)
                goto fail_free_pio_data;

        return 0;

fail_free_pio_data:
        free_page((unsigned long)vcpu->pio_data);
fail_free_run:
        free_page((unsigned long)vcpu->run);
fail:
        return -ENOMEM;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);
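
/*
 * Reverse of kvm_vcpu_init(): tear down the MMU, cancel the in-kernel
 * local APIC timer and free the APIC, then release the pio_data and
 * kvm_run pages.
 */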
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
        kvm_mmu_destroy(vcpu);
        if (vcpu->apic)
                hrtimer_cancel(&vcpu->apic->timer.dev);
        kvm_free_apic(vcpu->apic);
        free_page((unsigned long)vcpu->pio_data);
        free_page((unsigned long)vcpu->run);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
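
/*
 * Allocate and minimally initialize a VM: set up the pio and mmio I/O
 * busses and the per-VM lock, then add the new kvm to the global
 * vm_list under kvm_lock.
 */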
static struct kvm *kvm_create_vm(void)
{
        struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);

        if (!kvm)
                return ERR_PTR(-ENOMEM);

        kvm_io_bus_init(&kvm->pio_bus);
        mutex_init(&kvm->lock);
        INIT_LIST_HEAD(&kvm->active_mmu_pages);
        kvm_io_bus_init(&kvm->mmio_bus);
        spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
        spin_unlock(&kvm_lock);
        return kvm;
}

296
/*
297
 * Free any memory in @free but not in @dont.
298
 */
299
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
300
                                  struct kvm_memory_slot *dont)
301
{
302
        int i;
303
 
304
        if (!dont || free->phys_mem != dont->phys_mem)
305
                if (free->phys_mem) {
306
                        for (i = 0; i < free->npages; ++i)
307
                                if (free->phys_mem[i])
308
                                        __free_page(free->phys_mem[i]);
309
                        vfree(free->phys_mem);
310
                }
311
 
312
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
313
                vfree(free->dirty_bitmap);
314
 
315
        free->phys_mem = NULL;
316
        free->npages = 0;
317
        free->dirty_bitmap = NULL;
318
}
319
 
320
static void kvm_free_physmem(struct kvm *kvm)
321
{
322
        int i;
323
 
324
        for (i = 0; i < kvm->nmemslots; ++i)
325
                kvm_free_physmem_slot(&kvm->memslots[i], NULL);
326
}
327
 
328
static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
329
{
330
        int i;
331
 
332
        for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
333
                if (vcpu->pio.guest_pages[i]) {
334
                        __free_page(vcpu->pio.guest_pages[i]);
335
                        vcpu->pio.guest_pages[i] = NULL;
336
                }
337
}
338
 
339
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
340
{
341
        vcpu_load(vcpu);
342
        kvm_mmu_unload(vcpu);
343
        vcpu_put(vcpu);
344
}
345
 
346
static void kvm_free_vcpus(struct kvm *kvm)
347
{
348
        unsigned int i;
349
 
350
        /*
351
         * Unpin any mmu pages first.
352
         */
353
        for (i = 0; i < KVM_MAX_VCPUS; ++i)
354
                if (kvm->vcpus[i])
355
                        kvm_unload_vcpu_mmu(kvm->vcpus[i]);
356
        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
357
                if (kvm->vcpus[i]) {
358
                        kvm_x86_ops->vcpu_free(kvm->vcpus[i]);
359
                        kvm->vcpus[i] = NULL;
360
                }
361
        }
362
 
363
}
364
 
365
static void kvm_destroy_vm(struct kvm *kvm)
366
{
367
        spin_lock(&kvm_lock);
368
        list_del(&kvm->vm_list);
369
        spin_unlock(&kvm_lock);
370
        kvm_io_bus_destroy(&kvm->pio_bus);
371
        kvm_io_bus_destroy(&kvm->mmio_bus);
372
        kfree(kvm->vpic);
373
        kfree(kvm->vioapic);
374
        kvm_free_vcpus(kvm);
375
        kvm_free_physmem(kvm);
376
        kfree(kvm);
377
}
378
 
379
static int kvm_vm_release(struct inode *inode, struct file *filp)
380
{
381
        struct kvm *kvm = filp->private_data;
382
 
383
        kvm_destroy_vm(kvm);
384
        return 0;
385
}
386
 
387
static void inject_gp(struct kvm_vcpu *vcpu)
388
{
389
        kvm_x86_ops->inject_gp(vcpu, 0);
390
}
391
 
392
/*
393
 * Load the pae pdptrs.  Return true if they are all valid.
394
 */
395
static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
396
{
397
        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
398
        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
399
        int i;
400
        u64 *pdpt;
401
        int ret;
402
        struct page *page;
403
        u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
404
 
405
        mutex_lock(&vcpu->kvm->lock);
406
        page = gfn_to_page(vcpu->kvm, pdpt_gfn);
407
        if (!page) {
408
                ret = 0;
409
                goto out;
410
        }
411
 
412
        pdpt = kmap_atomic(page, KM_USER0);
413
        memcpy(pdpte, pdpt+offset, sizeof(pdpte));
414
        kunmap_atomic(pdpt, KM_USER0);
415
 
416
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
417
                if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
418
                        ret = 0;
419
                        goto out;
420
                }
421
        }
422
        ret = 1;
423
 
424
        memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
425
out:
426
        mutex_unlock(&vcpu->kvm->lock);
427
 
428
        return ret;
429
}
430
 
431
void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
432
{
433
        if (cr0 & CR0_RESERVED_BITS) {
434
                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
435
                       cr0, vcpu->cr0);
436
                inject_gp(vcpu);
437
                return;
438
        }
439
 
440
        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
441
                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
442
                inject_gp(vcpu);
443
                return;
444
        }
445
 
446
        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
447
                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
448
                       "and a clear PE flag\n");
449
                inject_gp(vcpu);
450
                return;
451
        }
452
 
453
        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
454
#ifdef CONFIG_X86_64
455
                if ((vcpu->shadow_efer & EFER_LME)) {
456
                        int cs_db, cs_l;
457
 
458
                        if (!is_pae(vcpu)) {
459
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
460
                                       "in long mode while PAE is disabled\n");
461
                                inject_gp(vcpu);
462
                                return;
463
                        }
464
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
465
                        if (cs_l) {
466
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
467
                                       "in long mode while CS.L == 1\n");
468
                                inject_gp(vcpu);
469
                                return;
470
 
471
                        }
472
                } else
473
#endif
474
                if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
475
                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
476
                               "reserved bits\n");
477
                        inject_gp(vcpu);
478
                        return;
479
                }
480
 
481
        }
482
 
483
        kvm_x86_ops->set_cr0(vcpu, cr0);
484
        vcpu->cr0 = cr0;
485
 
486
        mutex_lock(&vcpu->kvm->lock);
487
        kvm_mmu_reset_context(vcpu);
488
        mutex_unlock(&vcpu->kvm->lock);
489
        return;
490
}
491
EXPORT_SYMBOL_GPL(set_cr0);
492
 
493
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
494
{
495
        set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
496
}
497
EXPORT_SYMBOL_GPL(lmsw);
498
 
499
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
500
{
501
        if (cr4 & CR4_RESERVED_BITS) {
502
                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
503
                inject_gp(vcpu);
504
                return;
505
        }
506
 
507
        if (is_long_mode(vcpu)) {
508
                if (!(cr4 & X86_CR4_PAE)) {
509
                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
510
                               "in long mode\n");
511
                        inject_gp(vcpu);
512
                        return;
513
                }
514
        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
515
                   && !load_pdptrs(vcpu, vcpu->cr3)) {
516
                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
517
                inject_gp(vcpu);
518
                return;
519
        }
520
 
521
        if (cr4 & X86_CR4_VMXE) {
522
                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
523
                inject_gp(vcpu);
524
                return;
525
        }
526
        kvm_x86_ops->set_cr4(vcpu, cr4);
527
        vcpu->cr4 = cr4;
528
        mutex_lock(&vcpu->kvm->lock);
529
        kvm_mmu_reset_context(vcpu);
530
        mutex_unlock(&vcpu->kvm->lock);
531
}
532
EXPORT_SYMBOL_GPL(set_cr4);
533
 
534
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
535
{
536
        if (is_long_mode(vcpu)) {
537
                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
538
                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
539
                        inject_gp(vcpu);
540
                        return;
541
                }
542
        } else {
543
                if (is_pae(vcpu)) {
544
                        if (cr3 & CR3_PAE_RESERVED_BITS) {
545
                                printk(KERN_DEBUG
546
                                       "set_cr3: #GP, reserved bits\n");
547
                                inject_gp(vcpu);
548
                                return;
549
                        }
550
                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
551
                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
552
                                       "reserved bits\n");
553
                                inject_gp(vcpu);
554
                                return;
555
                        }
556
                } else {
557
                        if (cr3 & CR3_NONPAE_RESERVED_BITS) {
558
                                printk(KERN_DEBUG
559
                                       "set_cr3: #GP, reserved bits\n");
560
                                inject_gp(vcpu);
561
                                return;
562
                        }
563
                }
564
        }
565
 
566
        mutex_lock(&vcpu->kvm->lock);
567
        /*
568
         * Does the new cr3 value map to physical memory? (Note, we
569
         * catch an invalid cr3 even in real-mode, because it would
570
         * cause trouble later on when we turn on paging anyway.)
571
         *
572
         * A real CPU would silently accept an invalid cr3 and would
573
         * attempt to use it - with largely undefined (and often hard
574
         * to debug) behavior on the guest side.
575
         */
576
        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
577
                inject_gp(vcpu);
578
        else {
579
                vcpu->cr3 = cr3;
580
                vcpu->mmu.new_cr3(vcpu);
581
        }
582
        mutex_unlock(&vcpu->kvm->lock);
583
}
584
EXPORT_SYMBOL_GPL(set_cr3);
585
 
586
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
587
{
588
        if (cr8 & CR8_RESERVED_BITS) {
589
                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
590
                inject_gp(vcpu);
591
                return;
592
        }
593
        if (irqchip_in_kernel(vcpu->kvm))
594
                kvm_lapic_set_tpr(vcpu, cr8);
595
        else
596
                vcpu->cr8 = cr8;
597
}
598
EXPORT_SYMBOL_GPL(set_cr8);
599
 
600
unsigned long get_cr8(struct kvm_vcpu *vcpu)
601
{
602
        if (irqchip_in_kernel(vcpu->kvm))
603
                return kvm_lapic_get_cr8(vcpu);
604
        else
605
                return vcpu->cr8;
606
}
607
EXPORT_SYMBOL_GPL(get_cr8);
608
 
609
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
610
{
611
        if (irqchip_in_kernel(vcpu->kvm))
612
                return vcpu->apic_base;
613
        else
614
                return vcpu->apic_base;
615
}
616
EXPORT_SYMBOL_GPL(kvm_get_apic_base);
617
 
618
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
619
{
620
        /* TODO: reserved bits check */
621
        if (irqchip_in_kernel(vcpu->kvm))
622
                kvm_lapic_set_base(vcpu, data);
623
        else
624
                vcpu->apic_base = data;
625
}
626
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
627
 
628
void fx_init(struct kvm_vcpu *vcpu)
629
{
630
        unsigned after_mxcsr_mask;
631
 
632
        /* Initialize guest FPU by resetting ours and saving into guest's */
633
        preempt_disable();
634
        fx_save(&vcpu->host_fx_image);
635
        fpu_init();
636
        fx_save(&vcpu->guest_fx_image);
637
        fx_restore(&vcpu->host_fx_image);
638
        preempt_enable();
639
 
640
        vcpu->cr0 |= X86_CR0_ET;
641
        after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
642
        vcpu->guest_fx_image.mxcsr = 0x1f80;
643
        memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
644
               0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
645
}
646
EXPORT_SYMBOL_GPL(fx_init);
647
 
648
/*
649
 * Allocate some memory and give it an address in the guest physical address
650
 * space.
651
 *
652
 * Discontiguous memory is allowed, mostly for framebuffers.
653
 */
654
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
655
                                          struct kvm_memory_region *mem)
656
{
657
        int r;
658
        gfn_t base_gfn;
659
        unsigned long npages;
660
        unsigned long i;
661
        struct kvm_memory_slot *memslot;
662
        struct kvm_memory_slot old, new;
663
 
664
        r = -EINVAL;
665
        /* General sanity checks */
666
        if (mem->memory_size & (PAGE_SIZE - 1))
667
                goto out;
668
        if (mem->guest_phys_addr & (PAGE_SIZE - 1))
669
                goto out;
670
        if (mem->slot >= KVM_MEMORY_SLOTS)
671
                goto out;
672
        if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
673
                goto out;
674
 
675
        memslot = &kvm->memslots[mem->slot];
676
        base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
677
        npages = mem->memory_size >> PAGE_SHIFT;
678
 
679
        if (!npages)
680
                mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
681
 
682
        mutex_lock(&kvm->lock);
683
 
684
        new = old = *memslot;
685
 
686
        new.base_gfn = base_gfn;
687
        new.npages = npages;
688
        new.flags = mem->flags;
689
 
690
        /* Disallow changing a memory slot's size. */
691
        r = -EINVAL;
692
        if (npages && old.npages && npages != old.npages)
693
                goto out_unlock;
694
 
695
        /* Check for overlaps */
696
        r = -EEXIST;
697
        for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
698
                struct kvm_memory_slot *s = &kvm->memslots[i];
699
 
700
                if (s == memslot)
701
                        continue;
702
                if (!((base_gfn + npages <= s->base_gfn) ||
703
                      (base_gfn >= s->base_gfn + s->npages)))
704
                        goto out_unlock;
705
        }
706
 
707
        /* Deallocate if slot is being removed */
708
        if (!npages)
709
                new.phys_mem = NULL;
710
 
711
        /* Free page dirty bitmap if unneeded */
712
        if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
713
                new.dirty_bitmap = NULL;
714
 
715
        r = -ENOMEM;
716
 
717
        /* Allocate if a slot is being created */
718
        if (npages && !new.phys_mem) {
719
                new.phys_mem = vmalloc(npages * sizeof(struct page *));
720
 
721
                if (!new.phys_mem)
722
                        goto out_unlock;
723
 
724
                memset(new.phys_mem, 0, npages * sizeof(struct page *));
725
                for (i = 0; i < npages; ++i) {
726
                        new.phys_mem[i] = alloc_page(GFP_HIGHUSER
727
                                                     | __GFP_ZERO);
728
                        if (!new.phys_mem[i])
729
                                goto out_unlock;
730
                        set_page_private(new.phys_mem[i],0);
731
                }
732
        }
733
 
734
        /* Allocate page dirty bitmap if needed */
735
        if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
736
                unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
737
 
738
                new.dirty_bitmap = vmalloc(dirty_bytes);
739
                if (!new.dirty_bitmap)
740
                        goto out_unlock;
741
                memset(new.dirty_bitmap, 0, dirty_bytes);
742
        }
743
 
744
        if (mem->slot >= kvm->nmemslots)
745
                kvm->nmemslots = mem->slot + 1;
746
 
747
        *memslot = new;
748
 
749
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
750
        kvm_flush_remote_tlbs(kvm);
751
 
752
        mutex_unlock(&kvm->lock);
753
 
754
        kvm_free_physmem_slot(&old, &new);
755
        return 0;
756
 
757
out_unlock:
758
        mutex_unlock(&kvm->lock);
759
        kvm_free_physmem_slot(&new, &old);
760
out:
761
        return r;
762
}
763
 
764
/*
765
 * Get (and clear) the dirty memory log for a memory slot.
766
 */
767
static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
768
                                      struct kvm_dirty_log *log)
769
{
770
        struct kvm_memory_slot *memslot;
771
        int r, i;
772
        int n;
773
        unsigned long any = 0;
774
 
775
        mutex_lock(&kvm->lock);
776
 
777
        r = -EINVAL;
778
        if (log->slot >= KVM_MEMORY_SLOTS)
779
                goto out;
780
 
781
        memslot = &kvm->memslots[log->slot];
782
        r = -ENOENT;
783
        if (!memslot->dirty_bitmap)
784
                goto out;
785
 
786
        n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
787
 
788
        for (i = 0; !any && i < n/sizeof(long); ++i)
789
                any = memslot->dirty_bitmap[i];
790
 
791
        r = -EFAULT;
792
        if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
793
                goto out;
794
 
795
        /* If nothing is dirty, don't bother messing with page tables. */
796
        if (any) {
797
                kvm_mmu_slot_remove_write_access(kvm, log->slot);
798
                kvm_flush_remote_tlbs(kvm);
799
                memset(memslot->dirty_bitmap, 0, n);
800
        }
801
 
802
        r = 0;
803
 
804
out:
805
        mutex_unlock(&kvm->lock);
806
        return r;
807
}
808
 
809
/*
810
 * Set a new alias region.  Aliases map a portion of physical memory into
811
 * another portion.  This is useful for memory windows, for example the PC
812
 * VGA region.
813
 */
814
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
815
                                         struct kvm_memory_alias *alias)
816
{
817
        int r, n;
818
        struct kvm_mem_alias *p;
819
 
820
        r = -EINVAL;
821
        /* General sanity checks */
822
        if (alias->memory_size & (PAGE_SIZE - 1))
823
                goto out;
824
        if (alias->guest_phys_addr & (PAGE_SIZE - 1))
825
                goto out;
826
        if (alias->slot >= KVM_ALIAS_SLOTS)
827
                goto out;
828
        if (alias->guest_phys_addr + alias->memory_size
829
            < alias->guest_phys_addr)
830
                goto out;
831
        if (alias->target_phys_addr + alias->memory_size
832
            < alias->target_phys_addr)
833
                goto out;
834
 
835
        mutex_lock(&kvm->lock);
836
 
837
        p = &kvm->aliases[alias->slot];
838
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
839
        p->npages = alias->memory_size >> PAGE_SHIFT;
840
        p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
841
 
842
        for (n = KVM_ALIAS_SLOTS; n > 0; --n)
843
                if (kvm->aliases[n - 1].npages)
844
                        break;
845
        kvm->naliases = n;
846
 
847
        kvm_mmu_zap_all(kvm);
848
 
849
        mutex_unlock(&kvm->lock);
850
 
851
        return 0;
852
 
853
out:
854
        return r;
855
}
856
 
857
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
858
{
859
        int r;
860
 
861
        r = 0;
862
        switch (chip->chip_id) {
863
        case KVM_IRQCHIP_PIC_MASTER:
864
                memcpy (&chip->chip.pic,
865
                        &pic_irqchip(kvm)->pics[0],
866
                        sizeof(struct kvm_pic_state));
867
                break;
868
        case KVM_IRQCHIP_PIC_SLAVE:
869
                memcpy (&chip->chip.pic,
870
                        &pic_irqchip(kvm)->pics[1],
871
                        sizeof(struct kvm_pic_state));
872
                break;
873
        case KVM_IRQCHIP_IOAPIC:
874
                memcpy (&chip->chip.ioapic,
875
                        ioapic_irqchip(kvm),
876
                        sizeof(struct kvm_ioapic_state));
877
                break;
878
        default:
879
                r = -EINVAL;
880
                break;
881
        }
882
        return r;
883
}
884
 
885
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
886
{
887
        int r;
888
 
889
        r = 0;
890
        switch (chip->chip_id) {
891
        case KVM_IRQCHIP_PIC_MASTER:
892
                memcpy (&pic_irqchip(kvm)->pics[0],
893
                        &chip->chip.pic,
894
                        sizeof(struct kvm_pic_state));
895
                break;
896
        case KVM_IRQCHIP_PIC_SLAVE:
897
                memcpy (&pic_irqchip(kvm)->pics[1],
898
                        &chip->chip.pic,
899
                        sizeof(struct kvm_pic_state));
900
                break;
901
        case KVM_IRQCHIP_IOAPIC:
902
                memcpy (ioapic_irqchip(kvm),
903
                        &chip->chip.ioapic,
904
                        sizeof(struct kvm_ioapic_state));
905
                break;
906
        default:
907
                r = -EINVAL;
908
                break;
909
        }
910
        kvm_pic_update_irq(pic_irqchip(kvm));
911
        return r;
912
}
913
 
914
static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
915
{
916
        int i;
917
        struct kvm_mem_alias *alias;
918
 
919
        for (i = 0; i < kvm->naliases; ++i) {
920
                alias = &kvm->aliases[i];
921
                if (gfn >= alias->base_gfn
922
                    && gfn < alias->base_gfn + alias->npages)
923
                        return alias->target_gfn + gfn - alias->base_gfn;
924
        }
925
        return gfn;
926
}
927
 
928
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
929
{
930
        int i;
931
 
932
        for (i = 0; i < kvm->nmemslots; ++i) {
933
                struct kvm_memory_slot *memslot = &kvm->memslots[i];
934
 
935
                if (gfn >= memslot->base_gfn
936
                    && gfn < memslot->base_gfn + memslot->npages)
937
                        return memslot;
938
        }
939
        return NULL;
940
}
941
 
942
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
943
{
944
        gfn = unalias_gfn(kvm, gfn);
945
        return __gfn_to_memslot(kvm, gfn);
946
}
947
 
948
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
949
{
950
        struct kvm_memory_slot *slot;
951
 
952
        gfn = unalias_gfn(kvm, gfn);
953
        slot = __gfn_to_memslot(kvm, gfn);
954
        if (!slot)
955
                return NULL;
956
        return slot->phys_mem[gfn - slot->base_gfn];
957
}
958
EXPORT_SYMBOL_GPL(gfn_to_page);
959
 
960
/* WARNING: Does not work on aliased pages. */
961
void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
962
{
963
        struct kvm_memory_slot *memslot;
964
 
965
        memslot = __gfn_to_memslot(kvm, gfn);
966
        if (memslot && memslot->dirty_bitmap) {
967
                unsigned long rel_gfn = gfn - memslot->base_gfn;
968
 
969
                /* avoid RMW */
970
                if (!test_bit(rel_gfn, memslot->dirty_bitmap))
971
                        set_bit(rel_gfn, memslot->dirty_bitmap);
972
        }
973
}
974
 
975
int emulator_read_std(unsigned long addr,
976
                             void *val,
977
                             unsigned int bytes,
978
                             struct kvm_vcpu *vcpu)
979
{
980
        void *data = val;
981
 
982
        while (bytes) {
983
                gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
984
                unsigned offset = addr & (PAGE_SIZE-1);
985
                unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
986
                unsigned long pfn;
987
                struct page *page;
988
                void *page_virt;
989
 
990
                if (gpa == UNMAPPED_GVA)
991
                        return X86EMUL_PROPAGATE_FAULT;
992
                pfn = gpa >> PAGE_SHIFT;
993
                page = gfn_to_page(vcpu->kvm, pfn);
994
                if (!page)
995
                        return X86EMUL_UNHANDLEABLE;
996
                page_virt = kmap_atomic(page, KM_USER0);
997
 
998
                memcpy(data, page_virt + offset, tocopy);
999
 
1000
                kunmap_atomic(page_virt, KM_USER0);
1001
 
1002
                bytes -= tocopy;
1003
                data += tocopy;
1004
                addr += tocopy;
1005
        }
1006
 
1007
        return X86EMUL_CONTINUE;
1008
}
1009
EXPORT_SYMBOL_GPL(emulator_read_std);
1010
 
1011
static int emulator_write_std(unsigned long addr,
1012
                              const void *val,
1013
                              unsigned int bytes,
1014
                              struct kvm_vcpu *vcpu)
1015
{
1016
        pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
1017
        return X86EMUL_UNHANDLEABLE;
1018
}
1019
 
1020
/*
1021
 * Only the apic needs an MMIO device hook, so shortcut now.
1022
 */
1023
static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1024
                                                gpa_t addr)
1025
{
1026
        struct kvm_io_device *dev;
1027
 
1028
        if (vcpu->apic) {
1029
                dev = &vcpu->apic->dev;
1030
                if (dev->in_range(dev, addr))
1031
                        return dev;
1032
        }
1033
        return NULL;
1034
}
1035
 
1036
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1037
                                                gpa_t addr)
1038
{
1039
        struct kvm_io_device *dev;
1040
 
1041
        dev = vcpu_find_pervcpu_dev(vcpu, addr);
1042
        if (dev == NULL)
1043
                dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
1044
        return dev;
1045
}
1046
 
1047
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
1048
                                               gpa_t addr)
1049
{
1050
        return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
1051
}
1052
 
1053
static int emulator_read_emulated(unsigned long addr,
1054
                                  void *val,
1055
                                  unsigned int bytes,
1056
                                  struct kvm_vcpu *vcpu)
1057
{
1058
        struct kvm_io_device *mmio_dev;
1059
        gpa_t                 gpa;
1060
 
1061
        if (vcpu->mmio_read_completed) {
1062
                memcpy(val, vcpu->mmio_data, bytes);
1063
                vcpu->mmio_read_completed = 0;
1064
                return X86EMUL_CONTINUE;
1065
        } else if (emulator_read_std(addr, val, bytes, vcpu)
1066
                   == X86EMUL_CONTINUE)
1067
                return X86EMUL_CONTINUE;
1068
 
1069
        gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1070
        if (gpa == UNMAPPED_GVA)
1071
                return X86EMUL_PROPAGATE_FAULT;
1072
 
1073
        /*
1074
         * Is this MMIO handled locally?
1075
         */
1076
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1077
        if (mmio_dev) {
1078
                kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1079
                return X86EMUL_CONTINUE;
1080
        }
1081
 
1082
        vcpu->mmio_needed = 1;
1083
        vcpu->mmio_phys_addr = gpa;
1084
        vcpu->mmio_size = bytes;
1085
        vcpu->mmio_is_write = 0;
1086
 
1087
        return X86EMUL_UNHANDLEABLE;
1088
}
1089
 
1090
static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1091
                               const void *val, int bytes)
1092
{
1093
        struct page *page;
1094
        void *virt;
1095
 
1096
        if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
1097
                return 0;
1098
        page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1099
        if (!page)
1100
                return 0;
1101
        mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
1102
        virt = kmap_atomic(page, KM_USER0);
1103
        kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1104
        memcpy(virt + offset_in_page(gpa), val, bytes);
1105
        kunmap_atomic(virt, KM_USER0);
1106
        return 1;
1107
}
1108
 
1109
static int emulator_write_emulated_onepage(unsigned long addr,
1110
                                           const void *val,
1111
                                           unsigned int bytes,
1112
                                           struct kvm_vcpu *vcpu)
1113
{
1114
        struct kvm_io_device *mmio_dev;
1115
        gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1116
 
1117
        if (gpa == UNMAPPED_GVA) {
1118
                kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
1119
                return X86EMUL_PROPAGATE_FAULT;
1120
        }
1121
 
1122
        if (emulator_write_phys(vcpu, gpa, val, bytes))
1123
                return X86EMUL_CONTINUE;
1124
 
1125
        /*
1126
         * Is this MMIO handled locally?
1127
         */
1128
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1129
        if (mmio_dev) {
1130
                kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1131
                return X86EMUL_CONTINUE;
1132
        }
1133
 
1134
        vcpu->mmio_needed = 1;
1135
        vcpu->mmio_phys_addr = gpa;
1136
        vcpu->mmio_size = bytes;
1137
        vcpu->mmio_is_write = 1;
1138
        memcpy(vcpu->mmio_data, val, bytes);
1139
 
1140
        return X86EMUL_CONTINUE;
1141
}
1142
 
1143
int emulator_write_emulated(unsigned long addr,
1144
                                   const void *val,
1145
                                   unsigned int bytes,
1146
                                   struct kvm_vcpu *vcpu)
1147
{
1148
        /* Crossing a page boundary? */
1149
        if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
1150
                int rc, now;
1151
 
1152
                now = -addr & ~PAGE_MASK;
1153
                rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
1154
                if (rc != X86EMUL_CONTINUE)
1155
                        return rc;
1156
                addr += now;
1157
                val += now;
1158
                bytes -= now;
1159
        }
1160
        return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
1161
}
1162
EXPORT_SYMBOL_GPL(emulator_write_emulated);
1163
 
1164
static int emulator_cmpxchg_emulated(unsigned long addr,
1165
                                     const void *old,
1166
                                     const void *new,
1167
                                     unsigned int bytes,
1168
                                     struct kvm_vcpu *vcpu)
1169
{
1170
        static int reported;
1171
 
1172
        if (!reported) {
1173
                reported = 1;
1174
                printk(KERN_WARNING "kvm: emulating exchange as write\n");
1175
        }
1176
        return emulator_write_emulated(addr, new, bytes, vcpu);
1177
}
1178
 
1179
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
1180
{
1181
        return kvm_x86_ops->get_segment_base(vcpu, seg);
1182
}
1183
 
1184
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
1185
{
1186
        return X86EMUL_CONTINUE;
1187
}
1188
 
1189
int emulate_clts(struct kvm_vcpu *vcpu)
1190
{
1191
        kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
1192
        return X86EMUL_CONTINUE;
1193
}
1194
 
1195
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
1196
{
1197
        struct kvm_vcpu *vcpu = ctxt->vcpu;
1198
 
1199
        switch (dr) {
1200
        case 0 ... 3:
1201
                *dest = kvm_x86_ops->get_dr(vcpu, dr);
1202
                return X86EMUL_CONTINUE;
1203
        default:
1204
                pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
1205
                return X86EMUL_UNHANDLEABLE;
1206
        }
1207
}
1208
 
1209
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
1210
{
1211
        unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
1212
        int exception;
1213
 
1214
        kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
1215
        if (exception) {
1216
                /* FIXME: better handling */
1217
                return X86EMUL_UNHANDLEABLE;
1218
        }
1219
        return X86EMUL_CONTINUE;
1220
}
1221
 
1222
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1223
{
1224
        static int reported;
1225
        u8 opcodes[4];
1226
        unsigned long rip = vcpu->rip;
1227
        unsigned long rip_linear;
1228
 
1229
        rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
1230
 
1231
        if (reported)
1232
                return;
1233
 
1234
        emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
1235
 
1236
        printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
1237
               context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
1238
        reported = 1;
1239
}
1240
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1241
 
1242
struct x86_emulate_ops emulate_ops = {
1243
        .read_std            = emulator_read_std,
1244
        .write_std           = emulator_write_std,
1245
        .read_emulated       = emulator_read_emulated,
1246
        .write_emulated      = emulator_write_emulated,
1247
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
1248
};
1249
 
1250
int emulate_instruction(struct kvm_vcpu *vcpu,
1251
                        struct kvm_run *run,
1252
                        unsigned long cr2,
1253
                        u16 error_code)
1254
{
1255
        struct x86_emulate_ctxt emulate_ctxt;
1256
        int r;
1257
        int cs_db, cs_l;
1258
 
1259
        vcpu->mmio_fault_cr2 = cr2;
1260
        kvm_x86_ops->cache_regs(vcpu);
1261
 
1262
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
1263
 
1264
        emulate_ctxt.vcpu = vcpu;
1265
        emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
1266
        emulate_ctxt.cr2 = cr2;
1267
        emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
1268
                ? X86EMUL_MODE_REAL : cs_l
1269
                ? X86EMUL_MODE_PROT64 : cs_db
1270
                ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
1271
 
1272
        if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
1273
                emulate_ctxt.cs_base = 0;
1274
                emulate_ctxt.ds_base = 0;
1275
                emulate_ctxt.es_base = 0;
1276
                emulate_ctxt.ss_base = 0;
1277
        } else {
1278
                emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
1279
                emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
1280
                emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
1281
                emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
1282
        }
1283
 
1284
        emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
1285
        emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
1286
 
1287
        vcpu->mmio_is_write = 0;
1288
        vcpu->pio.string = 0;
1289
        r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
1290
        if (vcpu->pio.string)
1291
                return EMULATE_DO_MMIO;
1292
 
1293
        if ((r || vcpu->mmio_is_write) && run) {
1294
                run->exit_reason = KVM_EXIT_MMIO;
1295
                run->mmio.phys_addr = vcpu->mmio_phys_addr;
1296
                memcpy(run->mmio.data, vcpu->mmio_data, 8);
1297
                run->mmio.len = vcpu->mmio_size;
1298
                run->mmio.is_write = vcpu->mmio_is_write;
1299
        }
1300
 
1301
        if (r) {
1302
                if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1303
                        return EMULATE_DONE;
1304
                if (!vcpu->mmio_needed) {
1305
                        kvm_report_emulation_failure(vcpu, "mmio");
1306
                        return EMULATE_FAIL;
1307
                }
1308
                return EMULATE_DO_MMIO;
1309
        }
1310
 
1311
        kvm_x86_ops->decache_regs(vcpu);
1312
        kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags);
1313
 
1314
        if (vcpu->mmio_is_write) {
1315
                vcpu->mmio_needed = 0;
1316
                return EMULATE_DO_MMIO;
1317
        }
1318
 
1319
        return EMULATE_DONE;
1320
}
1321
EXPORT_SYMBOL_GPL(emulate_instruction);
1322
 
1323
/*
1324
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
1325
 */
1326
static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1327
{
1328
        DECLARE_WAITQUEUE(wait, current);
1329
 
1330
        add_wait_queue(&vcpu->wq, &wait);
1331
 
1332
        /*
1333
         * We will block until either an interrupt or a signal wakes us up
1334
         */
1335
        while (!kvm_cpu_has_interrupt(vcpu)
1336
               && !signal_pending(current)
1337
               && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
1338
               && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
1339
                set_current_state(TASK_INTERRUPTIBLE);
1340
                vcpu_put(vcpu);
1341
                schedule();
1342
                vcpu_load(vcpu);
1343
        }
1344
 
1345
        __set_current_state(TASK_RUNNING);
1346
        remove_wait_queue(&vcpu->wq, &wait);
1347
}
1348
 
1349
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
1350
{
1351
        ++vcpu->stat.halt_exits;
1352
        if (irqchip_in_kernel(vcpu->kvm)) {
1353
                vcpu->mp_state = VCPU_MP_STATE_HALTED;
1354
                kvm_vcpu_block(vcpu);
1355
                if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
1356
                        return -EINTR;
1357
                return 1;
1358
        } else {
1359
                vcpu->run->exit_reason = KVM_EXIT_HLT;
1360
                return 0;
1361
        }
1362
}
1363
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
1364
 
1365
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1366
{
1367
        unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
1368
 
1369
        kvm_x86_ops->cache_regs(vcpu);
1370
        ret = -KVM_EINVAL;
1371
#ifdef CONFIG_X86_64
1372
        if (is_long_mode(vcpu)) {
1373
                nr = vcpu->regs[VCPU_REGS_RAX];
1374
                a0 = vcpu->regs[VCPU_REGS_RDI];
1375
                a1 = vcpu->regs[VCPU_REGS_RSI];
1376
                a2 = vcpu->regs[VCPU_REGS_RDX];
1377
                a3 = vcpu->regs[VCPU_REGS_RCX];
1378
                a4 = vcpu->regs[VCPU_REGS_R8];
1379
                a5 = vcpu->regs[VCPU_REGS_R9];
1380
        } else
1381
#endif
1382
        {
1383
                nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
1384
                a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
1385
                a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
1386
                a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
1387
                a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
1388
                a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
1389
                a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
1390
        }
1391
        switch (nr) {
1392
        default:
1393
                run->hypercall.nr = nr;
1394
                run->hypercall.args[0] = a0;
1395
                run->hypercall.args[1] = a1;
1396
                run->hypercall.args[2] = a2;
1397
                run->hypercall.args[3] = a3;
1398
                run->hypercall.args[4] = a4;
1399
                run->hypercall.args[5] = a5;
1400
                run->hypercall.ret = ret;
1401
                run->hypercall.longmode = is_long_mode(vcpu);
1402
                kvm_x86_ops->decache_regs(vcpu);
1403
                return 0;
1404
        }
1405
        vcpu->regs[VCPU_REGS_RAX] = ret;
1406
        kvm_x86_ops->decache_regs(vcpu);
1407
        return 1;
1408
}
1409
EXPORT_SYMBOL_GPL(kvm_hypercall);
1410
 
1411
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
1412
{
1413
        return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
1414
}
1415
 
1416
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1417
{
1418
        struct descriptor_table dt = { limit, base };
1419
 
1420
        kvm_x86_ops->set_gdt(vcpu, &dt);
1421
}
1422
 
1423
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1424
{
1425
        struct descriptor_table dt = { limit, base };
1426
 
1427
        kvm_x86_ops->set_idt(vcpu, &dt);
1428
}
1429
 
1430
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
1431
                   unsigned long *rflags)
1432
{
1433
        lmsw(vcpu, msw);
1434
        *rflags = kvm_x86_ops->get_rflags(vcpu);
1435
}
1436
 
1437
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
1438
{
1439
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
1440
        switch (cr) {
1441
        case 0:
1442
                return vcpu->cr0;
1443
        case 2:
1444
                return vcpu->cr2;
1445
        case 3:
1446
                return vcpu->cr3;
1447
        case 4:
1448
                return vcpu->cr4;
1449
        default:
1450
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1451
                return 0;
1452
        }
1453
}
1454
 
1455
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1456
                     unsigned long *rflags)
1457
{
1458
        switch (cr) {
1459
        case 0:
1460
                set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
1461
                *rflags = kvm_x86_ops->get_rflags(vcpu);
1462
                break;
1463
        case 2:
1464
                vcpu->cr2 = val;
1465
                break;
1466
        case 3:
1467
                set_cr3(vcpu, val);
1468
                break;
1469
        case 4:
1470
                set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
1471
                break;
1472
        default:
1473
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1474
        }
1475
}
1476
 
1477
/*
1478
 * Register the para guest with the host:
1479
 */
1480
static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1481
{
1482
        struct kvm_vcpu_para_state *para_state;
1483
        hpa_t para_state_hpa, hypercall_hpa;
1484
        struct page *para_state_page;
1485
        unsigned char *hypercall;
1486
        gpa_t hypercall_gpa;
1487
 
1488
        printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
1489
        printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
1490
 
1491
        /*
1492
         * Needs to be page aligned:
1493
         */
1494
        if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
1495
                goto err_gp;
1496
 
1497
        para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
1498
        printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
1499
        if (is_error_hpa(para_state_hpa))
1500
                goto err_gp;
1501
 
1502
        mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
1503
        para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
1504
        para_state = kmap(para_state_page);
1505
 
1506
        printk(KERN_DEBUG "....  guest version: %d\n", para_state->guest_version);
1507
        printk(KERN_DEBUG "....           size: %d\n", para_state->size);
1508
 
1509
        para_state->host_version = KVM_PARA_API_VERSION;
1510
        /*
1511
         * We cannot support guests that try to register themselves
1512
         * with a newer API version than the host supports:
1513
         */
1514
        if (para_state->guest_version > KVM_PARA_API_VERSION) {
1515
                para_state->ret = -KVM_EINVAL;
1516
                goto err_kunmap_skip;
1517
        }
1518
 
1519
        hypercall_gpa = para_state->hypercall_gpa;
1520
        hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
1521
        printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
1522
        if (is_error_hpa(hypercall_hpa)) {
1523
                para_state->ret = -KVM_EINVAL;
1524
                goto err_kunmap_skip;
1525
        }
1526
 
1527
        printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
1528
        vcpu->para_state_page = para_state_page;
1529
        vcpu->para_state_gpa = para_state_gpa;
1530
        vcpu->hypercall_gpa = hypercall_gpa;
1531
 
1532
        mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
1533
        hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
1534
                                KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
1535
        kvm_x86_ops->patch_hypercall(vcpu, hypercall);
1536
        kunmap_atomic(hypercall, KM_USER1);
1537
 
1538
        para_state->ret = 0;
1539
err_kunmap_skip:
1540
        kunmap(para_state_page);
1541
        return 0;
1542
err_gp:
1543
        return 1;
1544
}
1545
 
1546
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1547
{
1548
        u64 data;
1549
 
1550
        switch (msr) {
1551
        case 0xc0010010: /* SYSCFG */
1552
        case 0xc0010015: /* HWCR */
1553
        case MSR_IA32_PLATFORM_ID:
1554
        case MSR_IA32_P5_MC_ADDR:
1555
        case MSR_IA32_P5_MC_TYPE:
1556
        case MSR_IA32_MC0_CTL:
1557
        case MSR_IA32_MCG_STATUS:
1558
        case MSR_IA32_MCG_CAP:
1559
        case MSR_IA32_MC0_MISC:
1560
        case MSR_IA32_MC0_MISC+4:
1561
        case MSR_IA32_MC0_MISC+8:
1562
        case MSR_IA32_MC0_MISC+12:
1563
        case MSR_IA32_MC0_MISC+16:
1564
        case MSR_IA32_UCODE_REV:
1565
        case MSR_IA32_PERF_STATUS:
1566
        case MSR_IA32_EBL_CR_POWERON:
1567
                /* MTRR registers */
1568
        case 0xfe:
1569
        case 0x200 ... 0x2ff:
1570
                data = 0;
1571
                break;
1572
        case 0xcd: /* fsb frequency */
1573
                data = 3;
1574
                break;
1575
        case MSR_IA32_APICBASE:
1576
                data = kvm_get_apic_base(vcpu);
1577
                break;
1578
        case MSR_IA32_MISC_ENABLE:
1579
                data = vcpu->ia32_misc_enable_msr;
1580
                break;
1581
#ifdef CONFIG_X86_64
1582
        case MSR_EFER:
1583
                data = vcpu->shadow_efer;
1584
                break;
1585
#endif
1586
        default:
1587
                pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
1588
                return 1;
1589
        }
1590
        *pdata = data;
1591
        return 0;
1592
}
1593
EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1594
 
1595
/*
1596
 * Reads an msr value (of 'msr_index') into 'pdata'.
1597
 * Returns 0 on success, non-0 otherwise.
1598
 * Assumes vcpu_load() was already called.
1599
 */
1600
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1601
{
1602
        return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1603
}
1604
 
1605
#ifdef CONFIG_X86_64
1606
 
1607
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
1608
{
1609
        if (efer & EFER_RESERVED_BITS) {
1610
                printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
1611
                       efer);
1612
                inject_gp(vcpu);
1613
                return;
1614
        }
1615
 
1616
        if (is_paging(vcpu)
1617
            && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
1618
                printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
1619
                inject_gp(vcpu);
1620
                return;
1621
        }
1622
 
1623
        kvm_x86_ops->set_efer(vcpu, efer);
1624
 
1625
        efer &= ~EFER_LMA;
1626
        efer |= vcpu->shadow_efer & EFER_LMA;
1627
 
1628
        vcpu->shadow_efer = efer;
1629
}
1630
 
1631
#endif
1632
 
1633
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1634
{
1635
        switch (msr) {
1636
#ifdef CONFIG_X86_64
1637
        case MSR_EFER:
1638
                set_efer(vcpu, data);
1639
                break;
1640
#endif
1641
        case MSR_IA32_MC0_STATUS:
1642
                pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
1643
                       __FUNCTION__, data);
1644
                break;
1645
        case MSR_IA32_MCG_STATUS:
1646
                pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
1647
                        __FUNCTION__, data);
1648
                break;
1649
        case MSR_IA32_UCODE_REV:
1650
        case MSR_IA32_UCODE_WRITE:
1651
        case 0x200 ... 0x2ff: /* MTRRs */
1652
                break;
1653
        case MSR_IA32_APICBASE:
1654
                kvm_set_apic_base(vcpu, data);
1655
                break;
1656
        case MSR_IA32_MISC_ENABLE:
1657
                vcpu->ia32_misc_enable_msr = data;
1658
                break;
1659
        /*
1660
         * This is the 'probe whether the host is KVM' logic:
1661
         */
1662
        case MSR_KVM_API_MAGIC:
1663
                return vcpu_register_para(vcpu, data);
1664
 
1665
        default:
1666
                pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
1667
                return 1;
1668
        }
1669
        return 0;
1670
}
1671
EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1672
 
1673
/*
1674
 * Writes msr value into the appropriate "register".
1675
 * Returns 0 on success, non-0 otherwise.
1676
 * Assumes vcpu_load() was already called.
1677
 */
1678
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1679
{
1680
        return kvm_x86_ops->set_msr(vcpu, msr_index, data);
1681
}
1682
 
1683
void kvm_resched(struct kvm_vcpu *vcpu)
1684
{
1685
        if (!need_resched())
1686
                return;
1687
        cond_resched();
1688
}
1689
EXPORT_SYMBOL_GPL(kvm_resched);
1690
 
1691
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1692
{
1693
        int i;
1694
        u32 function;
1695
        struct kvm_cpuid_entry *e, *best;
1696
 
1697
        kvm_x86_ops->cache_regs(vcpu);
1698
        function = vcpu->regs[VCPU_REGS_RAX];
1699
        vcpu->regs[VCPU_REGS_RAX] = 0;
1700
        vcpu->regs[VCPU_REGS_RBX] = 0;
1701
        vcpu->regs[VCPU_REGS_RCX] = 0;
1702
        vcpu->regs[VCPU_REGS_RDX] = 0;
1703
        best = NULL;
1704
        for (i = 0; i < vcpu->cpuid_nent; ++i) {
1705
                e = &vcpu->cpuid_entries[i];
1706
                if (e->function == function) {
1707
                        best = e;
1708
                        break;
1709
                }
1710
                /*
1711
                 * Both in the basic range or both in the extended range?
1712
                 */
1713
                if (((e->function ^ function) & 0x80000000) == 0)
1714
                        if (!best || e->function > best->function)
1715
                                best = e;
1716
        }
1717
        if (best) {
1718
                vcpu->regs[VCPU_REGS_RAX] = best->eax;
1719
                vcpu->regs[VCPU_REGS_RBX] = best->ebx;
1720
                vcpu->regs[VCPU_REGS_RCX] = best->ecx;
1721
                vcpu->regs[VCPU_REGS_RDX] = best->edx;
1722
        }
1723
        kvm_x86_ops->decache_regs(vcpu);
1724
        kvm_x86_ops->skip_emulated_instruction(vcpu);
1725
}
1726
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
1727
 
1728
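/*
 * Copy string PIO data between the vcpu's pio_data page (shared with
 * userspace) and the pinned guest pages: into guest memory for an IN,
 * out of guest memory for an OUT.  The guest pages are released in
 * either case.
 */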
static int pio_copy_data(struct kvm_vcpu *vcpu)
1729
{
1730
        void *p = vcpu->pio_data;
1731
        void *q;
1732
        unsigned bytes;
1733
        int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
1734
 
1735
        q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
1736
                 PAGE_KERNEL);
1737
        if (!q) {
1738
                free_pio_guest_pages(vcpu);
1739
                return -ENOMEM;
1740
        }
1741
        q += vcpu->pio.guest_page_offset;
1742
        bytes = vcpu->pio.size * vcpu->pio.cur_count;
1743
        if (vcpu->pio.in)
1744
                memcpy(q, p, bytes);
1745
        else
1746
                memcpy(p, q, bytes);
1747
        q -= vcpu->pio.guest_page_offset;
1748
        vunmap(q);
1749
        free_pio_guest_pages(vcpu);
1750
        return 0;
1751
}
1752
 
1753
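/*
 * Finish a port I/O operation once the data is available: for a simple
 * IN, load the result into RAX; for string I/O, copy the data back to
 * guest memory (INS) and advance RSI/RDI (and RCX for REP) past the
 * iterations that completed.
 */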
static int complete_pio(struct kvm_vcpu *vcpu)
1754
{
1755
        struct kvm_pio_request *io = &vcpu->pio;
1756
        long delta;
1757
        int r;
1758
 
1759
        kvm_x86_ops->cache_regs(vcpu);
1760
 
1761
        if (!io->string) {
1762
                if (io->in)
1763
                        memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
1764
                               io->size);
1765
        } else {
1766
                if (io->in) {
1767
                        r = pio_copy_data(vcpu);
1768
                        if (r) {
1769
                                kvm_x86_ops->cache_regs(vcpu);
1770
                                return r;
1771
                        }
1772
                }
1773
 
1774
                delta = 1;
1775
                if (io->rep) {
1776
                        delta *= io->cur_count;
1777
                        /*
1778
                         * The size of the register should really depend on
1779
                         * current address size.
1780
                         */
1781
                        vcpu->regs[VCPU_REGS_RCX] -= delta;
1782
                }
1783
                if (io->down)
1784
                        delta = -delta;
1785
                delta *= io->size;
1786
                if (io->in)
1787
                        vcpu->regs[VCPU_REGS_RDI] += delta;
1788
                else
1789
                        vcpu->regs[VCPU_REGS_RSI] += delta;
1790
        }
1791
 
1792
        kvm_x86_ops->decache_regs(vcpu);
1793
 
1794
        io->count -= io->cur_count;
1795
        io->cur_count = 0;
1796
 
1797
        return 0;
1798
}
1799
 
1800
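/*
 * Hand a single (non-string) port access to an in-kernel I/O device,
 * serialized by the VM lock.
 */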
static void kernel_pio(struct kvm_io_device *pio_dev,
1801
                       struct kvm_vcpu *vcpu,
1802
                       void *pd)
1803
{
1804
        /* TODO: String I/O for in kernel device */
1805
 
1806
        mutex_lock(&vcpu->kvm->lock);
1807
        if (vcpu->pio.in)
1808
                kvm_iodevice_read(pio_dev, vcpu->pio.port,
1809
                                  vcpu->pio.size,
1810
                                  pd);
1811
        else
1812
                kvm_iodevice_write(pio_dev, vcpu->pio.port,
1813
                                   vcpu->pio.size,
1814
                                   pd);
1815
        mutex_unlock(&vcpu->kvm->lock);
1816
}
1817
 
1818
static void pio_string_write(struct kvm_io_device *pio_dev,
1819
                             struct kvm_vcpu *vcpu)
1820
{
1821
        struct kvm_pio_request *io = &vcpu->pio;
1822
        void *pd = vcpu->pio_data;
1823
        int i;
1824
 
1825
        mutex_lock(&vcpu->kvm->lock);
1826
        for (i = 0; i < io->cur_count; i++) {
1827
                kvm_iodevice_write(pio_dev, io->port,
1828
                                   io->size,
1829
                                   pd);
1830
                pd += io->size;
1831
        }
1832
        mutex_unlock(&vcpu->kvm->lock);
1833
}
1834
 
1835
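/*
 * Emulate a single IN/OUT instruction.  The access either completes
 * here against an in-kernel device (return 1, resume the guest) or is
 * left in vcpu->pio for userspace to service via KVM_EXIT_IO (return 0).
 */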
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1836
                  int size, unsigned port)
1837
{
1838
        struct kvm_io_device *pio_dev;
1839
 
1840
        vcpu->run->exit_reason = KVM_EXIT_IO;
1841
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1842
        vcpu->run->io.size = vcpu->pio.size = size;
1843
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1844
        vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
1845
        vcpu->run->io.port = vcpu->pio.port = port;
1846
        vcpu->pio.in = in;
1847
        vcpu->pio.string = 0;
1848
        vcpu->pio.down = 0;
1849
        vcpu->pio.guest_page_offset = 0;
1850
        vcpu->pio.rep = 0;
1851
 
1852
        kvm_x86_ops->cache_regs(vcpu);
1853
        memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1854
        kvm_x86_ops->decache_regs(vcpu);
1855
 
1856
        kvm_x86_ops->skip_emulated_instruction(vcpu);
1857
 
1858
        pio_dev = vcpu_find_pio_dev(vcpu, port);
1859
        if (pio_dev) {
1860
                kernel_pio(pio_dev, vcpu, vcpu->pio_data);
1861
                complete_pio(vcpu);
1862
                return 1;
1863
        }
1864
        return 0;
1865
}
1866
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
1867
 
1868
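/*
 * Emulate a (possibly REP) INS/OUTS instruction: pin the guest page(s)
 * backing this pass of the transfer, trim the iteration count so it
 * stays within them, and either complete a write against an in-kernel
 * device or exit to userspace to service the I/O.
 */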
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1869
                  int size, unsigned long count, int down,
1870
                  gva_t address, int rep, unsigned port)
1871
{
1872
        unsigned now, in_page;
1873
        int i, ret = 0;
1874
        int nr_pages = 1;
1875
        struct page *page;
1876
        struct kvm_io_device *pio_dev;
1877
 
1878
        vcpu->run->exit_reason = KVM_EXIT_IO;
1879
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1880
        vcpu->run->io.size = vcpu->pio.size = size;
1881
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1882
        vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
1883
        vcpu->run->io.port = vcpu->pio.port = port;
1884
        vcpu->pio.in = in;
1885
        vcpu->pio.string = 1;
1886
        vcpu->pio.down = down;
1887
        vcpu->pio.guest_page_offset = offset_in_page(address);
1888
        vcpu->pio.rep = rep;
1889
 
1890
        if (!count) {
1891
                kvm_x86_ops->skip_emulated_instruction(vcpu);
1892
                return 1;
1893
        }
1894
 
1895
        if (!down)
1896
                in_page = PAGE_SIZE - offset_in_page(address);
1897
        else
1898
                in_page = offset_in_page(address) + size;
1899
        now = min(count, (unsigned long)in_page / size);
1900
        if (!now) {
1901
                /*
1902
                 * String I/O straddles page boundary.  Pin two guest pages
1903
                 * so that we satisfy atomicity constraints.  Do just one
1904
                 * transaction to avoid complexity.
1905
                 */
1906
                nr_pages = 2;
1907
                now = 1;
1908
        }
1909
        if (down) {
1910
                /*
1911
                 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
1912
                 */
1913
                pr_unimpl(vcpu, "guest string pio down\n");
1914
                inject_gp(vcpu);
1915
                return 1;
1916
        }
1917
        vcpu->run->io.count = now;
1918
        vcpu->pio.cur_count = now;
1919
 
1920
        if (vcpu->pio.cur_count == vcpu->pio.count)
1921
                kvm_x86_ops->skip_emulated_instruction(vcpu);
1922
 
1923
        for (i = 0; i < nr_pages; ++i) {
1924
                mutex_lock(&vcpu->kvm->lock);
1925
                page = gva_to_page(vcpu, address + i * PAGE_SIZE);
1926
                if (page)
1927
                        get_page(page);
1928
                vcpu->pio.guest_pages[i] = page;
1929
                mutex_unlock(&vcpu->kvm->lock);
1930
                if (!page) {
1931
                        inject_gp(vcpu);
1932
                        free_pio_guest_pages(vcpu);
1933
                        return 1;
1934
                }
1935
        }
1936
 
1937
        pio_dev = vcpu_find_pio_dev(vcpu, port);
1938
        if (!vcpu->pio.in) {
1939
                /* string PIO write */
1940
                ret = pio_copy_data(vcpu);
1941
                if (ret >= 0 && pio_dev) {
1942
                        pio_string_write(pio_dev, vcpu);
1943
                        complete_pio(vcpu);
1944
                        if (vcpu->pio.count == 0)
1945
                                ret = 1;
1946
                }
1947
        } else if (pio_dev)
1948
                pr_unimpl(vcpu, "no string pio read support yet, "
1949
                       "port %x size %d count %ld\n",
1950
                        port, size, count);
1951
 
1952
        return ret;
1953
}
1954
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
1955
 
1956
/*
1957
 * Check if userspace requested an interrupt window, and that the
1958
 * interrupt window is open.
1959
 *
1960
 * No need to exit to userspace if we already have an interrupt queued.
1961
 */
1962
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1963
                                          struct kvm_run *kvm_run)
1964
{
1965
        return (!vcpu->irq_summary &&
1966
                kvm_run->request_interrupt_window &&
1967
                vcpu->interrupt_window_open &&
1968
                (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
1969
}
1970
 
1971
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1972
                              struct kvm_run *kvm_run)
1973
{
1974
        kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
1975
        kvm_run->cr8 = get_cr8(vcpu);
1976
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
1977
        if (irqchip_in_kernel(vcpu->kvm))
1978
                kvm_run->ready_for_interrupt_injection = 1;
1979
        else
1980
                kvm_run->ready_for_interrupt_injection =
1981
                                        (vcpu->interrupt_window_open &&
1982
                                         vcpu->irq_summary == 0);
1983
}
1984
 
1985
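/*
 * The core guest execution loop: reload the MMU, disable preemption and
 * interrupts, inject any pending interrupts, then enter the guest via
 * kvm_x86_ops->run().  Exits that the kernel can handle by itself loop
 * straight back into the guest; anything else is returned to
 * kvm_vcpu_ioctl_run() for userspace to deal with.
 */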
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1986
{
1987
        int r;
1988
 
1989
        if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
1990
                printk("vcpu %d received sipi with vector # %x\n",
1991
                       vcpu->vcpu_id, vcpu->sipi_vector);
1992
                kvm_lapic_reset(vcpu);
1993
                kvm_x86_ops->vcpu_reset(vcpu);
1994
                vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
1995
        }
1996
 
1997
preempted:
1998
        if (vcpu->guest_debug.enabled)
1999
                kvm_x86_ops->guest_debug_pre(vcpu);
2000
 
2001
again:
2002
        r = kvm_mmu_reload(vcpu);
2003
        if (unlikely(r))
2004
                goto out;
2005
 
2006
        preempt_disable();
2007
 
2008
        kvm_x86_ops->prepare_guest_switch(vcpu);
2009
        kvm_load_guest_fpu(vcpu);
2010
 
2011
        local_irq_disable();
2012
 
2013
        if (signal_pending(current)) {
2014
                local_irq_enable();
2015
                preempt_enable();
2016
                r = -EINTR;
2017
                kvm_run->exit_reason = KVM_EXIT_INTR;
2018
                ++vcpu->stat.signal_exits;
2019
                goto out;
2020
        }
2021
 
2022
        if (irqchip_in_kernel(vcpu->kvm))
2023
                kvm_x86_ops->inject_pending_irq(vcpu);
2024
        else if (!vcpu->mmio_read_completed)
2025
                kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
2026
 
2027
        vcpu->guest_mode = 1;
2028
        kvm_guest_enter();
2029
 
2030
        if (vcpu->requests)
2031
                if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
2032
                        kvm_x86_ops->tlb_flush(vcpu);
2033
 
2034
        kvm_x86_ops->run(vcpu, kvm_run);
2035
 
2036
        vcpu->guest_mode = 0;
2037
        local_irq_enable();
2038
 
2039
        ++vcpu->stat.exits;
2040
 
2041
        /*
2042
         * We must have an instruction between local_irq_enable() and
2043
         * kvm_guest_exit(), so the timer interrupt isn't delayed by
2044
         * the interrupt shadow.  The stat.exits increment will do nicely.
2045
         * But we need to prevent reordering, hence this barrier():
2046
         */
2047
        barrier();
2048
 
2049
        kvm_guest_exit();
2050
 
2051
        preempt_enable();
2052
 
2053
        /*
2054
         * Profile KVM exit RIPs:
2055
         */
2056
        if (unlikely(prof_on == KVM_PROFILING)) {
2057
                kvm_x86_ops->cache_regs(vcpu);
2058
                profile_hit(KVM_PROFILING, (void *)vcpu->rip);
2059
        }
2060
 
2061
        r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
2062
 
2063
        if (r > 0) {
2064
                if (dm_request_for_irq_injection(vcpu, kvm_run)) {
2065
                        r = -EINTR;
2066
                        kvm_run->exit_reason = KVM_EXIT_INTR;
2067
                        ++vcpu->stat.request_irq_exits;
2068
                        goto out;
2069
                }
2070
                if (!need_resched()) {
2071
                        ++vcpu->stat.light_exits;
2072
                        goto again;
2073
                }
2074
        }
2075
 
2076
out:
2077
        if (r > 0) {
2078
                kvm_resched(vcpu);
2079
                goto preempted;
2080
        }
2081
 
2082
        post_kvm_run_save(vcpu, kvm_run);
2083
 
2084
        return r;
2085
}
2086
 
2087
 
2088
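/*
 * Entry point for the KVM_RUN ioctl: finish any PIO or MMIO access that
 * forced the previous exit to userspace, pick up the hypercall return
 * value if there was one, then run the guest until the next userspace
 * exit.
 */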
static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2089
{
2090
        int r;
2091
        sigset_t sigsaved;
2092
 
2093
        vcpu_load(vcpu);
2094
 
2095
        if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
2096
                kvm_vcpu_block(vcpu);
2097
                vcpu_put(vcpu);
2098
                return -EAGAIN;
2099
        }
2100
 
2101
        if (vcpu->sigset_active)
2102
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2103
 
2104
        /* re-sync apic's tpr */
2105
        if (!irqchip_in_kernel(vcpu->kvm))
2106
                set_cr8(vcpu, kvm_run->cr8);
2107
 
2108
        if (vcpu->pio.cur_count) {
2109
                r = complete_pio(vcpu);
2110
                if (r)
2111
                        goto out;
2112
        }
2113
 
2114
        if (vcpu->mmio_needed) {
2115
                memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
2116
                vcpu->mmio_read_completed = 1;
2117
                vcpu->mmio_needed = 0;
2118
                r = emulate_instruction(vcpu, kvm_run,
2119
                                        vcpu->mmio_fault_cr2, 0);
2120
                if (r == EMULATE_DO_MMIO) {
2121
                        /*
2122
                         * Read-modify-write.  Back to userspace.
2123
                         */
2124
                        r = 0;
2125
                        goto out;
2126
                }
2127
        }
2128
 
2129
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
2130
                kvm_x86_ops->cache_regs(vcpu);
2131
                vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
2132
                kvm_x86_ops->decache_regs(vcpu);
2133
        }
2134
 
2135
        r = __vcpu_run(vcpu, kvm_run);
2136
 
2137
out:
2138
        if (vcpu->sigset_active)
2139
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2140
 
2141
        vcpu_put(vcpu);
2142
        return r;
2143
}
2144
 
2145
static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
2146
                                   struct kvm_regs *regs)
2147
{
2148
        vcpu_load(vcpu);
2149
 
2150
        kvm_x86_ops->cache_regs(vcpu);
2151
 
2152
        regs->rax = vcpu->regs[VCPU_REGS_RAX];
2153
        regs->rbx = vcpu->regs[VCPU_REGS_RBX];
2154
        regs->rcx = vcpu->regs[VCPU_REGS_RCX];
2155
        regs->rdx = vcpu->regs[VCPU_REGS_RDX];
2156
        regs->rsi = vcpu->regs[VCPU_REGS_RSI];
2157
        regs->rdi = vcpu->regs[VCPU_REGS_RDI];
2158
        regs->rsp = vcpu->regs[VCPU_REGS_RSP];
2159
        regs->rbp = vcpu->regs[VCPU_REGS_RBP];
2160
#ifdef CONFIG_X86_64
2161
        regs->r8 = vcpu->regs[VCPU_REGS_R8];
2162
        regs->r9 = vcpu->regs[VCPU_REGS_R9];
2163
        regs->r10 = vcpu->regs[VCPU_REGS_R10];
2164
        regs->r11 = vcpu->regs[VCPU_REGS_R11];
2165
        regs->r12 = vcpu->regs[VCPU_REGS_R12];
2166
        regs->r13 = vcpu->regs[VCPU_REGS_R13];
2167
        regs->r14 = vcpu->regs[VCPU_REGS_R14];
2168
        regs->r15 = vcpu->regs[VCPU_REGS_R15];
2169
#endif
2170
 
2171
        regs->rip = vcpu->rip;
2172
        regs->rflags = kvm_x86_ops->get_rflags(vcpu);
2173
 
2174
        /*
2175
         * Don't leak debug flags in case they were set for guest debugging
2176
         */
2177
        if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
2178
                regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
2179
 
2180
        vcpu_put(vcpu);
2181
 
2182
        return 0;
2183
}
2184
 
2185
static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
2186
                                   struct kvm_regs *regs)
2187
{
2188
        vcpu_load(vcpu);
2189
 
2190
        vcpu->regs[VCPU_REGS_RAX] = regs->rax;
2191
        vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
2192
        vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
2193
        vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
2194
        vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
2195
        vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
2196
        vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
2197
        vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
2198
#ifdef CONFIG_X86_64
2199
        vcpu->regs[VCPU_REGS_R8] = regs->r8;
2200
        vcpu->regs[VCPU_REGS_R9] = regs->r9;
2201
        vcpu->regs[VCPU_REGS_R10] = regs->r10;
2202
        vcpu->regs[VCPU_REGS_R11] = regs->r11;
2203
        vcpu->regs[VCPU_REGS_R12] = regs->r12;
2204
        vcpu->regs[VCPU_REGS_R13] = regs->r13;
2205
        vcpu->regs[VCPU_REGS_R14] = regs->r14;
2206
        vcpu->regs[VCPU_REGS_R15] = regs->r15;
2207
#endif
2208
 
2209
        vcpu->rip = regs->rip;
2210
        kvm_x86_ops->set_rflags(vcpu, regs->rflags);
2211
 
2212
        kvm_x86_ops->decache_regs(vcpu);
2213
 
2214
        vcpu_put(vcpu);
2215
 
2216
        return 0;
2217
}
2218
 
2219
static void get_segment(struct kvm_vcpu *vcpu,
2220
                        struct kvm_segment *var, int seg)
2221
{
2222
        return kvm_x86_ops->get_segment(vcpu, var, seg);
2223
}
2224
 
2225
static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2226
                                    struct kvm_sregs *sregs)
2227
{
2228
        struct descriptor_table dt;
2229
        int pending_vec;
2230
 
2231
        vcpu_load(vcpu);
2232
 
2233
        get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2234
        get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2235
        get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2236
        get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2237
        get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2238
        get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2239
 
2240
        get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2241
        get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2242
 
2243
        kvm_x86_ops->get_idt(vcpu, &dt);
2244
        sregs->idt.limit = dt.limit;
2245
        sregs->idt.base = dt.base;
2246
        kvm_x86_ops->get_gdt(vcpu, &dt);
2247
        sregs->gdt.limit = dt.limit;
2248
        sregs->gdt.base = dt.base;
2249
 
2250
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2251
        sregs->cr0 = vcpu->cr0;
2252
        sregs->cr2 = vcpu->cr2;
2253
        sregs->cr3 = vcpu->cr3;
2254
        sregs->cr4 = vcpu->cr4;
2255
        sregs->cr8 = get_cr8(vcpu);
2256
        sregs->efer = vcpu->shadow_efer;
2257
        sregs->apic_base = kvm_get_apic_base(vcpu);
2258
 
2259
        if (irqchip_in_kernel(vcpu->kvm)) {
2260
                memset(sregs->interrupt_bitmap, 0,
2261
                       sizeof sregs->interrupt_bitmap);
2262
                pending_vec = kvm_x86_ops->get_irq(vcpu);
2263
                if (pending_vec >= 0)
2264
                        set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap);
2265
        } else
2266
                memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
2267
                       sizeof sregs->interrupt_bitmap);
2268
 
2269
        vcpu_put(vcpu);
2270
 
2271
        return 0;
2272
}
2273
 
2274
static void set_segment(struct kvm_vcpu *vcpu,
2275
                        struct kvm_segment *var, int seg)
2276
{
2277
        return kvm_x86_ops->set_segment(vcpu, var, seg);
2278
}
2279
 
2280
static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2281
                                    struct kvm_sregs *sregs)
2282
{
2283
        int mmu_reset_needed = 0;
2284
        int i, pending_vec, max_bits;
2285
        struct descriptor_table dt;
2286
 
2287
        vcpu_load(vcpu);
2288
 
2289
        dt.limit = sregs->idt.limit;
2290
        dt.base = sregs->idt.base;
2291
        kvm_x86_ops->set_idt(vcpu, &dt);
2292
        dt.limit = sregs->gdt.limit;
2293
        dt.base = sregs->gdt.base;
2294
        kvm_x86_ops->set_gdt(vcpu, &dt);
2295
 
2296
        vcpu->cr2 = sregs->cr2;
2297
        mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
2298
        vcpu->cr3 = sregs->cr3;
2299
 
2300
        set_cr8(vcpu, sregs->cr8);
2301
 
2302
        mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
2303
#ifdef CONFIG_X86_64
2304
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
2305
#endif
2306
        kvm_set_apic_base(vcpu, sregs->apic_base);
2307
 
2308
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2309
 
2310
        mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
2311
        vcpu->cr0 = sregs->cr0;
2312
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
2313
 
2314
        mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
2315
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
2316
        if (!is_long_mode(vcpu) && is_pae(vcpu))
2317
                load_pdptrs(vcpu, vcpu->cr3);
2318
 
2319
        if (mmu_reset_needed)
2320
                kvm_mmu_reset_context(vcpu);
2321
 
2322
        if (!irqchip_in_kernel(vcpu->kvm)) {
2323
                memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
2324
                       sizeof vcpu->irq_pending);
2325
                vcpu->irq_summary = 0;
2326
                for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
2327
                        if (vcpu->irq_pending[i])
2328
                                __set_bit(i, &vcpu->irq_summary);
2329
        } else {
2330
                max_bits = (sizeof sregs->interrupt_bitmap) << 3;
2331
                pending_vec = find_first_bit(
2332
                        (const unsigned long *)sregs->interrupt_bitmap,
2333
                        max_bits);
2334
                /* Only pending external irq is handled here */
2335
                if (pending_vec < max_bits) {
2336
                        kvm_x86_ops->set_irq(vcpu, pending_vec);
2337
                        printk("Set back pending irq %d\n", pending_vec);
2338
                }
2339
        }
2340
 
2341
        set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2342
        set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2343
        set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2344
        set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2345
        set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2346
        set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2347
 
2348
        set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2349
        set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2350
 
2351
        vcpu_put(vcpu);
2352
 
2353
        return 0;
2354
}
2355
 
2356
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
2357
{
2358
        struct kvm_segment cs;
2359
 
2360
        get_segment(vcpu, &cs, VCPU_SREG_CS);
2361
        *db = cs.db;
2362
        *l = cs.l;
2363
}
2364
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
2365
 
2366
/*
2367
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
2368
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
2369
 *
2370
 * This list is modified at module load time to reflect the
2371
 * capabilities of the host cpu.
2372
 */
2373
static u32 msrs_to_save[] = {
2374
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
2375
        MSR_K6_STAR,
2376
#ifdef CONFIG_X86_64
2377
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
2378
#endif
2379
        MSR_IA32_TIME_STAMP_COUNTER,
2380
};
2381
 
2382
static unsigned num_msrs_to_save;
2383
 
2384
static u32 emulated_msrs[] = {
2385
        MSR_IA32_MISC_ENABLE,
2386
};
2387
 
2388
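/*
 * Probe each entry of msrs_to_save with rdmsr_safe() and compact the
 * array so that only MSRs the host cpu actually implements are reported
 * to userspace.
 */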
static __init void kvm_init_msr_list(void)
2389
{
2390
        u32 dummy[2];
2391
        unsigned i, j;
2392
 
2393
        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
2394
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
2395
                        continue;
2396
                if (j < i)
2397
                        msrs_to_save[j] = msrs_to_save[i];
2398
                j++;
2399
        }
2400
        num_msrs_to_save = j;
2401
}
2402
 
2403
/*
2404
 * Adapt set_msr() to msr_io()'s calling convention
2405
 */
2406
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2407
{
2408
        return kvm_set_msr(vcpu, index, *data);
2409
}
2410
 
2411
/*
2412
 * Read or write a bunch of msrs. All parameters are kernel addresses.
2413
 *
2414
 * @return number of msrs set successfully.
2415
 */
2416
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2417
                    struct kvm_msr_entry *entries,
2418
                    int (*do_msr)(struct kvm_vcpu *vcpu,
2419
                                  unsigned index, u64 *data))
2420
{
2421
        int i;
2422
 
2423
        vcpu_load(vcpu);
2424
 
2425
        for (i = 0; i < msrs->nmsrs; ++i)
2426
                if (do_msr(vcpu, entries[i].index, &entries[i].data))
2427
                        break;
2428
 
2429
        vcpu_put(vcpu);
2430
 
2431
        return i;
2432
}
2433
 
2434
/*
2435
 * Read or write a bunch of msrs. Parameters are user addresses.
2436
 *
2437
 * @return number of msrs set successfully.
2438
 */
2439
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2440
                  int (*do_msr)(struct kvm_vcpu *vcpu,
2441
                                unsigned index, u64 *data),
2442
                  int writeback)
2443
{
2444
        struct kvm_msrs msrs;
2445
        struct kvm_msr_entry *entries;
2446
        int r, n;
2447
        unsigned size;
2448
 
2449
        r = -EFAULT;
2450
        if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2451
                goto out;
2452
 
2453
        r = -E2BIG;
2454
        if (msrs.nmsrs >= MAX_IO_MSRS)
2455
                goto out;
2456
 
2457
        r = -ENOMEM;
2458
        size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2459
        entries = vmalloc(size);
2460
        if (!entries)
2461
                goto out;
2462
 
2463
        r = -EFAULT;
2464
        if (copy_from_user(entries, user_msrs->entries, size))
2465
                goto out_free;
2466
 
2467
        r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2468
        if (r < 0)
2469
                goto out_free;
2470
 
2471
        r = -EFAULT;
2472
        if (writeback && copy_to_user(user_msrs->entries, entries, size))
2473
                goto out_free;
2474
 
2475
        r = n;
2476
 
2477
out_free:
2478
        vfree(entries);
2479
out:
2480
        return r;
2481
}
2482
 
2483
/*
2484
 * Translate a guest virtual address to a guest physical address.
2485
 */
2486
static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2487
                                    struct kvm_translation *tr)
2488
{
2489
        unsigned long vaddr = tr->linear_address;
2490
        gpa_t gpa;
2491
 
2492
        vcpu_load(vcpu);
2493
        mutex_lock(&vcpu->kvm->lock);
2494
        gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
2495
        tr->physical_address = gpa;
2496
        tr->valid = gpa != UNMAPPED_GVA;
2497
        tr->writeable = 1;
2498
        tr->usermode = 0;
2499
        mutex_unlock(&vcpu->kvm->lock);
2500
        vcpu_put(vcpu);
2501
 
2502
        return 0;
2503
}
2504
 
2505
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2506
                                    struct kvm_interrupt *irq)
2507
{
2508
        if (irq->irq < 0 || irq->irq >= 256)
2509
                return -EINVAL;
2510
        if (irqchip_in_kernel(vcpu->kvm))
2511
                return -ENXIO;
2512
        vcpu_load(vcpu);
2513
 
2514
        set_bit(irq->irq, vcpu->irq_pending);
2515
        set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
2516
 
2517
        vcpu_put(vcpu);
2518
 
2519
        return 0;
2520
}
2521
 
2522
static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
2523
                                      struct kvm_debug_guest *dbg)
2524
{
2525
        int r;
2526
 
2527
        vcpu_load(vcpu);
2528
 
2529
        r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
2530
 
2531
        vcpu_put(vcpu);
2532
 
2533
        return r;
2534
}
2535
 
2536
static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
2537
                                    unsigned long address,
2538
                                    int *type)
2539
{
2540
        struct kvm_vcpu *vcpu = vma->vm_file->private_data;
2541
        unsigned long pgoff;
2542
        struct page *page;
2543
 
2544
        pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2545
        if (pgoff == 0)
2546
                page = virt_to_page(vcpu->run);
2547
        else if (pgoff == KVM_PIO_PAGE_OFFSET)
2548
                page = virt_to_page(vcpu->pio_data);
2549
        else
2550
                return NOPAGE_SIGBUS;
2551
        get_page(page);
2552
        if (type != NULL)
2553
                *type = VM_FAULT_MINOR;
2554
 
2555
        return page;
2556
}
2557
 
2558
static struct vm_operations_struct kvm_vcpu_vm_ops = {
2559
        .nopage = kvm_vcpu_nopage,
2560
};
2561
 
2562
static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
2563
{
2564
        vma->vm_ops = &kvm_vcpu_vm_ops;
2565
        return 0;
2566
}
2567
 
2568
static int kvm_vcpu_release(struct inode *inode, struct file *filp)
2569
{
2570
        struct kvm_vcpu *vcpu = filp->private_data;
2571
 
2572
        fput(vcpu->kvm->filp);
2573
        return 0;
2574
}
2575
 
2576
static struct file_operations kvm_vcpu_fops = {
2577
        .release        = kvm_vcpu_release,
2578
        .unlocked_ioctl = kvm_vcpu_ioctl,
2579
        .compat_ioctl   = kvm_vcpu_ioctl,
2580
        .mmap           = kvm_vcpu_mmap,
2581
};
2582
 
2583
/*
2584
 * Allocates an inode for the vcpu.
2585
 */
2586
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
2587
{
2588
        int fd, r;
2589
        struct inode *inode;
2590
        struct file *file;
2591
 
2592
        r = anon_inode_getfd(&fd, &inode, &file,
2593
                             "kvm-vcpu", &kvm_vcpu_fops, vcpu);
2594
        if (r)
2595
                return r;
2596
        atomic_inc(&vcpu->kvm->filp->f_count);
2597
        return fd;
2598
}
2599
 
2600
/*
2601
 * Creates some virtual cpus.  Good luck creating more than one.
2602
 */
2603
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
2604
{
2605
        int r;
2606
        struct kvm_vcpu *vcpu;
2607
 
2608
        if (!valid_vcpu(n))
2609
                return -EINVAL;
2610
 
2611
        vcpu = kvm_x86_ops->vcpu_create(kvm, n);
2612
        if (IS_ERR(vcpu))
2613
                return PTR_ERR(vcpu);
2614
 
2615
        preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
2616
 
2617
        /* We do fxsave: this must be aligned. */
2618
        BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
2619
 
2620
        vcpu_load(vcpu);
2621
        r = kvm_mmu_setup(vcpu);
2622
        vcpu_put(vcpu);
2623
        if (r < 0)
2624
                goto free_vcpu;
2625
 
2626
        mutex_lock(&kvm->lock);
2627
        if (kvm->vcpus[n]) {
2628
                r = -EEXIST;
2629
                mutex_unlock(&kvm->lock);
2630
                goto mmu_unload;
2631
        }
2632
        kvm->vcpus[n] = vcpu;
2633
        mutex_unlock(&kvm->lock);
2634
 
2635
        /* Now it's all set up, let userspace reach it */
2636
        r = create_vcpu_fd(vcpu);
2637
        if (r < 0)
2638
                goto unlink;
2639
        return r;
2640
 
2641
unlink:
2642
        mutex_lock(&kvm->lock);
2643
        kvm->vcpus[n] = NULL;
2644
        mutex_unlock(&kvm->lock);
2645
 
2646
mmu_unload:
2647
        vcpu_load(vcpu);
2648
        kvm_mmu_unload(vcpu);
2649
        vcpu_put(vcpu);
2650
 
2651
free_vcpu:
2652
        kvm_x86_ops->vcpu_free(vcpu);
2653
        return r;
2654
}
2655
 
2656
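/*
 * If the host kernel is running with EFER.NX clear, hide the NX bit
 * (CPUID 0x80000001 EDX bit 20) from the guest, since the shadow page
 * tables cannot set NX when the host has it disabled.
 */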
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
2657
{
2658
        u64 efer;
2659
        int i;
2660
        struct kvm_cpuid_entry *e, *entry;
2661
 
2662
        rdmsrl(MSR_EFER, efer);
2663
        entry = NULL;
2664
        for (i = 0; i < vcpu->cpuid_nent; ++i) {
2665
                e = &vcpu->cpuid_entries[i];
2666
                if (e->function == 0x80000001) {
2667
                        entry = e;
2668
                        break;
2669
                }
2670
        }
2671
        if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
2672
                entry->edx &= ~(1 << 20);
2673
                printk(KERN_INFO "kvm: guest NX capability removed\n");
2674
        }
2675
}
2676
 
2677
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
2678
                                    struct kvm_cpuid *cpuid,
2679
                                    struct kvm_cpuid_entry __user *entries)
2680
{
2681
        int r;
2682
 
2683
        r = -E2BIG;
2684
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2685
                goto out;
2686
        r = -EFAULT;
2687
        if (copy_from_user(&vcpu->cpuid_entries, entries,
2688
                           cpuid->nent * sizeof(struct kvm_cpuid_entry)))
2689
                goto out;
2690
        vcpu->cpuid_nent = cpuid->nent;
2691
        cpuid_fix_nx_cap(vcpu);
2692
        return 0;
2693
 
2694
out:
2695
        return r;
2696
}
2697
 
2698
static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
2699
{
2700
        if (sigset) {
2701
                sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2702
                vcpu->sigset_active = 1;
2703
                vcpu->sigset = *sigset;
2704
        } else
2705
                vcpu->sigset_active = 0;
2706
        return 0;
2707
}
2708
 
2709
/*
2710
 * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
2711
 * we have asm/x86/processor.h
2712
 */
2713
struct fxsave {
2714
        u16     cwd;
2715
        u16     swd;
2716
        u16     twd;
2717
        u16     fop;
2718
        u64     rip;
2719
        u64     rdp;
2720
        u32     mxcsr;
2721
        u32     mxcsr_mask;
2722
        u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
2723
#ifdef CONFIG_X86_64
2724
        u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 256 bytes */
2725
#else
2726
        u32     xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
2727
#endif
2728
};
2729
 
2730
static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2731
{
2732
        struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2733
 
2734
        vcpu_load(vcpu);
2735
 
2736
        memcpy(fpu->fpr, fxsave->st_space, 128);
2737
        fpu->fcw = fxsave->cwd;
2738
        fpu->fsw = fxsave->swd;
2739
        fpu->ftwx = fxsave->twd;
2740
        fpu->last_opcode = fxsave->fop;
2741
        fpu->last_ip = fxsave->rip;
2742
        fpu->last_dp = fxsave->rdp;
2743
        memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
2744
 
2745
        vcpu_put(vcpu);
2746
 
2747
        return 0;
2748
}
2749
 
2750
static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2751
{
2752
        struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2753
 
2754
        vcpu_load(vcpu);
2755
 
2756
        memcpy(fxsave->st_space, fpu->fpr, 128);
2757
        fxsave->cwd = fpu->fcw;
2758
        fxsave->swd = fpu->fsw;
2759
        fxsave->twd = fpu->ftwx;
2760
        fxsave->fop = fpu->last_opcode;
2761
        fxsave->rip = fpu->last_ip;
2762
        fxsave->rdp = fpu->last_dp;
2763
        memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
2764
 
2765
        vcpu_put(vcpu);
2766
 
2767
        return 0;
2768
}
2769
 
2770
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2771
                                    struct kvm_lapic_state *s)
2772
{
2773
        vcpu_load(vcpu);
2774
        memcpy(s->regs, vcpu->apic->regs, sizeof *s);
2775
        vcpu_put(vcpu);
2776
 
2777
        return 0;
2778
}
2779
 
2780
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2781
                                    struct kvm_lapic_state *s)
2782
{
2783
        vcpu_load(vcpu);
2784
        memcpy(vcpu->apic->regs, s->regs, sizeof *s);
2785
        kvm_apic_post_state_restore(vcpu);
2786
        vcpu_put(vcpu);
2787
 
2788
        return 0;
2789
}
2790
 
2791
static long kvm_vcpu_ioctl(struct file *filp,
2792
                           unsigned int ioctl, unsigned long arg)
2793
{
2794
        struct kvm_vcpu *vcpu = filp->private_data;
2795
        void __user *argp = (void __user *)arg;
2796
        int r = -EINVAL;
2797
 
2798
        switch (ioctl) {
2799
        case KVM_RUN:
2800
                r = -EINVAL;
2801
                if (arg)
2802
                        goto out;
2803
                r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
2804
                break;
2805
        case KVM_GET_REGS: {
2806
                struct kvm_regs kvm_regs;
2807
 
2808
                memset(&kvm_regs, 0, sizeof kvm_regs);
2809
                r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
2810
                if (r)
2811
                        goto out;
2812
                r = -EFAULT;
2813
                if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
2814
                        goto out;
2815
                r = 0;
2816
                break;
2817
        }
2818
        case KVM_SET_REGS: {
2819
                struct kvm_regs kvm_regs;
2820
 
2821
                r = -EFAULT;
2822
                if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
2823
                        goto out;
2824
                r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
2825
                if (r)
2826
                        goto out;
2827
                r = 0;
2828
                break;
2829
        }
2830
        case KVM_GET_SREGS: {
2831
                struct kvm_sregs kvm_sregs;
2832
 
2833
                memset(&kvm_sregs, 0, sizeof kvm_sregs);
2834
                r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
2835
                if (r)
2836
                        goto out;
2837
                r = -EFAULT;
2838
                if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
2839
                        goto out;
2840
                r = 0;
2841
                break;
2842
        }
2843
        case KVM_SET_SREGS: {
2844
                struct kvm_sregs kvm_sregs;
2845
 
2846
                r = -EFAULT;
2847
                if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
2848
                        goto out;
2849
                r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
2850
                if (r)
2851
                        goto out;
2852
                r = 0;
2853
                break;
2854
        }
2855
        case KVM_TRANSLATE: {
2856
                struct kvm_translation tr;
2857
 
2858
                r = -EFAULT;
2859
                if (copy_from_user(&tr, argp, sizeof tr))
2860
                        goto out;
2861
                r = kvm_vcpu_ioctl_translate(vcpu, &tr);
2862
                if (r)
2863
                        goto out;
2864
                r = -EFAULT;
2865
                if (copy_to_user(argp, &tr, sizeof tr))
2866
                        goto out;
2867
                r = 0;
2868
                break;
2869
        }
2870
        case KVM_INTERRUPT: {
2871
                struct kvm_interrupt irq;
2872
 
2873
                r = -EFAULT;
2874
                if (copy_from_user(&irq, argp, sizeof irq))
2875
                        goto out;
2876
                r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2877
                if (r)
2878
                        goto out;
2879
                r = 0;
2880
                break;
2881
        }
2882
        case KVM_DEBUG_GUEST: {
2883
                struct kvm_debug_guest dbg;
2884
 
2885
                r = -EFAULT;
2886
                if (copy_from_user(&dbg, argp, sizeof dbg))
2887
                        goto out;
2888
                r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg);
2889
                if (r)
2890
                        goto out;
2891
                r = 0;
2892
                break;
2893
        }
2894
        case KVM_GET_MSRS:
2895
                r = msr_io(vcpu, argp, kvm_get_msr, 1);
2896
                break;
2897
        case KVM_SET_MSRS:
2898
                r = msr_io(vcpu, argp, do_set_msr, 0);
2899
                break;
2900
        case KVM_SET_CPUID: {
2901
                struct kvm_cpuid __user *cpuid_arg = argp;
2902
                struct kvm_cpuid cpuid;
2903
 
2904
                r = -EFAULT;
2905
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2906
                        goto out;
2907
                r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2908
                if (r)
2909
                        goto out;
2910
                break;
2911
        }
2912
        case KVM_SET_SIGNAL_MASK: {
2913
                struct kvm_signal_mask __user *sigmask_arg = argp;
2914
                struct kvm_signal_mask kvm_sigmask;
2915
                sigset_t sigset, *p;
2916
 
2917
                p = NULL;
2918
                if (argp) {
2919
                        r = -EFAULT;
2920
                        if (copy_from_user(&kvm_sigmask, argp,
2921
                                           sizeof kvm_sigmask))
2922
                                goto out;
2923
                        r = -EINVAL;
2924
                        if (kvm_sigmask.len != sizeof sigset)
2925
                                goto out;
2926
                        r = -EFAULT;
2927
                        if (copy_from_user(&sigset, sigmask_arg->sigset,
2928
                                           sizeof sigset))
2929
                                goto out;
2930
                        p = &sigset;
2931
                }
2932
                r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
2933
                break;
2934
        }
2935
        case KVM_GET_FPU: {
2936
                struct kvm_fpu fpu;
2937
 
2938
                memset(&fpu, 0, sizeof fpu);
2939
                r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu);
2940
                if (r)
2941
                        goto out;
2942
                r = -EFAULT;
2943
                if (copy_to_user(argp, &fpu, sizeof fpu))
2944
                        goto out;
2945
                r = 0;
2946
                break;
2947
        }
2948
        case KVM_SET_FPU: {
2949
                struct kvm_fpu fpu;
2950
 
2951
                r = -EFAULT;
2952
                if (copy_from_user(&fpu, argp, sizeof fpu))
2953
                        goto out;
2954
                r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu);
2955
                if (r)
2956
                        goto out;
2957
                r = 0;
2958
                break;
2959
        }
2960
        case KVM_GET_LAPIC: {
2961
                struct kvm_lapic_state lapic;
2962
 
2963
                memset(&lapic, 0, sizeof lapic);
2964
                r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
2965
                if (r)
2966
                        goto out;
2967
                r = -EFAULT;
2968
                if (copy_to_user(argp, &lapic, sizeof lapic))
2969
                        goto out;
2970
                r = 0;
2971
                break;
2972
        }
2973
        case KVM_SET_LAPIC: {
2974
                struct kvm_lapic_state lapic;
2975
 
2976
                r = -EFAULT;
2977
                if (copy_from_user(&lapic, argp, sizeof lapic))
2978
                        goto out;
2979
                r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
2980
                if (r)
2981
                        goto out;
2982
                r = 0;
2983
                break;
2984
        }
2985
        default:
2986
                ;
2987
        }
2988
out:
2989
        return r;
2990
}
2991
 
2992
static long kvm_vm_ioctl(struct file *filp,
2993
                           unsigned int ioctl, unsigned long arg)
2994
{
2995
        struct kvm *kvm = filp->private_data;
2996
        void __user *argp = (void __user *)arg;
2997
        int r = -EINVAL;
2998
 
2999
        switch (ioctl) {
3000
        case KVM_CREATE_VCPU:
3001
                r = kvm_vm_ioctl_create_vcpu(kvm, arg);
3002
                if (r < 0)
3003
                        goto out;
3004
                break;
3005
        case KVM_SET_MEMORY_REGION: {
3006
                struct kvm_memory_region kvm_mem;
3007
 
3008
                r = -EFAULT;
3009
                if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
3010
                        goto out;
3011
                r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
3012
                if (r)
3013
                        goto out;
3014
                break;
3015
        }
3016
        case KVM_GET_DIRTY_LOG: {
3017
                struct kvm_dirty_log log;
3018
 
3019
                r = -EFAULT;
3020
                if (copy_from_user(&log, argp, sizeof log))
3021
                        goto out;
3022
                r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
3023
                if (r)
3024
                        goto out;
3025
                break;
3026
        }
3027
        case KVM_SET_MEMORY_ALIAS: {
3028
                struct kvm_memory_alias alias;
3029
 
3030
                r = -EFAULT;
3031
                if (copy_from_user(&alias, argp, sizeof alias))
3032
                        goto out;
3033
                r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
3034
                if (r)
3035
                        goto out;
3036
                break;
3037
        }
3038
        case KVM_CREATE_IRQCHIP:
3039
                r = -ENOMEM;
3040
                kvm->vpic = kvm_create_pic(kvm);
3041
                if (kvm->vpic) {
3042
                        r = kvm_ioapic_init(kvm);
3043
                        if (r) {
3044
                                kfree(kvm->vpic);
3045
                                kvm->vpic = NULL;
3046
                                goto out;
3047
                        }
3048
                }
3049
                else
3050
                        goto out;
3051
                break;
3052
        case KVM_IRQ_LINE: {
3053
                struct kvm_irq_level irq_event;
3054
 
3055
                r = -EFAULT;
3056
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
3057
                        goto out;
3058
                if (irqchip_in_kernel(kvm)) {
3059
                        mutex_lock(&kvm->lock);
3060
                        if (irq_event.irq < 16)
3061
                                kvm_pic_set_irq(pic_irqchip(kvm),
3062
                                        irq_event.irq,
3063
                                        irq_event.level);
3064
                        kvm_ioapic_set_irq(kvm->vioapic,
3065
                                        irq_event.irq,
3066
                                        irq_event.level);
3067
                        mutex_unlock(&kvm->lock);
3068
                        r = 0;
3069
                }
3070
                break;
3071
        }
3072
        case KVM_GET_IRQCHIP: {
3073
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3074
                struct kvm_irqchip chip;
3075
 
3076
                r = -EFAULT;
3077
                if (copy_from_user(&chip, argp, sizeof chip))
3078
                        goto out;
3079
                r = -ENXIO;
3080
                if (!irqchip_in_kernel(kvm))
3081
                        goto out;
3082
                r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
3083
                if (r)
3084
                        goto out;
3085
                r = -EFAULT;
3086
                if (copy_to_user(argp, &chip, sizeof chip))
3087
                        goto out;
3088
                r = 0;
3089
                break;
3090
        }
3091
        case KVM_SET_IRQCHIP: {
3092
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3093
                struct kvm_irqchip chip;
3094
 
3095
                r = -EFAULT;
3096
                if (copy_from_user(&chip, argp, sizeof chip))
3097
                        goto out;
3098
                r = -ENXIO;
3099
                if (!irqchip_in_kernel(kvm))
3100
                        goto out;
3101
                r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
3102
                if (r)
3103
                        goto out;
3104
                r = 0;
3105
                break;
3106
        }
3107
        default:
3108
                ;
3109
        }
3110
out:
3111
        return r;
3112
}
3113
 
3114
static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
3115
                                  unsigned long address,
3116
                                  int *type)
3117
{
3118
        struct kvm *kvm = vma->vm_file->private_data;
3119
        unsigned long pgoff;
3120
        struct page *page;
3121
 
3122
        pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
3123
        page = gfn_to_page(kvm, pgoff);
3124
        if (!page)
3125
                return NOPAGE_SIGBUS;
3126
        get_page(page);
3127
        if (type != NULL)
3128
                *type = VM_FAULT_MINOR;
3129
 
3130
        return page;
3131
}
3132
 
3133
static struct vm_operations_struct kvm_vm_vm_ops = {
3134
        .nopage = kvm_vm_nopage,
3135
};
3136
 
3137
static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
3138
{
3139
        vma->vm_ops = &kvm_vm_vm_ops;
3140
        return 0;
3141
}
3142
 
3143
static struct file_operations kvm_vm_fops = {
3144
        .release        = kvm_vm_release,
3145
        .unlocked_ioctl = kvm_vm_ioctl,
3146
        .compat_ioctl   = kvm_vm_ioctl,
3147
        .mmap           = kvm_vm_mmap,
3148
};
3149
 
3150
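/*
 * Back a newly created VM with an anonymous inode and return the file
 * descriptor userspace will use for all further VM ioctls.
 */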
static int kvm_dev_ioctl_create_vm(void)
3151
{
3152
        int fd, r;
3153
        struct inode *inode;
3154
        struct file *file;
3155
        struct kvm *kvm;
3156
 
3157
        kvm = kvm_create_vm();
3158
        if (IS_ERR(kvm))
3159
                return PTR_ERR(kvm);
3160
        r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
3161
        if (r) {
3162
                kvm_destroy_vm(kvm);
3163
                return r;
3164
        }
3165
 
3166
        kvm->filp = file;
3167
 
3168
        return fd;
3169
}
3170
 
3171
static long kvm_dev_ioctl(struct file *filp,
3172
                          unsigned int ioctl, unsigned long arg)
3173
{
3174
        void __user *argp = (void __user *)arg;
3175
        long r = -EINVAL;
3176
 
3177
        switch (ioctl) {
3178
        case KVM_GET_API_VERSION:
3179
                r = -EINVAL;
3180
                if (arg)
3181
                        goto out;
3182
                r = KVM_API_VERSION;
3183
                break;
3184
        case KVM_CREATE_VM:
3185
                r = -EINVAL;
3186
                if (arg)
3187
                        goto out;
3188
                r = kvm_dev_ioctl_create_vm();
3189
                break;
3190
        case KVM_GET_MSR_INDEX_LIST: {
3191
                struct kvm_msr_list __user *user_msr_list = argp;
3192
                struct kvm_msr_list msr_list;
3193
                unsigned n;
3194
 
3195
                r = -EFAULT;
3196
                if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
3197
                        goto out;
3198
                n = msr_list.nmsrs;
3199
                msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
3200
                if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
3201
                        goto out;
3202
                r = -E2BIG;
3203
                if (n < num_msrs_to_save)
3204
                        goto out;
3205
                r = -EFAULT;
3206
                if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3207
                                 num_msrs_to_save * sizeof(u32)))
3208
                        goto out;
3209
                if (copy_to_user(user_msr_list->indices
3210
                                 + num_msrs_to_save * sizeof(u32),
3211
                                 &emulated_msrs,
3212
                                 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
3213
                        goto out;
3214
                r = 0;
3215
                break;
3216
        }
3217
        case KVM_CHECK_EXTENSION: {
3218
                int ext = (long)argp;
3219
 
3220
                switch (ext) {
3221
                case KVM_CAP_IRQCHIP:
3222
                case KVM_CAP_HLT:
3223
                        r = 1;
3224
                        break;
3225
                default:
3226
                        r = 0;
3227
                        break;
3228
                }
3229
                break;
3230
        }
3231
        case KVM_GET_VCPU_MMAP_SIZE:
3232
                r = -EINVAL;
3233
                if (arg)
3234
                        goto out;
3235
                r = 2 * PAGE_SIZE;
3236
                break;
3237
        default:
3238
                ;
3239
        }
3240
out:
3241
        return r;
3242
}
3243
 
3244
static struct file_operations kvm_chardev_ops = {
3245
        .unlocked_ioctl = kvm_dev_ioctl,
3246
        .compat_ioctl   = kvm_dev_ioctl,
3247
};
3248
 
3249
static struct miscdevice kvm_dev = {
3250
        KVM_MINOR,
3251
        "kvm",
3252
        &kvm_chardev_ops,
3253
};
3254
 
3255
/*
3256
 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
3257
 * cached on it.
3258
 */
3259
static void decache_vcpus_on_cpu(int cpu)
3260
{
3261
        struct kvm *vm;
3262
        struct kvm_vcpu *vcpu;
3263
        int i;
3264
 
3265
        spin_lock(&kvm_lock);
3266
        list_for_each_entry(vm, &vm_list, vm_list)
3267
                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
3268
                        vcpu = vm->vcpus[i];
3269
                        if (!vcpu)
3270
                                continue;
3271
                        /*
3272
                         * If the vcpu is locked, then it is running on some
3273
                         * other cpu and therefore it is not cached on the
3274
                         * cpu in question.
3275
                         *
3276
                         * If it's not locked, check the last cpu it executed
3277
                         * on.
3278
                         */
3279
                        if (mutex_trylock(&vcpu->mutex)) {
3280
                                if (vcpu->cpu == cpu) {
3281
                                        kvm_x86_ops->vcpu_decache(vcpu);
3282
                                        vcpu->cpu = -1;
3283
                                }
3284
                                mutex_unlock(&vcpu->mutex);
3285
                        }
3286
                }
3287
        spin_unlock(&kvm_lock);
3288
}
3289
 
3290
static void hardware_enable(void *junk)
3291
{
3292
        int cpu = raw_smp_processor_id();
3293
 
3294
        if (cpu_isset(cpu, cpus_hardware_enabled))
3295
                return;
3296
        cpu_set(cpu, cpus_hardware_enabled);
3297
        kvm_x86_ops->hardware_enable(NULL);
3298
}
3299
 
3300
static void hardware_disable(void *junk)
3301
{
3302
        int cpu = raw_smp_processor_id();
3303
 
3304
        if (!cpu_isset(cpu, cpus_hardware_enabled))
3305
                return;
3306
        cpu_clear(cpu, cpus_hardware_enabled);
3307
        decache_vcpus_on_cpu(cpu);
3308
        kvm_x86_ops->hardware_disable(NULL);
3309
}
3310
 
3311
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
3312
                           void *v)
3313
{
3314
        int cpu = (long)v;
3315
 
3316
        switch (val) {
3317
        case CPU_DYING:
3318
        case CPU_DYING_FROZEN:
3319
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
3320
                       cpu);
3321
                hardware_disable(NULL);
3322
                break;
3323
        case CPU_UP_CANCELED:
3324
        case CPU_UP_CANCELED_FROZEN:
3325
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
3326
                       cpu);
3327
                smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
3328
                break;
3329
        case CPU_ONLINE:
3330
        case CPU_ONLINE_FROZEN:
3331
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
3332
                       cpu);
3333
                smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
3334
                break;
3335
        }
3336
        return NOTIFY_OK;
3337
}
3338
 
3339
static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
3340
                       void *v)
3341
{
3342
        if (val == SYS_RESTART) {
3343
                /*
3344
                 * Some (well, at least mine) BIOSes hang on reboot if
3345
                 * in vmx root mode.
3346
                 */
3347
                printk(KERN_INFO "kvm: exiting hardware virtualization\n");
3348
                on_each_cpu(hardware_disable, NULL, 0, 1);
3349
        }
3350
        return NOTIFY_OK;
3351
}
3352
 
3353
static struct notifier_block kvm_reboot_notifier = {
3354
        .notifier_call = kvm_reboot,
3355
        .priority = 0,
3356
};
3357
 
3358
void kvm_io_bus_init(struct kvm_io_bus *bus)
3359
{
3360
        memset(bus, 0, sizeof(*bus));
3361
}
3362
 
3363
void kvm_io_bus_destroy(struct kvm_io_bus *bus)
3364
{
3365
        int i;
3366
 
3367
        for (i = 0; i < bus->dev_count; i++) {
3368
                struct kvm_io_device *pos = bus->devs[i];
3369
 
3370
                kvm_iodevice_destructor(pos);
3371
        }
3372
}
3373
 
3374
struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
3375
{
3376
        int i;
3377
 
3378
        for (i = 0; i < bus->dev_count; i++) {
3379
                struct kvm_io_device *pos = bus->devs[i];
3380
 
3381
                if (pos->in_range(pos, addr))
3382
                        return pos;
3383
        }
3384
 
3385
        return NULL;
3386
}
3387
 
3388
void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
3389
{
3390
        BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
3391
 
3392
        bus->devs[bus->dev_count++] = dev;
3393
}
3394
 
3395
static struct notifier_block kvm_cpu_notifier = {
3396
        .notifier_call = kvm_cpu_hotplug,
3397
        .priority = 20, /* must be > scheduler priority */
3398
};
3399
 
3400
static u64 stat_get(void *_offset)
3401
{
3402
        unsigned offset = (long)_offset;
3403
        u64 total = 0;
3404
        struct kvm *kvm;
3405
        struct kvm_vcpu *vcpu;
3406
        int i;
3407
 
3408
        spin_lock(&kvm_lock);
3409
        list_for_each_entry(kvm, &vm_list, vm_list)
3410
                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
3411
                        vcpu = kvm->vcpus[i];
3412
                        if (vcpu)
3413
                                total += *(u32 *)((void *)vcpu + offset);
3414
                }
3415
        spin_unlock(&kvm_lock);
3416
        return total;
3417
}
3418
 
3419
DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n");
3420
 
3421
static __init void kvm_init_debug(void)
3422
{
3423
        struct kvm_stats_debugfs_item *p;
3424
 
3425
        debugfs_dir = debugfs_create_dir("kvm", NULL);
3426
        for (p = debugfs_entries; p->name; ++p)
3427
                p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
3428
                                                (void *)(long)p->offset,
3429
                                                &stat_fops);
3430
}
3431
 
3432
static void kvm_exit_debug(void)
3433
{
3434
        struct kvm_stats_debugfs_item *p;
3435
 
3436
        for (p = debugfs_entries; p->name; ++p)
3437
                debugfs_remove(p->dentry);
3438
        debugfs_remove(debugfs_dir);
3439
}
3440
 
3441
static int kvm_suspend(struct sys_device *dev, pm_message_t state)
3442
{
3443
        hardware_disable(NULL);
3444
        return 0;
3445
}
3446
 
3447
static int kvm_resume(struct sys_device *dev)
3448
{
3449
        hardware_enable(NULL);
3450
        return 0;
3451
}
3452
 
3453
static struct sysdev_class kvm_sysdev_class = {
3454
        set_kset_name("kvm"),
3455
        .suspend = kvm_suspend,
3456
        .resume = kvm_resume,
3457
};
3458
 
3459
static struct sys_device kvm_sysdev = {
3460
        .id = 0,
3461
        .cls = &kvm_sysdev_class,
3462
};
3463
 
3464
hpa_t bad_page_address;
3465
 
3466
static inline
3467
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
3468
{
3469
        return container_of(pn, struct kvm_vcpu, preempt_notifier);
3470
}
3471
 
3472
static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3473
{
3474
        struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3475
 
3476
        kvm_x86_ops->vcpu_load(vcpu, cpu);
3477
}
3478
 
3479
static void kvm_sched_out(struct preempt_notifier *pn,
3480
                          struct task_struct *next)
3481
{
3482
        struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3483
 
3484
        kvm_x86_ops->vcpu_put(vcpu);
3485
}
3486
 
3487
int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
3488
                  struct module *module)
3489
{
3490
        int r;
3491
        int cpu;
3492
 
3493
        if (kvm_x86_ops) {
3494
                printk(KERN_ERR "kvm: already loaded the other module\n");
3495
                return -EEXIST;
3496
        }
3497
 
3498
        if (!ops->cpu_has_kvm_support()) {
3499
                printk(KERN_ERR "kvm: no hardware support\n");
3500
                return -EOPNOTSUPP;
3501
        }
3502
        if (ops->disabled_by_bios()) {
3503
                printk(KERN_ERR "kvm: disabled by bios\n");
3504
                return -EOPNOTSUPP;
3505
        }
3506
 
3507
        kvm_x86_ops = ops;
3508
 
3509
        r = kvm_x86_ops->hardware_setup();
3510
        if (r < 0)
3511
                goto out;
3512
 
3513
        for_each_online_cpu(cpu) {
3514
                smp_call_function_single(cpu,
3515
                                kvm_x86_ops->check_processor_compatibility,
3516
                                &r, 0, 1);
3517
                if (r < 0)
3518
                        goto out_free_0;
3519
        }
3520
 
3521
        on_each_cpu(hardware_enable, NULL, 0, 1);
3522
        r = register_cpu_notifier(&kvm_cpu_notifier);
3523
        if (r)
3524
                goto out_free_1;
3525
        register_reboot_notifier(&kvm_reboot_notifier);
3526
 
3527
        r = sysdev_class_register(&kvm_sysdev_class);
3528
        if (r)
3529
                goto out_free_2;
3530
 
3531
        r = sysdev_register(&kvm_sysdev);
3532
        if (r)
3533
                goto out_free_3;
3534
 
3535
        /* A kmem cache lets us meet the alignment requirements of fx_save. */
3536
        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
3537
                                           __alignof__(struct kvm_vcpu), 0, 0);
3538
        if (!kvm_vcpu_cache) {
3539
                r = -ENOMEM;
3540
                goto out_free_4;
3541
        }
3542
 
3543
        kvm_chardev_ops.owner = module;
3544
 
3545
        r = misc_register(&kvm_dev);
3546
        if (r) {
3547
                printk (KERN_ERR "kvm: misc device register failed\n");
3548
                goto out_free;
3549
        }
3550
 
3551
        kvm_preempt_ops.sched_in = kvm_sched_in;
3552
        kvm_preempt_ops.sched_out = kvm_sched_out;
3553
 
3554
        return r;
3555
 
3556
out_free:
3557
        kmem_cache_destroy(kvm_vcpu_cache);
3558
out_free_4:
3559
        sysdev_unregister(&kvm_sysdev);
3560
out_free_3:
3561
        sysdev_class_unregister(&kvm_sysdev_class);
3562
out_free_2:
3563
        unregister_reboot_notifier(&kvm_reboot_notifier);
3564
        unregister_cpu_notifier(&kvm_cpu_notifier);
3565
out_free_1:
3566
        on_each_cpu(hardware_disable, NULL, 0, 1);
3567
out_free_0:
3568
        kvm_x86_ops->hardware_unsetup();
3569
out:
3570
        kvm_x86_ops = NULL;
3571
        return r;
3572
}
3573
 
3574
void kvm_exit_x86(void)
3575
{
3576
        misc_deregister(&kvm_dev);
3577
        kmem_cache_destroy(kvm_vcpu_cache);
3578
        sysdev_unregister(&kvm_sysdev);
3579
        sysdev_class_unregister(&kvm_sysdev_class);
3580
        unregister_reboot_notifier(&kvm_reboot_notifier);
3581
        unregister_cpu_notifier(&kvm_cpu_notifier);
3582
        on_each_cpu(hardware_disable, NULL, 0, 1);
3583
        kvm_x86_ops->hardware_unsetup();
3584
        kvm_x86_ops = NULL;
3585
}
3586
 
3587
static __init int kvm_init(void)
3588
{
3589
        static struct page *bad_page;
3590
        int r;
3591
 
3592
        r = kvm_mmu_module_init();
3593
        if (r)
3594
                goto out4;
3595
 
3596
        kvm_init_debug();
3597
 
3598
        kvm_init_msr_list();
3599
 
3600
        if ((bad_page = alloc_page(GFP_KERNEL)) == NULL) {
3601
                r = -ENOMEM;
3602
                goto out;
3603
        }
3604
 
3605
        bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT;
3606
        memset(__va(bad_page_address), 0, PAGE_SIZE);
3607
 
3608
        return 0;
3609
 
3610
out:
3611
        kvm_exit_debug();
3612
        kvm_mmu_module_exit();
3613
out4:
3614
        return r;
3615
}
3616
 
3617
static __exit void kvm_exit(void)
3618
{
3619
        kvm_exit_debug();
3620
        __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
3621
        kvm_mmu_module_exit();
3622
}
3623
 
3624
module_init(kvm_init)
3625
module_exit(kvm_exit)
3626
 
3627
EXPORT_SYMBOL_GPL(kvm_init_x86);
3628
EXPORT_SYMBOL_GPL(kvm_exit_x86);
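
To make the character-device interface above concrete, the following is a minimal user-space sketch (not part of kvm_main.c) that opens /dev/kvm and issues the ioctls handled by kvm_dev_ioctl(): KVM_GET_API_VERSION, KVM_CHECK_EXTENSION, KVM_GET_VCPU_MMAP_SIZE and KVM_CREATE_VM. It assumes a kernel with this module loaded and linux/kvm.h installed; error handling is deliberately minimal.

/*
 * Illustrative user-space sketch (not part of kvm_main.c): exercises the
 * /dev/kvm ioctls dispatched by kvm_dev_ioctl() above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm_fd, api_version, has_irqchip, mmap_size, vm_fd;

        kvm_fd = open("/dev/kvm", O_RDWR);
        if (kvm_fd < 0) {
                perror("open /dev/kvm");
                return 1;
        }

        /* kvm_dev_ioctl() returns KVM_API_VERSION for this request. */
        api_version = ioctl(kvm_fd, KVM_GET_API_VERSION, 0);
        printf("KVM API version: %d\n", api_version);

        /* Reports 1 for extensions this module supports, e.g. the in-kernel irqchip. */
        has_irqchip = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        printf("KVM_CAP_IRQCHIP: %d\n", has_irqchip);

        /* Size of the per-vcpu mmap region (two pages in this version of the driver). */
        mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        printf("vcpu mmap size: %d bytes\n", mmap_size);

        /* kvm_dev_ioctl_create_vm() hands back a new file descriptor for the VM. */
        vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
        if (vm_fd < 0)
                perror("KVM_CREATE_VM");
        else
                close(vm_fd);

        close(kvm_fd);
        return 0;
}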
