/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/sysdev.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include "iova.h"
#include "intel-iommu.h"
#include <asm/proto.h> /* force_iommu in this header in x86-64*/
#include <asm/cacheflush.h>
#include <asm/gart.h>
#include "pci.h"

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START      (0xfee00000)
#define IOAPIC_RANGE_END        (0xfeefffff)
#define IOVA_START_ADDR         (0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

static void domain_remove_dev_info(struct dmar_domain *domain);

static int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static int __init intel_iommu_setup(char *str)
{
        if (!str)
                return -EINVAL;
        while (*str) {
                if (!strncmp(str, "off", 3)) {
                        dmar_disabled = 1;
                        printk(KERN_INFO"Intel-IOMMU: disabled\n");
                } else if (!strncmp(str, "igfx_off", 8)) {
                        dmar_map_gfx = 0;
                        printk(KERN_INFO
                                "Intel-IOMMU: disable GFX device mapping\n");
                } else if (!strncmp(str, "forcedac", 8)) {
                        printk (KERN_INFO
                                "Intel-IOMMU: Forcing DAC for PCI devices\n");
                        dmar_forcedac = 1;
                }

                str += strcspn(str, ",");
                while (*str == ',')
                        str++;
        }
        return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

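/*
 * The allocation helpers below temporarily set PF_MEMALLOC on the current
 * task so the GFP_ATOMIC allocations may dip into emergency reserves, then
 * restore the flag to its previous state.
 */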
static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
        unsigned int flags;
        void *vaddr;

        /* trying to avoid low memory issues */
        flags = current->flags & PF_MEMALLOC;
        current->flags |= PF_MEMALLOC;
        vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
        current->flags &= (~PF_MEMALLOC | flags);
        return vaddr;
}


static inline void *alloc_pgtable_page(void)
{
        unsigned int flags;
        void *vaddr;

        /* trying to avoid low memory issues */
        flags = current->flags & PF_MEMALLOC;
        current->flags |= PF_MEMALLOC;
        vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
        current->flags &= (~PF_MEMALLOC | flags);
        return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
        free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
        return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static inline void free_domain_mem(void *vaddr)
{
        kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
        return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
        kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
        return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
        kmem_cache_free(iommu_iova_cache, iova);
}

static inline void __iommu_flush_cache(
        struct intel_iommu *iommu, void *addr, int size)
{
        if (!ecap_coherent(iommu->ecap))
                clflush_cache_range(addr, size);
}

/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
                u8 bus, u8 devfn)
{
        struct root_entry *root;
        struct context_entry *context;
        unsigned long phy_addr;
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        root = &iommu->root_entry[bus];
        context = get_context_addr_from_root(root);
        if (!context) {
                context = (struct context_entry *)alloc_pgtable_page();
                if (!context) {
                        spin_unlock_irqrestore(&iommu->lock, flags);
                        return NULL;
                }
                __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
                phy_addr = virt_to_phys((void *)context);
                set_root_value(root, phy_addr);
                set_root_present(root);
                __iommu_flush_cache(iommu, root, sizeof(*root));
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
        return &context[devfn];
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
        struct root_entry *root;
        struct context_entry *context;
        int ret;
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        root = &iommu->root_entry[bus];
        context = get_context_addr_from_root(root);
        if (!context) {
                ret = 0;
                goto out;
        }
        ret = context_present(context[devfn]);
out:
        spin_unlock_irqrestore(&iommu->lock, flags);
        return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
        struct root_entry *root;
        struct context_entry *context;
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        root = &iommu->root_entry[bus];
        context = get_context_addr_from_root(root);
        if (context) {
                context_clear_entry(context[devfn]);
                __iommu_flush_cache(iommu, &context[devfn], \
                        sizeof(*context));
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
        struct root_entry *root;
        int i;
        unsigned long flags;
        struct context_entry *context;

        spin_lock_irqsave(&iommu->lock, flags);
        if (!iommu->root_entry) {
                goto out;
        }
        for (i = 0; i < ROOT_ENTRY_NR; i++) {
                root = &iommu->root_entry[i];
                context = get_context_addr_from_root(root);
                if (context)
                        free_pgtable_page(context);
        }
        free_pgtable_page(iommu->root_entry);
        iommu->root_entry = NULL;
out:
        spin_unlock_irqrestore(&iommu->lock, flags);
}

/* page table handling */
#define LEVEL_STRIDE            (9)
#define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
        return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
        return 30 + agaw * LEVEL_STRIDE;

}

static inline int width_to_agaw(int width)
{
        return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
        return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
        return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
        return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
        return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
        return ((addr + level_size(level) - 1) & level_mask(level));
}

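/*
 * Walk the domain's multi-level page table down to the level-1 entry for
 * 'addr', allocating (and cache-flushing) intermediate page-table pages
 * that are not yet present.
 */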
static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
        int addr_width = agaw_to_width(domain->agaw);
        struct dma_pte *parent, *pte = NULL;
        int level = agaw_to_level(domain->agaw);
        int offset;
        unsigned long flags;

        BUG_ON(!domain->pgd);

        addr &= (((u64)1) << addr_width) - 1;
        parent = domain->pgd;

        spin_lock_irqsave(&domain->mapping_lock, flags);
        while (level > 0) {
                void *tmp_page;

                offset = address_level_offset(addr, level);
                pte = &parent[offset];
                if (level == 1)
                        break;

                if (!dma_pte_present(*pte)) {
                        tmp_page = alloc_pgtable_page();

                        if (!tmp_page) {
                                spin_unlock_irqrestore(&domain->mapping_lock,
                                        flags);
                                return NULL;
                        }
                        __iommu_flush_cache(domain->iommu, tmp_page,
                                        PAGE_SIZE_4K);
                        dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
                        /*
                         * high level table always sets r/w, last level page
                         * table control read/write
                         */
                        dma_set_pte_readable(*pte);
                        dma_set_pte_writable(*pte);
                        __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
                }
                parent = phys_to_virt(dma_pte_addr(*pte));
                level--;
        }

        spin_unlock_irqrestore(&domain->mapping_lock, flags);
        return pte;
}

/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
                int level)
{
        struct dma_pte *parent, *pte = NULL;
        int total = agaw_to_level(domain->agaw);
        int offset;

        parent = domain->pgd;
        while (level <= total) {
                offset = address_level_offset(addr, total);
                pte = &parent[offset];
                if (level == total)
                        return pte;

                if (!dma_pte_present(*pte))
                        break;
                parent = phys_to_virt(dma_pte_addr(*pte));
                total--;
        }
        return NULL;
}

/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
        struct dma_pte *pte = NULL;

        /* get last level pte */
        pte = dma_addr_level_pte(domain, addr, 1);

        if (pte) {
                dma_clear_pte(*pte);
                __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
        }
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
        int addr_width = agaw_to_width(domain->agaw);

        start &= (((u64)1) << addr_width) - 1;
        end &= (((u64)1) << addr_width) - 1;
        /* in case it's partial page */
        start = PAGE_ALIGN_4K(start);
        end &= PAGE_MASK_4K;

        /* we don't need lock here, nobody else touches the iova range */
        while (start < end) {
                dma_pte_clear_one(domain, start);
                start += PAGE_SIZE_4K;
        }
}

/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
        u64 start, u64 end)
{
        int addr_width = agaw_to_width(domain->agaw);
        struct dma_pte *pte;
        int total = agaw_to_level(domain->agaw);
        int level;
        u64 tmp;

        start &= (((u64)1) << addr_width) - 1;
        end &= (((u64)1) << addr_width) - 1;

        /* we don't need lock here, nobody else touches the iova range */
        level = 2;
        while (level <= total) {
                tmp = align_to_level(start, level);
                if (tmp >= end || (tmp + level_size(level) > end))
                        return;

                while (tmp < end) {
                        pte = dma_addr_level_pte(domain, tmp, level);
                        if (pte) {
                                free_pgtable_page(
                                        phys_to_virt(dma_pte_addr(*pte)));
                                dma_clear_pte(*pte);
                                __iommu_flush_cache(domain->iommu,
                                                pte, sizeof(*pte));
                        }
                        tmp += level_size(level);
                }
                level++;
        }
        /* free pgd */
        if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
                free_pgtable_page(domain->pgd);
                domain->pgd = NULL;
        }
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
        struct root_entry *root;
        unsigned long flags;

        root = (struct root_entry *)alloc_pgtable_page();
        if (!root)
                return -ENOMEM;

        __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);

        spin_lock_irqsave(&iommu->lock, flags);
        iommu->root_entry = root;
        spin_unlock_irqrestore(&iommu->lock, flags);

        return 0;
}

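/*
 * Poll an IOMMU register with 'op' until 'cond' becomes true; panic if the
 * hardware has not responded within DMAR_OPERATION_TIMEOUT.
 */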
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
{\
        unsigned long start_time = jiffies;\
        while (1) {\
                sts = op (iommu->reg + offset);\
                if (cond)\
                        break;\
                if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
                        panic("DMAR hardware is malfunctioning\n");\
                cpu_relax();\
        }\
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
        void *addr;
        u32 cmd, sts;
        unsigned long flag;

        addr = iommu->root_entry;

        spin_lock_irqsave(&iommu->register_lock, flag);
        dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

        cmd = iommu->gcmd | DMA_GCMD_SRTP;
        writel(cmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (sts & DMA_GSTS_RTPS), sts);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
        u32 val;
        unsigned long flag;

        if (!cap_rwbf(iommu->cap))
                return;
        val = iommu->gcmd | DMA_GCMD_WBF;

        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(val, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                        readl, (!(val & DMA_GSTS_WBFS)), val);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
        u16 did, u16 source_id, u8 function_mask, u64 type,
        int non_present_entry_flush)
{
        u64 val = 0;
        unsigned long flag;

        /*
         * In the non-present entry flush case, if hardware doesn't cache
         * non-present entry we do nothing and if hardware cache non-present
         * entry, we flush entries of domain 0 (the domain id is used to cache
         * any non-present entries)
         */
        if (non_present_entry_flush) {
                if (!cap_caching_mode(iommu->cap))
                        return 1;
                else
                        did = 0;
        }

        switch (type) {
        case DMA_CCMD_GLOBAL_INVL:
                val = DMA_CCMD_GLOBAL_INVL;
                break;
        case DMA_CCMD_DOMAIN_INVL:
                val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
                break;
        case DMA_CCMD_DEVICE_INVL:
                val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
                        | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
                break;
        default:
                BUG();
        }
        val |= DMA_CCMD_ICC;

        spin_lock_irqsave(&iommu->register_lock, flag);
        dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
                dmar_readq, (!(val & DMA_CCMD_ICC)), val);

        spin_unlock_irqrestore(&iommu->register_lock, flag);

        /* flush context entry will implicitly flush write buffer */
        return 0;
}

static int inline iommu_flush_context_global(struct intel_iommu *iommu,
        int non_present_entry_flush)
{
        return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
                non_present_entry_flush);
}

static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
        int non_present_entry_flush)
{
        return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
                non_present_entry_flush);
}

static int inline iommu_flush_context_device(struct intel_iommu *iommu,
        u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
{
        return __iommu_flush_context(iommu, did, source_id, function_mask,
                DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
}

/* return value determines if we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
        u64 addr, unsigned int size_order, u64 type,
        int non_present_entry_flush)
{
        int tlb_offset = ecap_iotlb_offset(iommu->ecap);
        u64 val = 0, val_iva = 0;
        unsigned long flag;

        /*
         * In the non-present entry flush case, if hardware doesn't cache
         * non-present entry we do nothing and if hardware cache non-present
         * entry, we flush entries of domain 0 (the domain id is used to cache
         * any non-present entries)
         */
        if (non_present_entry_flush) {
                if (!cap_caching_mode(iommu->cap))
                        return 1;
                else
                        did = 0;
        }

        switch (type) {
        case DMA_TLB_GLOBAL_FLUSH:
                /* global flush doesn't need set IVA_REG */
                val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
                break;
        case DMA_TLB_DSI_FLUSH:
                val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
                break;
        case DMA_TLB_PSI_FLUSH:
                val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
                /* Note: always flush non-leaf currently */
                val_iva = size_order | addr;
                break;
        default:
                BUG();
        }
        /* Note: set drain read/write */
#if 0
        /*
         * This is probably to be super secure.. Looks like we can
         * ignore it without any impact.
         */
        if (cap_read_drain(iommu->cap))
                val |= DMA_TLB_READ_DRAIN;
#endif
        if (cap_write_drain(iommu->cap))
                val |= DMA_TLB_WRITE_DRAIN;

        spin_lock_irqsave(&iommu->register_lock, flag);
        /* Note: Only uses first TLB reg currently */
        if (val_iva)
                dmar_writeq(iommu->reg + tlb_offset, val_iva);
        dmar_writeq(iommu->reg + tlb_offset + 8, val);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, tlb_offset + 8,
                dmar_readq, (!(val & DMA_TLB_IVT)), val);

        spin_unlock_irqrestore(&iommu->register_lock, flag);

        /* check IOTLB invalidation granularity */
        if (DMA_TLB_IAIG(val) == 0)
                printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
        if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
                pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
                        DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
        /* flush context entry will implicitly flush write buffer */
        return 0;
}

static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
        int non_present_entry_flush)
{
        return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
                non_present_entry_flush);
}

static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
        int non_present_entry_flush)
{
        return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
                non_present_entry_flush);
}

static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
        u64 addr, unsigned int pages, int non_present_entry_flush)
{
        unsigned int mask;

        BUG_ON(addr & (~PAGE_MASK_4K));
        BUG_ON(pages == 0);

        /* Fallback to domain selective flush if no PSI support */
        if (!cap_pgsel_inv(iommu->cap))
                return iommu_flush_iotlb_dsi(iommu, did,
                        non_present_entry_flush);

        /*
         * PSI requires page size to be 2 ^ x, and the base address is naturally
         * aligned to the size
         */
        mask = ilog2(__roundup_pow_of_two(pages));
        /* Fallback to domain selective flush if size is too big */
        if (mask > cap_max_amask_val(iommu->cap))
                return iommu_flush_iotlb_dsi(iommu, did,
                        non_present_entry_flush);

        return __iommu_flush_iotlb(iommu, did, addr, mask,
                DMA_TLB_PSI_FLUSH, non_present_entry_flush);
}

static int iommu_enable_translation(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flags;

        spin_lock_irqsave(&iommu->register_lock, flags);
        writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (sts & DMA_GSTS_TES), sts);

        iommu->gcmd |= DMA_GCMD_TE;
        spin_unlock_irqrestore(&iommu->register_lock, flags);
        return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        iommu->gcmd &= ~DMA_GCMD_TE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (!(sts & DMA_GSTS_TES)), sts);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
        return 0;
}

/* iommu interrupt handling. Most of this is MSI-like. */

static char *fault_reason_strings[] =
{
        "Software",
        "Present bit in root entry is clear",
        "Present bit in context entry is clear",
        "Invalid context entry",
        "Access beyond MGAW",
        "PTE Write access is not set",
        "PTE Read access is not set",
        "Next page table ptr is invalid",
        "Root table address invalid",
        "Context table ptr is invalid",
        "non-zero reserved fields in RTP",
        "non-zero reserved fields in CTP",
        "non-zero reserved fields in PTE",
        "Unknown"
};
#define MAX_FAULT_REASON_IDX    ARRAY_SIZE(fault_reason_strings) - 1

char *dmar_get_fault_reason(u8 fault_reason)
{
        if (fault_reason >= MAX_FAULT_REASON_IDX)
                return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];
        else
                return fault_reason_strings[fault_reason];
}

void dmar_msi_unmask(unsigned int irq)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        /* unmask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(0, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
        unsigned long flag;
        struct intel_iommu *iommu = get_irq_data(irq);

        /* mask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
                u8 fault_reason, u16 source_id, u64 addr)
{
        char *reason;

        reason = dmar_get_fault_reason(fault_reason);

        printk(KERN_ERR
                "DMAR:[%s] Request device [%02x:%02x.%d] "
                "fault addr %llx \n"
                "DMAR:[fault reason %02d] %s\n",
                (type ? "DMA Read" : "DMA Write"),
                (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
        return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
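/*
 * Primary fault interrupt handler: walk the fault recording registers,
 * report each pending fault via iommu_page_fault_do_one(), clear it, and
 * finally clear the primary fault overflow bit if it is set.
 */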
static irqreturn_t iommu_page_fault(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;
        int reg, fault_index;
        u32 fault_status;
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);

        /* TBD: ignore advanced fault log currently */
        if (!(fault_status & DMA_FSTS_PPF))
                goto clear_overflow;

        fault_index = dma_fsts_fault_record_index(fault_status);
        reg = cap_fault_reg_offset(iommu->cap);
        while (1) {
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
                int type;
                u32 data;

                /* highest 32 bits */
                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
                if (!(data & DMA_FRCD_F))
                        break;

                fault_reason = dma_frcd_fault_reason(data);
                type = dma_frcd_type(data);

                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
                source_id = dma_frcd_source_id(data);

                guest_addr = dmar_readq(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
                guest_addr = dma_frcd_page_addr(guest_addr);
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
                        fault_index * PRIMARY_FAULT_REG_LEN + 12);

                spin_unlock_irqrestore(&iommu->register_lock, flag);

                iommu_page_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

                fault_index++;
                if (fault_index > cap_num_fault_regs(iommu->cap))
                        fault_index = 0;
                spin_lock_irqsave(&iommu->register_lock, flag);
        }
clear_overflow:
        /* clear primary fault overflow */
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        if (fault_status & DMA_FSTS_PFO)
                writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
        return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
        int irq, ret;

        irq = create_irq();
        if (!irq) {
                printk(KERN_ERR "IOMMU: no free vectors\n");
                return -EINVAL;
        }

        set_irq_data(irq, iommu);
        iommu->irq = irq;

        ret = arch_setup_dmar_msi(irq);
        if (ret) {
                set_irq_data(irq, NULL);
                iommu->irq = 0;
                destroy_irq(irq);
                return 0;
        }

        /* Force fault register is cleared */
        iommu_page_fault(irq, iommu);

        ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
        if (ret)
                printk(KERN_ERR "IOMMU: can't request irq\n");
        return ret;
}

static int iommu_init_domains(struct intel_iommu *iommu)
{
        unsigned long ndomains;
        unsigned long nlongs;

        ndomains = cap_ndoms(iommu->cap);
        pr_debug("Number of Domains supportd <%ld>\n", ndomains);
        nlongs = BITS_TO_LONGS(ndomains);

        /* TBD: there might be 64K domains,
         * consider other allocation for future chip
         */
        iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
        if (!iommu->domain_ids) {
                printk(KERN_ERR "Allocating domain id array failed\n");
                return -ENOMEM;
        }
        iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
                        GFP_KERNEL);
        if (!iommu->domains) {
                printk(KERN_ERR "Allocating domain array failed\n");
                kfree(iommu->domain_ids);
                return -ENOMEM;
        }

        /*
         * if Caching mode is set, then invalid translations are tagged
         * with domainid 0. Hence we need to pre-allocate it.
         */
        if (cap_caching_mode(iommu->cap))
                set_bit(0, iommu->domain_ids);
        return 0;
}

static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
{
        struct intel_iommu *iommu;
        int ret;
        int map_size;
        u32 ver;

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return NULL;
        iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
        if (!iommu->reg) {
                printk(KERN_ERR "IOMMU: can't map the region\n");
                goto error;
        }
        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

        /* the registers might be more than one page */
        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
                cap_max_fault_reg_offset(iommu->cap));
        map_size = PAGE_ALIGN_4K(map_size);
        if (map_size > PAGE_SIZE_4K) {
                iounmap(iommu->reg);
                iommu->reg = ioremap(drhd->reg_base_addr, map_size);
                if (!iommu->reg) {
                        printk(KERN_ERR "IOMMU: can't map the region\n");
                        goto error;
                }
        }

        ver = readl(iommu->reg + DMAR_VER_REG);
        pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
                drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
                iommu->cap, iommu->ecap);
        ret = iommu_init_domains(iommu);
        if (ret)
                goto error_unmap;
        spin_lock_init(&iommu->lock);
        spin_lock_init(&iommu->register_lock);

        drhd->iommu = iommu;
        return iommu;
error_unmap:
        iounmap(iommu->reg);
error:
        kfree(iommu);
        return NULL;
}

static void domain_exit(struct dmar_domain *domain);
static void free_iommu(struct intel_iommu *iommu)
{
        struct dmar_domain *domain;
        int i;

        if (!iommu)
                return;

        i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
        for (; i < cap_ndoms(iommu->cap); ) {
                domain = iommu->domains[i];
                clear_bit(i, iommu->domain_ids);
                domain_exit(domain);
                i = find_next_bit(iommu->domain_ids,
                        cap_ndoms(iommu->cap), i+1);
        }

        if (iommu->gcmd & DMA_GCMD_TE)
                iommu_disable_translation(iommu);

        if (iommu->irq) {
                set_irq_data(iommu->irq, NULL);
                /* This will mask the irq */
                free_irq(iommu->irq, iommu);
                destroy_irq(iommu->irq);
        }

        kfree(iommu->domains);
        kfree(iommu->domain_ids);

        /* free context mapping */
        free_context_table(iommu);

        if (iommu->reg)
                iounmap(iommu->reg);
        kfree(iommu);
}

static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
{
        unsigned long num;
        unsigned long ndomains;
        struct dmar_domain *domain;
        unsigned long flags;

        domain = alloc_domain_mem();
        if (!domain)
                return NULL;

        ndomains = cap_ndoms(iommu->cap);

        spin_lock_irqsave(&iommu->lock, flags);
        num = find_first_zero_bit(iommu->domain_ids, ndomains);
        if (num >= ndomains) {
                spin_unlock_irqrestore(&iommu->lock, flags);
                free_domain_mem(domain);
                printk(KERN_ERR "IOMMU: no free domain ids\n");
                return NULL;
        }

        set_bit(num, iommu->domain_ids);
        domain->id = num;
        domain->iommu = iommu;
        iommu->domains[num] = domain;
        spin_unlock_irqrestore(&iommu->lock, flags);

        return domain;
}

static void iommu_free_domain(struct dmar_domain *domain)
{
        unsigned long flags;

        spin_lock_irqsave(&domain->iommu->lock, flags);
        clear_bit(domain->id, domain->iommu->domain_ids);
        spin_unlock_irqrestore(&domain->iommu->lock, flags);
}

static struct iova_domain reserved_iova_list;

static void dmar_init_reserved_ranges(void)
{
        struct pci_dev *pdev = NULL;
        struct iova *iova;
        int i;
        u64 addr, size;

        init_iova_domain(&reserved_iova_list);

        /* IOAPIC ranges shouldn't be accessed by DMA */
        iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
                IOVA_PFN(IOAPIC_RANGE_END));
        if (!iova)
                printk(KERN_ERR "Reserve IOAPIC range failed\n");

        /* Reserve all PCI MMIO to avoid peer-to-peer access */
        for_each_pci_dev(pdev) {
                struct resource *r;

                for (i = 0; i < PCI_NUM_RESOURCES; i++) {
                        r = &pdev->resource[i];
                        if (!r->flags || !(r->flags & IORESOURCE_MEM))
                                continue;
                        addr = r->start;
                        addr &= PAGE_MASK_4K;
                        size = r->end - addr;
                        size = PAGE_ALIGN_4K(size);
                        iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
                                IOVA_PFN(size + addr) - 1);
                        if (!iova)
                                printk(KERN_ERR "Reserve iova failed\n");
                }
        }

}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
        copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
        int agaw;
        int r = (gaw - 12) % 9;

        if (r == 0)
                agaw = gaw;
        else
                agaw = gaw + 9 - r;
        if (agaw > 64)
                agaw = 64;
        return agaw;
}

static int domain_init(struct dmar_domain *domain, int guest_width)
{
        struct intel_iommu *iommu;
        int adjust_width, agaw;
        unsigned long sagaw;

        init_iova_domain(&domain->iovad);
        spin_lock_init(&domain->mapping_lock);

        domain_reserve_special_ranges(domain);

        /* calculate AGAW */
        iommu = domain->iommu;
        if (guest_width > cap_mgaw(iommu->cap))
                guest_width = cap_mgaw(iommu->cap);
        domain->gaw = guest_width;
        adjust_width = guestwidth_to_adjustwidth(guest_width);
        agaw = width_to_agaw(adjust_width);
        sagaw = cap_sagaw(iommu->cap);
        if (!test_bit(agaw, &sagaw)) {
                /* hardware doesn't support it, choose a bigger one */
                pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
                agaw = find_next_bit(&sagaw, 5, agaw);
                if (agaw >= 5)
                        return -ENODEV;
        }
        domain->agaw = agaw;
        INIT_LIST_HEAD(&domain->devices);

        /* always allocate the top pgd */
        domain->pgd = (struct dma_pte *)alloc_pgtable_page();
        if (!domain->pgd)
                return -ENOMEM;
        __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
        return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
        u64 end;

        /* Domain 0 is reserved, so don't process it */
        if (!domain)
                return;

        domain_remove_dev_info(domain);
        /* destroy iovas */
        put_iova_domain(&domain->iovad);
        end = DOMAIN_MAX_ADDR(domain->gaw);
        end = end & (~PAGE_MASK_4K);

        /* clear ptes */
        dma_pte_clear_range(domain, 0, end);

        /* free page tables */
        dma_pte_free_pagetable(domain, 0, end);

        iommu_free_domain(domain);
        free_domain_mem(domain);
}

static int domain_context_mapping_one(struct dmar_domain *domain,
                u8 bus, u8 devfn)
{
        struct context_entry *context;
        struct intel_iommu *iommu = domain->iommu;
        unsigned long flags;

        pr_debug("Set context mapping for %02x:%02x.%d\n",
                bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        BUG_ON(!domain->pgd);
        context = device_to_context_entry(iommu, bus, devfn);
        if (!context)
                return -ENOMEM;
        spin_lock_irqsave(&iommu->lock, flags);
        if (context_present(*context)) {
                spin_unlock_irqrestore(&iommu->lock, flags);
                return 0;
        }

        context_set_domain_id(*context, domain->id);
        context_set_address_width(*context, domain->agaw);
        context_set_address_root(*context, virt_to_phys(domain->pgd));
        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
        context_set_fault_enable(*context);
        context_set_present(*context);
        __iommu_flush_cache(iommu, context, sizeof(*context));

        /* it's a non-present to present mapping */
        if (iommu_flush_context_device(iommu, domain->id,
                        (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
                iommu_flush_write_buffer(iommu);
        else
                iommu_flush_iotlb_dsi(iommu, 0, 0);
        spin_unlock_irqrestore(&iommu->lock, flags);
        return 0;
}

static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
        int ret;
        struct pci_dev *tmp, *parent;

        ret = domain_context_mapping_one(domain, pdev->bus->number,
                pdev->devfn);
        if (ret)
                return ret;

        /* dependent device mapping */
        tmp = pci_find_upstream_pcie_bridge(pdev);
        if (!tmp)
                return 0;
        /* Secondary interface's bus number and devfn 0 */
        parent = pdev->bus->self;
        while (parent != tmp) {
                ret = domain_context_mapping_one(domain, parent->bus->number,
                        parent->devfn);
                if (ret)
                        return ret;
                parent = parent->bus->self;
        }
        if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
                return domain_context_mapping_one(domain,
                        tmp->subordinate->number, 0);
        else /* this is a legacy PCI bridge */
                return domain_context_mapping_one(domain,
                        tmp->bus->number, tmp->devfn);
}

static int domain_context_mapped(struct dmar_domain *domain,
        struct pci_dev *pdev)
{
        int ret;
        struct pci_dev *tmp, *parent;

        ret = device_context_mapped(domain->iommu,
                pdev->bus->number, pdev->devfn);
        if (!ret)
                return ret;
        /* dependent device mapping */
        tmp = pci_find_upstream_pcie_bridge(pdev);
        if (!tmp)
                return ret;
        /* Secondary interface's bus number and devfn 0 */
        parent = pdev->bus->self;
        while (parent != tmp) {
                ret = device_context_mapped(domain->iommu, parent->bus->number,
                        parent->devfn);
                if (!ret)
                        return ret;
                parent = parent->bus->self;
        }
        if (tmp->is_pcie)
                return device_context_mapped(domain->iommu,
                        tmp->subordinate->number, 0);
        else
                return device_context_mapped(domain->iommu,
                        tmp->bus->number, tmp->devfn);
}

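/*
 * Map the physical range [hpa, hpa + size) at IOVA 'iova' one 4K page at a
 * time, setting the requested read/write protection on each leaf PTE.
 */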
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
                        u64 hpa, size_t size, int prot)
{
        u64 start_pfn, end_pfn;
        struct dma_pte *pte;
        int index;

        if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
                return -EINVAL;
        iova &= PAGE_MASK_4K;
        start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
        end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
        index = 0;
        while (start_pfn < end_pfn) {
                pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
                if (!pte)
                        return -ENOMEM;
                /* We don't need lock here, nobody else
                 * touches the iova range
                 */
                BUG_ON(dma_pte_addr(*pte));
                dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
                dma_set_pte_prot(*pte, prot);
                __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
                start_pfn++;
                index++;
        }
        return 0;
}

static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
        clear_context_table(domain->iommu, bus, devfn);
        iommu_flush_context_global(domain->iommu, 0);
        iommu_flush_iotlb_global(domain->iommu, 0);
}

static void domain_remove_dev_info(struct dmar_domain *domain)
{
        struct device_domain_info *info;
        unsigned long flags;

        spin_lock_irqsave(&device_domain_lock, flags);
        while (!list_empty(&domain->devices)) {
                info = list_entry(domain->devices.next,
                        struct device_domain_info, link);
                list_del(&info->link);
                list_del(&info->global);
                if (info->dev)
                        info->dev->dev.archdata.iommu = NULL;
                spin_unlock_irqrestore(&device_domain_lock, flags);

                detach_domain_for_dev(info->domain, info->bus, info->devfn);
                free_devinfo_mem(info);

                spin_lock_irqsave(&device_domain_lock, flags);
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
}

/*
 * find_domain
 * Note: we store the domain info in struct pci_dev->dev.archdata.iommu
 */
struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
        struct device_domain_info *info;

        /* No lock here, assumes no domain exit in normal case */
        info = pdev->dev.archdata.iommu;
        if (info)
                return info->domain;
        return NULL;
}

static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
     struct pci_dev *dev)
{
        int index;

        while (dev) {
                for (index = 0; index < cnt; index ++)
                        if (dev == devices[index])
                                return 1;

                /* Check our parent */
                dev = dev->bus->self;
        }

        return 0;
}

static struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
        struct dmar_drhd_unit *drhd = NULL;

        list_for_each_entry(drhd, &dmar_drhd_units, list) {
                if (drhd->include_all || dmar_pci_device_match(drhd->devices,
                                                drhd->devices_cnt, dev))
                        return drhd;
        }

        return NULL;
}

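/*
 * Find or create the dmar_domain for a device: reuse an existing domain,
 * share the domain of an upstream PCIe-to-PCI bridge if one exists, or
 * allocate and initialize a new domain from the matching DRHD unit.
 */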
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
        struct dmar_domain *domain, *found = NULL;
        struct intel_iommu *iommu;
        struct dmar_drhd_unit *drhd;
        struct device_domain_info *info, *tmp;
        struct pci_dev *dev_tmp;
        unsigned long flags;
        int bus = 0, devfn = 0;

        domain = find_domain(pdev);
        if (domain)
                return domain;

        dev_tmp = pci_find_upstream_pcie_bridge(pdev);
        if (dev_tmp) {
                if (dev_tmp->is_pcie) {
                        bus = dev_tmp->subordinate->number;
                        devfn = 0;
                } else {
                        bus = dev_tmp->bus->number;
                        devfn = dev_tmp->devfn;
                }
                spin_lock_irqsave(&device_domain_lock, flags);
                list_for_each_entry(info, &device_domain_list, global) {
                        if (info->bus == bus && info->devfn == devfn) {
                                found = info->domain;
                                break;
                        }
                }
                spin_unlock_irqrestore(&device_domain_lock, flags);
                /* pcie-pci bridge already has a domain, use it */
                if (found) {
                        domain = found;
                        goto found_domain;
                }
        }

        /* Allocate new domain for the device */
        drhd = dmar_find_matched_drhd_unit(pdev);
        if (!drhd) {
                printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
                        pci_name(pdev));
                return NULL;
        }
        iommu = drhd->iommu;

        domain = iommu_alloc_domain(iommu);
        if (!domain)
                goto error;

        if (domain_init(domain, gaw)) {
                domain_exit(domain);
                goto error;
        }

        /* register pcie-to-pci device */
        if (dev_tmp) {
                info = alloc_devinfo_mem();
                if (!info) {
                        domain_exit(domain);
                        goto error;
                }
                info->bus = bus;
                info->devfn = devfn;
                info->dev = NULL;
                info->domain = domain;
                /* This domain is shared by devices under p2p bridge */
                domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;

                /* pcie-to-pci bridge already has a domain, use it */
                found = NULL;
                spin_lock_irqsave(&device_domain_lock, flags);
                list_for_each_entry(tmp, &device_domain_list, global) {
                        if (tmp->bus == bus && tmp->devfn == devfn) {
                                found = tmp->domain;
                                break;
                        }
                }
                if (found) {
                        free_devinfo_mem(info);
                        domain_exit(domain);
                        domain = found;
                } else {
                        list_add(&info->link, &domain->devices);
                        list_add(&info->global, &device_domain_list);
                }
                spin_unlock_irqrestore(&device_domain_lock, flags);
        }

found_domain:
        info = alloc_devinfo_mem();
        if (!info)
                goto error;
        info->bus = pdev->bus->number;
        info->devfn = pdev->devfn;
        info->dev = pdev;
        info->domain = domain;
        spin_lock_irqsave(&device_domain_lock, flags);
        /* somebody is fast */
        found = find_domain(pdev);
        if (found != NULL) {
                spin_unlock_irqrestore(&device_domain_lock, flags);
                if (found != domain) {
                        domain_exit(domain);
                        domain = found;
                }
                free_devinfo_mem(info);
                return domain;
        }
        list_add(&info->link, &domain->devices);
        list_add(&info->global, &device_domain_list);
        pdev->dev.archdata.iommu = info;
        spin_unlock_irqrestore(&device_domain_lock, flags);
        return domain;
error:
        /* recheck it here, maybe others set it */
        return find_domain(pdev);
}

1529
static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1530
{
1531
        struct dmar_domain *domain;
1532
        unsigned long size;
1533
        u64 base;
1534
        int ret;
1535
 
1536
        printk(KERN_INFO
1537
                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1538
                pci_name(pdev), start, end);
1539
        /* page table init */
1540
        domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1541
        if (!domain)
1542
                return -ENOMEM;
1543
 
1544
        /* The address might not be aligned */
1545
        base = start & PAGE_MASK_4K;
1546
        size = end - base;
1547
        size = PAGE_ALIGN_4K(size);
1548
        if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1549
                        IOVA_PFN(base + size) - 1)) {
1550
                printk(KERN_ERR "IOMMU: reserve iova failed\n");
1551
                ret = -ENOMEM;
1552
                goto error;
1553
        }
1554
 
1555
        pr_debug("Mapping reserved region %lx@%llx for %s\n",
1556
                size, base, pci_name(pdev));
1557
        /*
1558
         * The RMRR range might overlap with a physical memory range,
1559
         * clear it first
1560
         */
1561
        dma_pte_clear_range(domain, base, base + size);
1562
 
1563
        ret = domain_page_mapping(domain, base, base, size,
1564
                DMA_PTE_READ|DMA_PTE_WRITE);
1565
        if (ret)
1566
                goto error;
1567
 
1568
        /* context entry init */
1569
        ret = domain_context_mapping(domain, pdev);
1570
        if (!ret)
1571
                return 0;
1572
error:
1573
        domain_exit(domain);
1574
        return ret;
1575
 
1576
}
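/*
 * Worked example with illustrative values: for start = 0x7f6e5001 and
 * end = 0x7f6e6800 the code above computes
 *
 *      base = start & PAGE_MASK_4K      = 0x7f6e5000
 *      size = PAGE_ALIGN_4K(end - base) = 0x2000
 *
 * so the two whole 4K pages containing the requested byte range are
 * identity mapped and their IOVA span is reserved against later DMA
 * allocations.
 */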
1577
 
1578
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1579
        struct pci_dev *pdev)
1580
{
1581
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1582
                return 0;
1583
        return iommu_prepare_identity_map(pdev, rmrr->base_address,
1584
                rmrr->end_address + 1);
1585
}
1586
 
1587
#ifdef CONFIG_DMAR_GFX_WA
1588
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
1589
static void __init iommu_prepare_gfx_mapping(void)
1590
{
1591
        struct pci_dev *pdev = NULL;
1592
        u64 base, size;
1593
        int slot;
1594
        int ret;
1595
 
1596
        for_each_pci_dev(pdev) {
1597
                if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1598
                                !IS_GFX_DEVICE(pdev))
1599
                        continue;
1600
                printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1601
                        pci_name(pdev));
1602
                slot = arch_get_ram_range(0, &base, &size);
1603
                while (slot >= 0) {
1604
                        ret = iommu_prepare_identity_map(pdev,
1605
                                        base, base + size);
1606
                        if (ret)
1607
                                goto error;
1608
                        slot = arch_get_ram_range(slot, &base, &size);
1609
                }
1610
                continue;
1611
error:
1612
                printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1613
        }
1614
}
1615
#endif
1616
 
1617
#ifdef CONFIG_DMAR_FLOPPY_WA
1618
static inline void iommu_prepare_isa(void)
1619
{
1620
        struct pci_dev *pdev;
1621
        int ret;
1622
 
1623
        pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1624
        if (!pdev)
1625
                return;
1626
 
1627
        printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1628
        ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1629
 
1630
        if (ret)
1631
                printk("IOMMU: Failed to create 0-64M identity map, "
1632
                        "floppy might not work\n");
1633
 
1634
}
1635
#else
1636
static inline void iommu_prepare_isa(void)
1637
{
1638
        return;
1639
}
1640
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1641
 
1642
int __init init_dmars(void)
1643
{
1644
        struct dmar_drhd_unit *drhd;
1645
        struct dmar_rmrr_unit *rmrr;
1646
        struct pci_dev *pdev;
1647
        struct intel_iommu *iommu;
1648
        int ret, unit = 0;
1649
 
1650
        /*
1651
         * for each drhd
1652
         *    allocate root
1653
         *    initialize and program root entry to not present
1654
         * endfor
1655
         */
1656
        for_each_drhd_unit(drhd) {
1657
                if (drhd->ignored)
1658
                        continue;
1659
                iommu = alloc_iommu(drhd);
1660
                if (!iommu) {
1661
                        ret = -ENOMEM;
1662
                        goto error;
1663
                }
1664
 
1665
                /*
1666
                 * TBD:
1667
                 * we could share the same root & context tables
1668
                 * among all IOMMUs. Need to split it later.
1669
                 */
1670
                ret = iommu_alloc_root_entry(iommu);
1671
                if (ret) {
1672
                        printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1673
                        goto error;
1674
                }
1675
        }
1676
 
1677
        /*
1678
         * For each rmrr
1679
         *   for each dev attached to rmrr
1680
         *   do
1681
         *     locate drhd for dev, alloc domain for dev
1682
         *     allocate free domain
1683
         *     allocate page table entries for rmrr
1684
         *     if context not allocated for bus
1685
         *           allocate and init context
1686
         *           set present in root table for this bus
1687
         *     init context with domain, translation etc
1688
         *    endfor
1689
         * endfor
1690
         */
1691
        for_each_rmrr_units(rmrr) {
1692
                int i;
1693
                for (i = 0; i < rmrr->devices_cnt; i++) {
1694
                        pdev = rmrr->devices[i];
1695
                        /* some BIOSes list non-existent devices in the DMAR table */
1696
                        if (!pdev)
1697
                                continue;
1698
                        ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1699
                        if (ret)
1700
                                printk(KERN_ERR
1701
                                 "IOMMU: mapping reserved region failed\n");
1702
                }
1703
        }
1704
 
1705
        iommu_prepare_gfx_mapping();
1706
 
1707
        iommu_prepare_isa();
1708
 
1709
        /*
1710
         * for each drhd
1711
         *   enable fault log
1712
         *   global invalidate context cache
1713
         *   global invalidate iotlb
1714
         *   enable translation
1715
         */
1716
        for_each_drhd_unit(drhd) {
1717
                if (drhd->ignored)
1718
                        continue;
1719
                iommu = drhd->iommu;
1720
                sprintf(iommu->name, "dmar%d", unit++);
1721
 
1722
                iommu_flush_write_buffer(iommu);
1723
 
1724
                ret = dmar_set_interrupt(iommu);
1725
                if (ret)
1726
                        goto error;
1727
 
1728
                iommu_set_root_entry(iommu);
1729
 
1730
                iommu_flush_context_global(iommu, 0);
1731
                iommu_flush_iotlb_global(iommu, 0);
1732
 
1733
                ret = iommu_enable_translation(iommu);
1734
                if (ret)
1735
                        goto error;
1736
        }
1737
 
1738
        return 0;
1739
error:
1740
        for_each_drhd_unit(drhd) {
1741
                if (drhd->ignored)
1742
                        continue;
1743
                iommu = drhd->iommu;
1744
                free_iommu(iommu);
1745
        }
1746
        return ret;
1747
}
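/*
 * Note on ordering: for every active DRHD the loop above sets up the fault
 * interrupt, programs the root entry, globally invalidates the context
 * cache and the IOTLB, and only then enables translation, so no stale
 * cached entries can be used once remapping is live and any faults raised
 * from that point on are reported.
 */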
1748
 
1749
static inline u64 aligned_size(u64 host_addr, size_t size)
1750
{
1751
        u64 addr;
1752
        addr = (host_addr & (~PAGE_MASK_4K)) + size;
1753
        return PAGE_ALIGN_4K(addr);
1754
}
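/*
 * Worked example with illustrative values: aligned_size(0x1003, 0x1800)
 * computes (0x1003 & ~PAGE_MASK_4K) + 0x1800 = 0x3 + 0x1800 = 0x1803 and
 * rounds that up to 0x2000, i.e. a buffer that straddles a page boundary
 * is charged for every 4K page it touches.
 */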
1755
 
1756
struct iova *
1757
iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1758
{
1759
        struct iova *piova;
1760
 
1761
        /* Make sure it's in range */
1762
        end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1763
        if (!size || (IOVA_START_ADDR + size > end))
1764
                return NULL;
1765
 
1766
        piova = alloc_iova(&domain->iovad,
1767
                        size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1768
        return piova;
1769
}
1770
 
1771
static struct iova *
1772
__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1773
                size_t size)
1774
{
1775
        struct pci_dev *pdev = to_pci_dev(dev);
1776
        struct iova *iova = NULL;
1777
 
1778
        if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1779
                iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1780
        } else  {
1781
                /*
1782
                 * First try to allocate an io virtual address in
1783
                 * DMA_32BIT_MASK and if that fails then try allocating
1784
                 * from a higher range
1785
                 */
1786
                iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1787
                if (!iova)
1788
                        iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1789
        }
1790
 
1791
        if (!iova) {
1792
                printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1793
                return NULL;
1794
        }
1795
 
1796
        return iova;
1797
}
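/*
 * Allocation policy note: a device advertising a DMA mask wider than
 * 32 bits first gets its I/O virtual addresses from below DMA_32BIT_MASK,
 * which keeps most transactions within single-address-cycle reach; only if
 * that region is exhausted does the allocator fall back to the device's
 * full mask.  When dmar_forcedac is set, the low attempt is skipped and
 * allocation goes straight to the full mask.
 */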
1798
 
1799
static struct dmar_domain *
1800
get_valid_domain_for_dev(struct pci_dev *pdev)
1801
{
1802
        struct dmar_domain *domain;
1803
        int ret;
1804
 
1805
        domain = get_domain_for_dev(pdev,
1806
                        DEFAULT_DOMAIN_ADDRESS_WIDTH);
1807
        if (!domain) {
1808
                printk(KERN_ERR
1809
                        "Allocating domain for %s failed", pci_name(pdev));
1810
                return NULL;
1811
        }
1812
 
1813
        /* make sure context mapping is ok */
1814
        if (unlikely(!domain_context_mapped(domain, pdev))) {
1815
                ret = domain_context_mapping(domain, pdev);
1816
                if (ret) {
1817
                        printk(KERN_ERR
1818
                                "Domain context map for %s failed",
1819
                                pci_name(pdev));
1820
                        return NULL;
1821
                }
1822
        }
1823
 
1824
        return domain;
1825
}
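/*
 * This is the lazy, per-mapping entry point: a device's domain and context
 * entry are only set up the first time the device actually maps something,
 * rather than up front at boot for every PCI device.
 */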
1826
 
1827
static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1828
        size_t size, int dir)
1829
{
1830
        struct pci_dev *pdev = to_pci_dev(hwdev);
1831
        int ret;
1832
        struct dmar_domain *domain;
1833
        unsigned long start_addr;
1834
        struct iova *iova;
1835
        int prot = 0;
1836
 
1837
        BUG_ON(dir == DMA_NONE);
1838
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1839
                return virt_to_bus(addr);
1840
 
1841
        domain = get_valid_domain_for_dev(pdev);
1842
        if (!domain)
1843
                return 0;
1844
 
1845
        addr = (void *)virt_to_phys(addr);
1846
        size = aligned_size((u64)addr, size);
1847
 
1848
        iova = __intel_alloc_iova(hwdev, domain, size);
1849
        if (!iova)
1850
                goto error;
1851
 
1852
        start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1853
 
1854
        /*
1855
         * Check if DMAR supports zero-length reads on write only
1856
         * mappings..
1857
         */
1858
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1859
                        !cap_zlr(domain->iommu->cap))
1860
                prot |= DMA_PTE_READ;
1861
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1862
                prot |= DMA_PTE_WRITE;
1863
        /*
1864
         * addr - (addr + size) might be partial page, we should map the whole
1865
         * page.  Note: if two part of one page are separately mapped, we
1866
         * might have two guest_addr mapping to the same host addr, but this
1867
         * is not a big problem
1868
         */
1869
        ret = domain_page_mapping(domain, start_addr,
1870
                ((u64)addr) & PAGE_MASK_4K, size, prot);
1871
        if (ret)
1872
                goto error;
1873
 
1874
        pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1875
                pci_name(pdev), size, (u64)addr,
1876
                size, (u64)start_addr, dir);
1877
 
1878
        /* it's a non-present to present mapping */
1879
        ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1880
                        start_addr, size >> PAGE_SHIFT_4K, 1);
1881
        if (ret)
1882
                iommu_flush_write_buffer(domain->iommu);
1883
 
1884
        return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1885
 
1886
error:
1887
        if (iova)
1888
                __free_iova(&domain->iovad, iova);
1889
        printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1890
                pci_name(pdev), size, (u64)addr, dir);
1891
        return 0;
1892
}
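/*
 * Illustrative usage sketch, assuming a hypothetical driver with a probed
 * struct pci_dev *pdev, a kernel buffer buf and a length len: an ordinary
 * streaming mapping such as
 *
 *      dma_addr_t handle;
 *
 *      handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *      if (!handle)
 *              goto fail;
 *      ...
 *      dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
 *
 * is routed into intel_map_single()/intel_unmap_single() above, which back
 * the returned handle with an IOVA and IOMMU page-table entries rather
 * than a raw bus address.
 */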
1893
 
1894
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1895
        size_t size, int dir)
1896
{
1897
        struct pci_dev *pdev = to_pci_dev(dev);
1898
        struct dmar_domain *domain;
1899
        unsigned long start_addr;
1900
        struct iova *iova;
1901
 
1902
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1903
                return;
1904
        domain = find_domain(pdev);
1905
        BUG_ON(!domain);
1906
 
1907
        iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1908
        if (!iova)
1909
                return;
1910
 
1911
        start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1912
        size = aligned_size((u64)dev_addr, size);
1913
 
1914
        pr_debug("Device %s unmapping: %lx@%llx\n",
1915
                pci_name(pdev), size, (u64)start_addr);
1916
 
1917
        /*  clear the whole page */
1918
        dma_pte_clear_range(domain, start_addr, start_addr + size);
1919
        /* free page tables */
1920
        dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1921
 
1922
        if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1923
                        size >> PAGE_SHIFT_4K, 0))
1924
                iommu_flush_write_buffer(domain->iommu);
1925
 
1926
        /* free iova */
1927
        __free_iova(&domain->iovad, iova);
1928
}
1929
 
1930
static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1931
                       dma_addr_t *dma_handle, gfp_t flags)
1932
{
1933
        void *vaddr;
1934
        int order;
1935
 
1936
        size = PAGE_ALIGN_4K(size);
1937
        order = get_order(size);
1938
        flags &= ~(GFP_DMA | GFP_DMA32);
1939
 
1940
        vaddr = (void *)__get_free_pages(flags, order);
1941
        if (!vaddr)
1942
                return NULL;
1943
        memset(vaddr, 0, size);
1944
 
1945
        *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1946
        if (*dma_handle)
1947
                return vaddr;
1948
        free_pages((unsigned long)vaddr, order);
1949
        return NULL;
1950
}
1951
 
1952
static void intel_free_coherent(struct device *hwdev, size_t size,
1953
        void *vaddr, dma_addr_t dma_handle)
1954
{
1955
        int order;
1956
 
1957
        size = PAGE_ALIGN_4K(size);
1958
        order = get_order(size);
1959
 
1960
        intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1961
        free_pages((unsigned long)vaddr, order);
1962
}
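/*
 * Illustrative usage sketch, assuming a hypothetical driver with a probed
 * struct pci_dev *pdev: a coherent allocation made through the generic API,
 *
 *      dma_addr_t handle;
 *      void *ring;
 *
 *      ring = dma_alloc_coherent(&pdev->dev, 4096, &handle, GFP_KERNEL);
 *      ...
 *      dma_free_coherent(&pdev->dev, 4096, ring, handle);
 *
 * is served by intel_alloc_coherent()/intel_free_coherent() above.  The
 * GFP_DMA/GFP_DMA32 flags are stripped because the IOMMU can map any
 * physical page into whatever device-visible range the DMA mask allows.
 */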
1963
 
1964
#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1965
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1966
        int nelems, int dir)
1967
{
1968
        int i;
1969
        struct pci_dev *pdev = to_pci_dev(hwdev);
1970
        struct dmar_domain *domain;
1971
        unsigned long start_addr;
1972
        struct iova *iova;
1973
        size_t size = 0;
1974
        void *addr;
1975
        struct scatterlist *sg;
1976
 
1977
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1978
                return;
1979
 
1980
        domain = find_domain(pdev);
1981
 
1982
        iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
1983
        if (!iova)
1984
                return;
1985
        for_each_sg(sglist, sg, nelems, i) {
1986
                addr = SG_ENT_VIRT_ADDRESS(sg);
1987
                size += aligned_size((u64)addr, sg->length);
1988
        }
1989
 
1990
        start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1991
 
1992
        /*  clear the whole page */
1993
        dma_pte_clear_range(domain, start_addr, start_addr + size);
1994
        /* free page tables */
1995
        dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1996
 
1997
        if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1998
                        size >> PAGE_SHIFT_4K, 0))
1999
                iommu_flush_write_buffer(domain->iommu);
2000
 
2001
        /* free iova */
2002
        __free_iova(&domain->iovad, iova);
2003
}
2004
 
2005
static int intel_nontranslate_map_sg(struct device *hddev,
2006
        struct scatterlist *sglist, int nelems, int dir)
2007
{
2008
        int i;
2009
        struct scatterlist *sg;
2010
 
2011
        for_each_sg(sglist, sg, nelems, i) {
2012
                BUG_ON(!sg_page(sg));
2013
                sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2014
                sg->dma_length = sg->length;
2015
        }
2016
        return nelems;
2017
}
2018
 
2019
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2020
                                int nelems, int dir)
2021
{
2022
        void *addr;
2023
        int i;
2024
        struct pci_dev *pdev = to_pci_dev(hwdev);
2025
        struct dmar_domain *domain;
2026
        size_t size = 0;
2027
        int prot = 0;
2028
        size_t offset = 0;
2029
        struct iova *iova = NULL;
2030
        int ret;
2031
        struct scatterlist *sg;
2032
        unsigned long start_addr;
2033
 
2034
        BUG_ON(dir == DMA_NONE);
2035
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2036
                return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2037
 
2038
        domain = get_valid_domain_for_dev(pdev);
2039
        if (!domain)
2040
                return 0;
2041
 
2042
        for_each_sg(sglist, sg, nelems, i) {
2043
                addr = SG_ENT_VIRT_ADDRESS(sg);
2044
                addr = (void *)virt_to_phys(addr);
2045
                size += aligned_size((u64)addr, sg->length);
2046
        }
2047
 
2048
        iova = __intel_alloc_iova(hwdev, domain, size);
2049
        if (!iova) {
2050
                sglist->dma_length = 0;
2051
                return 0;
2052
        }
2053
 
2054
        /*
2055
         * Check if DMAR supports zero-length reads on write only
2056
         * mappings..
2057
         */
2058
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2059
                        !cap_zlr(domain->iommu->cap))
2060
                prot |= DMA_PTE_READ;
2061
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2062
                prot |= DMA_PTE_WRITE;
2063
 
2064
        start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2065
        offset = 0;
2066
        for_each_sg(sglist, sg, nelems, i) {
2067
                addr = SG_ENT_VIRT_ADDRESS(sg);
2068
                addr = (void *)virt_to_phys(addr);
2069
                size = aligned_size((u64)addr, sg->length);
2070
                ret = domain_page_mapping(domain, start_addr + offset,
2071
                        ((u64)addr) & PAGE_MASK_4K,
2072
                        size, prot);
2073
                if (ret) {
2074
                        /*  clear the page */
2075
                        dma_pte_clear_range(domain, start_addr,
2076
                                  start_addr + offset);
2077
                        /* free page tables */
2078
                        dma_pte_free_pagetable(domain, start_addr,
2079
                                  start_addr + offset);
2080
                        /* free iova */
2081
                        __free_iova(&domain->iovad, iova);
2082
                        return 0;
2083
                }
2084
                sg->dma_address = start_addr + offset +
2085
                                ((u64)addr & (~PAGE_MASK_4K));
2086
                sg->dma_length = sg->length;
2087
                offset += size;
2088
        }
2089
 
2090
        /* it's a non-present to present mapping */
2091
        if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2092
                        start_addr, offset >> PAGE_SHIFT_4K, 1))
2093
                iommu_flush_write_buffer(domain->iommu);
2094
        return nelems;
2095
}
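/*
 * Illustrative usage sketch, assuming a hypothetical driver with a probed
 * struct pci_dev *pdev and a scatterlist sglist of nelems entries;
 * program_desc() stands in for whatever the driver does with each segment:
 *
 *      struct scatterlist *sg;
 *      int count, i;
 *
 *      count = dma_map_sg(&pdev->dev, sglist, nelems, DMA_FROM_DEVICE);
 *      for_each_sg(sglist, sg, count, i)
 *              program_desc(sg_dma_address(sg), sg_dma_len(sg));
 *      ...
 *      dma_unmap_sg(&pdev->dev, sglist, nelems, DMA_FROM_DEVICE);
 *
 * The call lands in intel_map_sg() above, which packs every element into
 * one contiguous IOVA region and flushes the IOTLB once at the end.
 */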
2096
 
2097
static struct dma_mapping_ops intel_dma_ops = {
2098
        .alloc_coherent = intel_alloc_coherent,
2099
        .free_coherent = intel_free_coherent,
2100
        .map_single = intel_map_single,
2101
        .unmap_single = intel_unmap_single,
2102
        .map_sg = intel_map_sg,
2103
        .unmap_sg = intel_unmap_sg,
2104
};
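/*
 * Once dma_ops points at this table (see intel_iommu_init() below), the
 * arch-generic dma_map_single()/dma_map_sg()/dma_alloc_coherent() wrappers
 * dispatch into the functions above, so existing PCI drivers gain DMA
 * remapping without source changes.
 */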
2105
 
2106
static inline int iommu_domain_cache_init(void)
2107
{
2108
        int ret = 0;
2109
 
2110
        iommu_domain_cache = kmem_cache_create("iommu_domain",
2111
                                         sizeof(struct dmar_domain),
2112
                                         0,
2113
                                         SLAB_HWCACHE_ALIGN,
2114
 
2115
                                         NULL);
2116
        if (!iommu_domain_cache) {
2117
                printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2118
                ret = -ENOMEM;
2119
        }
2120
 
2121
        return ret;
2122
}
2123
 
2124
static inline int iommu_devinfo_cache_init(void)
2125
{
2126
        int ret = 0;
2127
 
2128
        iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2129
                                         sizeof(struct device_domain_info),
2130
                                         0,
2131
                                         SLAB_HWCACHE_ALIGN,
2132
 
2133
                                         NULL);
2134
        if (!iommu_devinfo_cache) {
2135
                printk(KERN_ERR "Couldn't create devinfo cache\n");
2136
                ret = -ENOMEM;
2137
        }
2138
 
2139
        return ret;
2140
}
2141
 
2142
static inline int iommu_iova_cache_init(void)
2143
{
2144
        int ret = 0;
2145
 
2146
        iommu_iova_cache = kmem_cache_create("iommu_iova",
2147
                                         sizeof(struct iova),
2148
                                         0,
2149
                                         SLAB_HWCACHE_ALIGN,
2150
 
2151
                                         NULL);
2152
        if (!iommu_iova_cache) {
2153
                printk(KERN_ERR "Couldn't create iova cache\n");
2154
                ret = -ENOMEM;
2155
        }
2156
 
2157
        return ret;
2158
}
2159
 
2160
static int __init iommu_init_mempool(void)
2161
{
2162
        int ret;
2163
        ret = iommu_iova_cache_init();
2164
        if (ret)
2165
                return ret;
2166
 
2167
        ret = iommu_domain_cache_init();
2168
        if (ret)
2169
                goto domain_error;
2170
 
2171
        ret = iommu_devinfo_cache_init();
2172
        if (!ret)
2173
                return ret;
2174
 
2175
        kmem_cache_destroy(iommu_domain_cache);
2176
domain_error:
2177
        kmem_cache_destroy(iommu_iova_cache);
2178
 
2179
        return -ENOMEM;
2180
}
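/*
 * The unwind order above is deliberate: each exit path destroys only the
 * caches that were successfully created before the failure, so a partial
 * init neither leaks a kmem_cache nor destroys one that was never created.
 */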
2181
 
2182
static void __init iommu_exit_mempool(void)
2183
{
2184
        kmem_cache_destroy(iommu_devinfo_cache);
2185
        kmem_cache_destroy(iommu_domain_cache);
2186
        kmem_cache_destroy(iommu_iova_cache);
2187
 
2188
}
2189
 
2190
void __init detect_intel_iommu(void)
2191
{
2192
        if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2193
                return;
2194
        if (early_dmar_detect()) {
2195
                iommu_detected = 1;
2196
        }
2197
}
2198
 
2199
static void __init init_no_remapping_devices(void)
2200
{
2201
        struct dmar_drhd_unit *drhd;
2202
 
2203
        for_each_drhd_unit(drhd) {
2204
                if (!drhd->include_all) {
2205
                        int i;
2206
                        for (i = 0; i < drhd->devices_cnt; i++)
2207
                                if (drhd->devices[i] != NULL)
2208
                                        break;
2209
                        /* ignore this DMAR unit if no PCI devices exist */
2210
                        if (i == drhd->devices_cnt)
2211
                                drhd->ignored = 1;
2212
                }
2213
        }
2214
 
2215
        if (dmar_map_gfx)
2216
                return;
2217
 
2218
        for_each_drhd_unit(drhd) {
2219
                int i;
2220
                if (drhd->ignored || drhd->include_all)
2221
                        continue;
2222
 
2223
                for (i = 0; i < drhd->devices_cnt; i++)
2224
                        if (drhd->devices[i] &&
2225
                                !IS_GFX_DEVICE(drhd->devices[i]))
2226
                                break;
2227
 
2228
                if (i < drhd->devices_cnt)
2229
                        continue;
2230
 
2231
                /* bypass IOMMU if it is just for gfx devices */
2232
                drhd->ignored = 1;
2233
                for (i = 0; i < drhd->devices_cnt; i++) {
2234
                        if (!drhd->devices[i])
2235
                                continue;
2236
                        drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2237
                }
2238
        }
2239
}
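/*
 * Devices tagged with DUMMY_DEVICE_DOMAIN_INFO here are treated as
 * pass-through from then on: intel_map_single(), intel_map_sg() and the
 * RMRR/gfx preparation paths all test for this marker and fall back to
 * plain virt_to_bus() addressing for such devices.
 */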
2240
 
2241
int __init intel_iommu_init(void)
2242
{
2243
        int ret = 0;
2244
 
2245
        if (no_iommu || swiotlb || dmar_disabled)
2246
                return -ENODEV;
2247
 
2248
        if (dmar_table_init())
2249
                return  -ENODEV;
2250
 
2251
        iommu_init_mempool();
2252
        dmar_init_reserved_ranges();
2253
 
2254
        init_no_remapping_devices();
2255
 
2256
        ret = init_dmars();
2257
        if (ret) {
2258
                printk(KERN_ERR "IOMMU: dmar init failed\n");
2259
                put_iova_domain(&reserved_iova_list);
2260
                iommu_exit_mempool();
2261
                return ret;
2262
        }
2263
        printk(KERN_INFO
2264
        "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2265
 
2266
        force_iommu = 1;
2267
        dma_ops = &intel_dma_ops;
2268
        return 0;
2269
}
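/*
 * Boot-time wiring, summarized: the arch code is expected to call
 * detect_intel_iommu() early, while the DMAR table is being considered,
 * and intel_iommu_init() later during PCI/DMA setup.  Once this function
 * returns 0, streaming and coherent DMA mappings go through the
 * intel_dma_ops table above.
 */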
2270
 
