OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [ia64/] [kernel/] [perfmon.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * This file implements the perfmon subsystem which is used
3
 * to program the IA-64 Performance Monitoring Unit (PMU).
4
 *
5
 * Originaly Written by Ganesh Venkitachalam, IBM Corp.
6
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
7
 *
8
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
9
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
10
 *
11
 * Copyright (C) 1999-2003  Hewlett Packard Co
12
 *               Stephane Eranian <eranian@hpl.hp.com>
13
 *               David Mosberger-Tang <davidm@hpl.hp.com>
14
 */
15
 
16
#include <linux/config.h>
17
#include <linux/kernel.h>
18
#include <linux/sched.h>
19
#include <linux/interrupt.h>
20
#include <linux/smp_lock.h>
21
#include <linux/proc_fs.h>
22
#include <linux/init.h>
23
#include <linux/vmalloc.h>
24
#include <linux/wrapper.h>
25
#include <linux/mm.h>
26
#include <linux/sysctl.h>
27
#include <linux/smp.h>
28
 
29
#include <asm/bitops.h>
30
#include <asm/errno.h>
31
#include <asm/page.h>
32
#include <asm/perfmon.h>
33
#include <asm/processor.h>
34
#include <asm/signal.h>
35
#include <asm/system.h>
36
#include <asm/uaccess.h>
37
#include <asm/delay.h> /* for ia64_get_itc() */
38
 
39
#ifdef CONFIG_PERFMON
40
 
41
/*
42
 * For PMUs which rely on the debug registers for some features, you must
43
 * you must enable the following flag to activate the support for
44
 * accessing the registers via the perfmonctl() interface.
45
 */
46
#if defined(CONFIG_ITANIUM) || defined(CONFIG_MCKINLEY)
47
#define PFM_PMU_USES_DBR        1
48
#endif
49
 
50
/*
51
 * perfmon context states
52
 */
53
#define PFM_CTX_DISABLED        0
54
#define PFM_CTX_ENABLED         1
55
 
56
/*
57
 * Reset register flags
58
 */
59
#define PFM_PMD_LONG_RESET      1
60
#define PFM_PMD_SHORT_RESET     2
61
 
62
/*
63
 * Misc macros and definitions
64
 */
65
#define PMU_FIRST_COUNTER       4
66
#define PMU_MAX_PMCS            256
67
#define PMU_MAX_PMDS            256
68
 
69
/*
70
 * type of a PMU register (bitmask).
71
 * bitmask structure:
72
 *      bit0   : register implemented
73
 *      bit1   : end marker
74
 *      bit2-3 : reserved
75
 *      bit4-7 : register type
76
 *      bit8-31: reserved
77
 */
78
#define PFM_REG_IMPL            0x1 /* register implemented */
79
#define PFM_REG_END             0x2 /* end marker */
80
#define PFM_REG_MONITOR         (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
81
#define PFM_REG_COUNTING        (0x2<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */
82
#define PFM_REG_CONTROL         (0x3<<4|PFM_REG_IMPL) /* PMU control register */
83
#define PFM_REG_CONFIG          (0x4<<4|PFM_REG_IMPL) /* refine configuration */
84
#define PFM_REG_BUFFER          (0x5<<4|PFM_REG_IMPL) /* PMD used as buffer */
85
 
86
#define PMC_IS_LAST(i)  (pmu_conf.pmc_desc[i].type & PFM_REG_END)
87
#define PMD_IS_LAST(i)  (pmu_conf.pmd_desc[i].type & PFM_REG_END)
88
 
89
#define PFM_IS_DISABLED() pmu_conf.disabled
90
 
91
#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_soft_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
92
#define PFM_FL_INHERIT_MASK     (PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)
93
 
94
/* i assume unsigned */
95
#define PMC_IS_IMPL(i)    (i< PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL))
96
#define PMD_IS_IMPL(i)    (i< PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL))
97
 
98
/* XXX: these three assume that register i is implemented */
99
#define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING)
100
#define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING)
101
#define PMC_IS_MONITOR(i)  (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
102
#define PMC_DFL_VAL(i)     pmu_conf.pmc_desc[i].default_value
103
#define PMC_RSVD_MASK(i)   pmu_conf.pmc_desc[i].reserved_mask
104
#define PMD_PMD_DEP(i)     pmu_conf.pmd_desc[i].dep_pmd[0]
105
#define PMC_PMD_DEP(i)     pmu_conf.pmc_desc[i].dep_pmd[0]
106
 
107
/* k assume unsigned */
108
#define IBR_IS_IMPL(k)    (k<pmu_conf.num_ibrs)
109
#define DBR_IS_IMPL(k)    (k<pmu_conf.num_dbrs)
110
 
111
#define CTX_IS_ENABLED(c)       ((c)->ctx_flags.state == PFM_CTX_ENABLED)
112
#define CTX_OVFL_NOBLOCK(c)     ((c)->ctx_fl_block == 0)
113
#define CTX_INHERIT_MODE(c)     ((c)->ctx_fl_inherit)
114
#define CTX_HAS_SMPL(c)         ((c)->ctx_psb != NULL)
115
/* XXX: does not support more than 64 PMDs */
116
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
117
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
118
 
119
 
120
#define CTX_USED_IBR(ctx,n)     (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
121
#define CTX_USED_DBR(ctx,n)     (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
122
#define CTX_USES_DBREGS(ctx)    (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
123
 
124
#ifdef CONFIG_SMP
125
#define GET_ACTIVATION()        pmu_owners[smp_processor_id()].activation_number
126
#define INC_ACTIVATION()        pmu_owners[smp_processor_id()].activation_number++
127
#define SET_ACTIVATION(c)       (c)->ctx_last_activation = GET_ACTIVATION()
128
#define SET_LAST_CPU(ctx, v)    (ctx)->ctx_last_cpu = (v)
129
#define GET_LAST_CPU(ctx)       (ctx)->ctx_last_cpu
130
#else /* !CONFIG_SMP */
131
#define SET_ACTIVATION(t)       do {} while(0)
132
#define GET_ACTIVATION(t)       do {} while(0)
133
#define INC_ACTIVATION(t)       do {} while(0)
134
#define SET_LAST_CPU(ctx, v)    do {} while(0)
135
#define GET_LAST_CPU(ctx)       do {} while(0)
136
#endif /* CONFIG_SMP */
137
 
138
 
139
#define PFM_INVALID_ACTIVATION  (~0UL)
140
 
141
#define SET_PMU_OWNER(t)    do { pmu_owners[smp_processor_id()].owner = (t); } while(0)
142
#define PMU_OWNER()         pmu_owners[smp_processor_id()].owner
143
 
144
#define LOCK_PFS()          spin_lock(&pfm_sessions.pfs_lock)
145
#define UNLOCK_PFS()        spin_unlock(&pfm_sessions.pfs_lock)
146
 
147
#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
148
 
149
#define TASK_PTREGS(t) (((struct pt_regs *)((unsigned long) (t) + IA64_STK_OFFSET))-1)
150
 
151
/*
152
 * cmp0 must be the value of pmc0
153
 */
154
#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
155
 
156
 
157
/*
158
 * debugging
159
 */
160
#define DBprintk(a) \
161
        do { \
162
                if (pfm_sysctl.debug >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
163
        } while (0)
164
 
165
#define DBprintk_ovfl(a) \
166
        do { \
167
                if (pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
168
        } while (0)
169
 
170
 
171
 
172
/*
173
 * Architected PMC structure
174
 */
175
typedef struct {
176
        unsigned long pmc_plm:4;        /* privilege level mask */
177
        unsigned long pmc_ev:1;         /* external visibility */
178
        unsigned long pmc_oi:1;         /* overflow interrupt */
179
        unsigned long pmc_pm:1;         /* privileged monitor */
180
        unsigned long pmc_ig1:1;        /* reserved */
181
        unsigned long pmc_es:8;         /* event select */
182
        unsigned long pmc_ig2:48;       /* reserved */
183
} pfm_monitor_t;
184
 
185
/*
186
 * There is one such data structure per perfmon context. It is used to describe the
187
 * sampling buffer. It is to be shared among siblings whereas the pfm_context
188
 * is not.
189
 * Therefore we maintain a refcnt which is incremented on fork().
190
 * This buffer is private to the kernel only the actual sampling buffer
191
 * including its header are exposed to the user. This construct allows us to
192
 * export the buffer read-write, if needed, without worrying about security
193
 * problems.
194
 */
195
typedef struct _pfm_smpl_buffer_desc {
196
        spinlock_t              psb_lock;       /* protection lock */
197
        unsigned long           psb_refcnt;     /* how many users for the buffer */
198
        int                     psb_flags;      /* bitvector of flags (not yet used) */
199
 
200
        void                    *psb_addr;      /* points to location of first entry */
201
        unsigned long           psb_entries;    /* maximum number of entries */
202
        unsigned long           psb_size;       /* aligned size of buffer */
203
        unsigned long           psb_index;      /* next free entry slot XXX: must use the one in buffer */
204
        unsigned long           psb_entry_size; /* size of each entry including entry header */
205
 
206
        perfmon_smpl_hdr_t      *psb_hdr;       /* points to sampling buffer header */
207
 
208
        struct _pfm_smpl_buffer_desc *psb_next; /* next psb, used for rvfreeing of psb_hdr */
209
 
210
} pfm_smpl_buffer_desc_t;
211
 
212
/*
213
 * psb_flags
214
 */
215
#define PSB_HAS_VMA     0x1             /* a virtual mapping for the buffer exists */
216
 
217
#define LOCK_PSB(p)     spin_lock(&(p)->psb_lock)
218
#define UNLOCK_PSB(p)   spin_unlock(&(p)->psb_lock)
219
 
220
/*
221
 * 64-bit software counter structure
222
 */
223
typedef struct {
224
        u64 val;        /* virtual 64bit counter value */
225
        u64 lval;       /* last value */
226
        u64 long_reset; /* reset value on sampling overflow */
227
        u64 short_reset;/* reset value on overflow */
228
        u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */
229
        u64 seed;       /* seed for random-number generator */
230
        u64 mask;       /* mask for random-number generator */
231
        unsigned int flags; /* notify/do not notify */
232
} pfm_counter_t;
233
 
234
/*
235
 * perfmon context. One per process, is cloned on fork() depending on
236
 * inheritance flags
237
 */
238
typedef struct {
239
        unsigned int state:1;           /* 0=disabled, 1=enabled */
240
        unsigned int inherit:2;         /* inherit mode */
241
        unsigned int block:1;           /* when 1, task will blocked on user notifications */
242
        unsigned int system:1;          /* do system wide monitoring */
243
        unsigned int frozen:1;          /* pmu must be kept frozen on ctxsw in */
244
        unsigned int protected:1;       /* allow access to creator of context only */
245
        unsigned int using_dbreg:1;     /* using range restrictions (debug registers) */
246
        unsigned int excl_idle:1;       /* exclude idle task in system wide session */
247
        unsigned int unsecure:1;        /* sp = 0 for non self-monitored task */
248
        unsigned int reserved:22;
249
} pfm_context_flags_t;
250
 
251
/*
252
 * perfmon context: encapsulates all the state of a monitoring session
253
 * XXX: probably need to change layout
254
 */
255
typedef struct pfm_context {
256
        pfm_smpl_buffer_desc_t  *ctx_psb;               /* sampling buffer, if any */
257
        unsigned long           ctx_smpl_vaddr;         /* user level virtual address of smpl buffer */
258
 
259
        spinlock_t              ctx_lock;
260
        pfm_context_flags_t     ctx_flags;              /* block/noblock */
261
 
262
        struct task_struct      *ctx_notify_task;       /* who to notify on overflow */
263
        struct task_struct      *ctx_owner;             /* pid of creator (debug) */
264
 
265
        unsigned long           ctx_ovfl_regs[4];       /* which registers overflowed (notification) */
266
        unsigned long           ctx_smpl_regs[4];       /* which registers to record on overflow */
267
 
268
        struct semaphore        ctx_restart_sem;        /* use for blocking notification mode */
269
 
270
        unsigned long           ctx_used_pmds[4];       /* bitmask of PMD used                 */
271
        unsigned long           ctx_reload_pmds[4];     /* bitmask of PMD to reload on ctxsw   */
272
 
273
        unsigned long           ctx_used_pmcs[4];       /* bitmask PMC used by context         */
274
        unsigned long           ctx_reload_pmcs[4];     /* bitmask of PMC to reload on ctxsw   */
275
 
276
        unsigned long           ctx_used_ibrs[4];       /* bitmask of used IBR (speedup ctxsw) */
277
        unsigned long           ctx_used_dbrs[4];       /* bitmask of used DBR (speedup ctxsw) */
278
 
279
        pfm_counter_t           ctx_soft_pmds[IA64_NUM_PMD_REGS]; /* XXX: size should be dynamic */
280
 
281
        u64                     ctx_saved_psr;          /* copy of psr used for lazy ctxsw */
282
        unsigned long           ctx_saved_cpus_allowed; /* copy of the task cpus_allowed (system wide) */
283
        unsigned long           ctx_last_activation;    /* context last activation number for last_cpu */
284
        unsigned int            ctx_last_cpu;           /* CPU id of current or last CPU used (SMP only) */
285
        unsigned int            ctx_cpu;                /* cpu to which perfmon is applied (system wide) */
286
 
287
        struct tasklet_struct   ctx_tasklet;            /* used for sending signal-based notifications */
288
} pfm_context_t;
289
 
290
#define PFM_GET_CTX(t)  ((pfm_context_t *)(t)->thread.pfm_context)
291
#define LOCK_CTX(ctx)   spin_lock(&(ctx)->ctx_lock)
292
#define UNLOCK_CTX(ctx) spin_unlock(&(ctx)->ctx_lock)
293
 
294
#define ctx_fl_inherit          ctx_flags.inherit
295
#define ctx_fl_block            ctx_flags.block
296
#define ctx_fl_system           ctx_flags.system
297
#define ctx_fl_frozen           ctx_flags.frozen
298
#define ctx_fl_protected        ctx_flags.protected
299
#define ctx_fl_using_dbreg      ctx_flags.using_dbreg
300
#define ctx_fl_excl_idle        ctx_flags.excl_idle
301
#define ctx_fl_unsecure         ctx_flags.unsecure
302
 
303
/*
304
 * global information about all sessions
305
 * mostly used to synchronize between system wide and per-process
306
 */
307
typedef struct {
308
        spinlock_t              pfs_lock;                  /* lock the structure */
309
 
310
        unsigned int            pfs_task_sessions;         /* number of per task sessions */
311
        unsigned int            pfs_sys_sessions;          /* number of per system wide sessions */
312
        unsigned int            pfs_sys_use_dbregs;        /* incremented when a system wide session uses debug regs */
313
        unsigned int            pfs_ptrace_use_dbregs;     /* incremented when a process uses debug regs */
314
        struct task_struct      *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
315
} pfm_session_t;
316
 
317
/*
318
 * information about a PMC or PMD.
319
 * dep_pmd[]: a bitmask of dependent PMD registers
320
 * dep_pmc[]: a bitmask of dependent PMC registers
321
 */
322
typedef struct {
323
        unsigned int            type;
324
        int                     pm_pos;
325
        unsigned long           default_value;  /* power-on default value */
326
        unsigned long           reserved_mask;  /* bitmask of reserved bits */
327
        int                     (*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
328
        int                     (*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
329
        unsigned long           dep_pmd[4];
330
        unsigned long           dep_pmc[4];
331
} pfm_reg_desc_t;
332
 
333
/* assume cnum is a valid monitor */
334
#define PMC_PM(cnum, val)       (((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1)
335
#define PMC_WR_FUNC(cnum)       (pmu_conf.pmc_desc[cnum].write_check)
336
#define PMD_WR_FUNC(cnum)       (pmu_conf.pmd_desc[cnum].write_check)
337
#define PMD_RD_FUNC(cnum)       (pmu_conf.pmd_desc[cnum].read_check)
338
 
339
/*
340
 * This structure is initialized at boot time and contains
341
 * a description of the PMU main characteristics.
342
 */
343
typedef struct {
344
        unsigned int  disabled;         /* indicates if perfmon is working properly */
345
        unsigned long ovfl_val;         /* overflow value for generic counters   */
346
        unsigned long impl_pmcs[4];     /* bitmask of implemented PMCS */
347
        unsigned long impl_pmds[4];     /* bitmask of implemented PMDS */
348
        unsigned int  num_pmcs;         /* number of implemented PMCS */
349
        unsigned int  num_pmds;         /* number of implemented PMDS */
350
        unsigned int  num_ibrs;         /* number of implemented IBRS */
351
        unsigned int  num_dbrs;         /* number of implemented DBRS */
352
        unsigned int  num_counters;     /* number of PMD/PMC counters */
353
        pfm_reg_desc_t *pmc_desc;       /* detailed PMC register dependencies descriptions */
354
        pfm_reg_desc_t *pmd_desc;       /* detailed PMD register dependencies descriptions */
355
} pmu_config_t;
356
 
357
/*
358
 * perfmon command descriptions
359
 */
360
typedef struct {
361
        int             (*cmd_func)(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
362
        int             cmd_flags;
363
        unsigned int    cmd_narg;
364
        size_t          cmd_argsize;
365
} pfm_cmd_desc_t;
366
 
367
#define PFM_CMD_PID             0x1     /* command requires pid argument */
368
#define PFM_CMD_ARG_READ        0x2     /* command must read argument(s) */
369
#define PFM_CMD_ARG_RW          0x4     /* command must read/write argument(s) */
370
#define PFM_CMD_CTX             0x8     /* command needs a perfmon context */
371
#define PFM_CMD_NOCHK           0x10    /* command does not need to check task's state */
372
 
373
#define PFM_CMD_IDX(cmd)        (cmd)
374
 
375
#define PFM_CMD_IS_VALID(cmd)   ((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \
376
                                  && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL)
377
 
378
#define PFM_CMD_USE_PID(cmd)    ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
379
#define PFM_CMD_READ_ARG(cmd)   ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
380
#define PFM_CMD_RW_ARG(cmd)     ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW) != 0)
381
#define PFM_CMD_USE_CTX(cmd)    ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
382
#define PFM_CMD_CHK(cmd)        ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)
383
 
384
#define PFM_CMD_ARG_MANY        -1 /* cannot be zero */
385
#define PFM_CMD_NARG(cmd)       (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
386
#define PFM_CMD_ARG_SIZE(cmd)   (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
387
 
388
typedef struct {
389
        int     debug;          /* turn on/off debugging via syslog */
390
        int     debug_ovfl;     /* turn on/off debug printk in overflow handler */
391
        int     fastctxsw;      /* turn on/off fast (unsecure) ctxsw */
392
} pfm_sysctl_t;
393
 
394
typedef struct {
395
        unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
396
        unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
397
        unsigned long pfm_recorded_samples_count;
398
        unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */
399
        char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
400
} pfm_stats_t;
401
 
402
/*
403
 * perfmon internal variables
404
 */
405
static pfm_session_t    pfm_sessions;   /* global sessions information */
406
static struct proc_dir_entry *perfmon_dir; /* for debug only */
407
static pfm_stats_t      pfm_stats[NR_CPUS];
408
static pfm_intr_handler_desc_t  *pfm_alternate_intr_handler;
409
 
410
/* sysctl() controls */
411
static pfm_sysctl_t pfm_sysctl;
412
 
413
static ctl_table pfm_ctl_table[]={
414
        {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
415
        {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
416
        {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
417
        { 0, },
418
};
419
static ctl_table pfm_sysctl_dir[] = {
420
        {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
421
        {0,},
422
};
423
static ctl_table pfm_sysctl_root[] = {
424
        {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
425
        {0,},
426
};
427
static struct ctl_table_header *pfm_sysctl_header;
428
 
429
static void pfm_vm_close(struct vm_area_struct * area);
430
 
431
static struct vm_operations_struct pfm_vm_ops={
432
        .close =  pfm_vm_close
433
};
434
 
435
/*
436
 * keep track of task owning the PMU per CPU.
437
 */
438
static struct {
439
        struct task_struct *owner;
440
        unsigned long      activation_number;
441
        char               pad[SMP_CACHE_BYTES] ____cacheline_aligned;
442
} pmu_owners[NR_CPUS];
443
 
444
 
445
 
446
/*
447
 * forward declarations
448
 */
449
static void pfm_reset_pmu(struct task_struct *);
450
#ifndef CONFIG_SMP
451
static unsigned long pfm_lazy_save_regs (struct task_struct *ta);
452
#endif
453
 
454
#if   defined(CONFIG_ITANIUM)
455
#include "perfmon_itanium.h"
456
#elif defined(CONFIG_MCKINLEY)
457
#include "perfmon_mckinley.h"
458
#else
459
#include "perfmon_generic.h"
460
#endif
461
 
462
static inline void
463
pfm_clear_psr_pp(void)
464
{
465
        __asm__ __volatile__ ("rsm psr.pp;; srlz.i;;"::: "memory");
466
}
467
 
468
static inline void
469
pfm_set_psr_pp(void)
470
{
471
        __asm__ __volatile__ ("ssm psr.pp;; srlz.i;;"::: "memory");
472
}
473
 
474
static inline void
475
pfm_clear_psr_up(void)
476
{
477
        __asm__ __volatile__ ("rsm psr.up;; srlz.i;;"::: "memory");
478
}
479
 
480
static inline void
481
pfm_set_psr_up(void)
482
{
483
        __asm__ __volatile__ ("ssm psr.up;; srlz.i;;"::: "memory");
484
}
485
 
486
static inline unsigned long
487
pfm_get_psr(void)
488
{
489
        unsigned long tmp;
490
        __asm__ __volatile__ ("mov %0=psr;;": "=r"(tmp) :: "memory");
491
        return tmp;
492
}
493
 
494
static inline void
495
pfm_set_psr_l(unsigned long val)
496
{
497
        __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(val): "memory");
498
}
499
 
500
 
501
 
502
static inline void
503
pfm_freeze_pmu(void)
504
{
505
        ia64_set_pmc(0,1UL);
506
        ia64_srlz_d();
507
}
508
 
509
static inline void
510
pfm_unfreeze_pmu(void)
511
{
512
        ia64_set_pmc(0,0UL);
513
        ia64_srlz_d();
514
}
515
 
516
static inline void
517
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
518
{
519
        int i;
520
 
521
        for (i=0; i < nibrs; i++) {
522
                ia64_set_ibr(i, ibrs[i]);
523
        }
524
        ia64_srlz_i();
525
}
526
 
527
static inline void
528
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
529
{
530
        int i;
531
 
532
        for (i=0; i < ndbrs; i++) {
533
                ia64_set_dbr(i, dbrs[i]);
534
        }
535
        ia64_srlz_d();
536
}
537
 
538
static inline void
539
pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
540
{
541
        int i;
542
 
543
        DBprintk(("mask=0x%lx\n", mask));
544
        for (i=0; mask; i++, mask>>=1) {
545
                if ((mask & 0x1) == 0) continue;
546
                ia64_set_pmc(i, pmcs[i]);
547
                DBprintk(("pmc[%d]=0x%lx\n", i, pmcs[i]));
548
        }
549
        ia64_srlz_d();
550
}
551
 
552
static inline void
553
pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
554
{
555
        int i;
556
        unsigned long val, ovfl_val = pmu_conf.ovfl_val;
557
 
558
        DBprintk(("mask=0x%lx\n", mask));
559
        for (i=0; mask; i++, mask>>=1) {
560
                if ((mask & 0x1) == 0) continue;
561
                val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
562
                ia64_set_pmd(i, val);
563
                DBprintk(("pmd[%d]=0x%lx\n", i, val));
564
        }
565
        ia64_srlz_d();
566
}
567
 
568
static inline void
569
pfm_save_pmds(unsigned long *pmds, unsigned long mask)
570
{
571
        int i;
572
 
573
        ia64_srlz_d();
574
 
575
        for (i=0; mask; i++, mask>>=1) {
576
                if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
577
        }
578
}
579
 
580
static inline unsigned long
581
pfm_read_soft_counter(pfm_context_t *ctx, int i)
582
{
583
        return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val);
584
}
585
 
586
static inline void
587
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
588
{
589
        ctx->ctx_soft_pmds[i].val = val  & ~pmu_conf.ovfl_val;
590
        /*
591
         * writing to unimplemented part is ignore, so we do not need to
592
         * mask off top part
593
         */
594
        ia64_set_pmd(i, val & pmu_conf.ovfl_val);
595
}
596
 
597
/*
598
 * Generates a unique (per CPU) timestamp
599
 */
600
static inline unsigned long
601
pfm_get_stamp(void)
602
{
603
        /*
604
         * XXX: must find something more efficient
605
         */
606
        return ia64_get_itc();
607
}
608
 
609
/* Here we want the physical address of the memory.
610
 * This is used when initializing the contents of the
611
 * area and marking the pages as reserved.
612
 */
613
static inline unsigned long
614
pfm_kvirt_to_pa(unsigned long adr)
615
{
616
        __u64 pa = ia64_tpa(adr);
617
        //DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
618
        return pa;
619
}
620
 
621
static void *
622
pfm_rvmalloc(unsigned long size)
623
{
624
        void *mem;
625
        unsigned long adr, page;
626
 
627
        mem=vmalloc(size);
628
        if (mem) {
629
                //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
630
                memset(mem, 0, size); /* Clear the ram out, no junk to the user */
631
                adr=(unsigned long) mem;
632
                while (size > 0) {
633
                        page = pfm_kvirt_to_pa(adr);
634
                        mem_map_reserve(virt_to_page(__va(page)));
635
                        adr  += PAGE_SIZE;
636
                        size -= PAGE_SIZE;
637
                }
638
        }
639
        return mem;
640
}
641
 
642
static void
643
pfm_rvfree(void *mem, unsigned long size)
644
{
645
        unsigned long adr, page = 0;
646
 
647
        if (mem) {
648
                adr=(unsigned long) mem;
649
                while (size > 0) {
650
                        page = pfm_kvirt_to_pa(adr);
651
                        mem_map_unreserve(virt_to_page(__va(page)));
652
                        adr+=PAGE_SIZE;
653
                        size-=PAGE_SIZE;
654
                }
655
                vfree(mem);
656
        }
657
        return;
658
}
659
 
660
/*
661
 * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
662
 * attached to the context AND the current task has a mapping for it, i.e., it is the original
663
 * creator of the context.
664
 *
665
 * This function is used to remember the fact that the vma describing the sampling buffer
666
 * has now been removed. It can only be called when no other tasks share the same mm context.
667
 *
668
 */
669
static void
670
pfm_vm_close(struct vm_area_struct *vma)
671
{
672
        pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data;
673
 
674
        if (psb == NULL) {
675
                printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid);
676
                return;
677
        }
678
        /*
679
         * Add PSB to list of buffers to free on release_thread() when no more users
680
         *
681
         * This call is safe because, once the count is zero is cannot be modified anymore.
682
         * This is not because there is no more user of the mm context, that the sampling
683
         * buffer is not being used anymore outside of this task. In fact, it can still
684
         * be accessed from within the kernel by another task (such as the monitored task).
685
         *
686
         * Therefore, we only move the psb into the list of buffers to free when we know
687
         * nobody else is using it.
688
         * The linked list if independent of the perfmon context, because in the case of
689
         * multi-threaded processes, the last thread may not have been involved with
690
         * monitoring however it will be the one removing the vma and it should therefore
691
         * also remove the sampling buffer. This buffer cannot be removed until the vma
692
         * is removed.
693
         *
694
         * This function cannot remove the buffer from here, because exit_mmap() must first
695
         * complete. Given that there is no other vma related callback in the generic code,
696
         * we have created our own with the linked list of sampling buffers to free. The list
697
         * is part of the thread structure. In release_thread() we check if the list is
698
         * empty. If not we call into perfmon to free the buffer and psb. That is the only
699
         * way to ensure a safe deallocation of the sampling buffer which works when
700
         * the buffer is shared between distinct processes or with multi-threaded programs.
701
         *
702
         * We need to lock the psb because the refcnt test and flag manipulation must
703
         * looked like an atomic operation vis a vis pfm_context_exit()
704
         */
705
        LOCK_PSB(psb);
706
 
707
        if (psb->psb_refcnt == 0) {
708
 
709
                psb->psb_next = current->thread.pfm_smpl_buf_list;
710
                current->thread.pfm_smpl_buf_list = psb;
711
 
712
                DBprintk(("[%d] add smpl @%p size %lu to smpl_buf_list psb_flags=0x%x\n",
713
                        current->pid, psb->psb_hdr, psb->psb_size, psb->psb_flags));
714
        }
715
        DBprintk(("[%d] clearing psb_flags=0x%x smpl @%p size %lu\n",
716
                        current->pid, psb->psb_flags, psb->psb_hdr, psb->psb_size));
717
        /*
718
         * decrement the number vma for the buffer
719
         */
720
        psb->psb_flags &= ~PSB_HAS_VMA;
721
 
722
        UNLOCK_PSB(psb);
723
}
724
 
725
/*
726
 * This function is called from pfm_destroy_context() and also from pfm_inherit()
727
 * to explicitely remove the sampling buffer mapping from the user level address space.
728
 */
729
static int
730
pfm_remove_smpl_mapping(struct task_struct *task)
731
{
732
        pfm_context_t *ctx = task->thread.pfm_context;
733
        pfm_smpl_buffer_desc_t *psb;
734
        int r;
735
 
736
        /*
737
         * some sanity checks first
738
         */
739
        if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) {
740
                printk(KERN_DEBUG "perfmon: invalid context mm=%p\n", task->mm);
741
                return -1;
742
        }
743
        psb = ctx->ctx_psb;
744
 
745
        down_write(&task->mm->mmap_sem);
746
 
747
        r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size);
748
 
749
        up_write(&task->mm->mmap_sem);
750
        if (r !=0) {
751
                printk(KERN_DEBUG "perfmon: pid %d unable to unmap sampling buffer "
752
                       "@0x%lx size=%ld\n", task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
753
        }
754
 
755
        DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n",
756
                task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags));
757
 
758
        return 0;
759
}
760
 
761
static pfm_context_t *
762
pfm_context_alloc(void)
763
{
764
        pfm_context_t *ctx;
765
 
766
        /* allocate context descriptor */
767
        ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
768
        if (ctx) memset(ctx, 0, sizeof(pfm_context_t));
769
 
770
        return ctx;
771
}
772
 
773
static void
774
pfm_context_free(pfm_context_t *ctx)
775
{
776
        if (ctx) {
777
                DBprintk(("kill tasklet for ctx %p\n", ctx));
778
 
779
                tasklet_kill(&ctx->ctx_tasklet);
780
 
781
                DBprintk(("free ctx @%p\n", ctx));
782
                kfree(ctx);
783
        }
784
}
785
 
786
static int
787
pfm_remap_buffer(unsigned long buf, unsigned long addr, unsigned long size)
788
{
789
        unsigned long page;
790
 
791
        DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
792
 
793
        while (size > 0) {
794
                page = pfm_kvirt_to_pa(buf);
795
 
796
                if (remap_page_range(addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;
797
 
798
                addr  += PAGE_SIZE;
799
                buf   += PAGE_SIZE;
800
                size  -= PAGE_SIZE;
801
        }
802
        return 0;
803
}
804
 
805
/*
806
 * counts the number of PMDS to save per entry.
807
 * This code is generic enough to accomodate more than 64 PMDS when they become available
808
 */
809
static unsigned long
810
pfm_smpl_entry_size(unsigned long *which, unsigned long size)
811
{
812
        unsigned long res = 0;
813
        int i;
814
 
815
        for (i=0; i < size; i++, which++) res += hweight64(*which);
816
 
817
        DBprintk(("weight=%ld\n", res));
818
 
819
        return res;
820
}
821
 
822
/*
823
 * Allocates the sampling buffer and remaps it into caller's address space
824
 */
825
static int
826
pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries,
827
                      void **user_vaddr)
828
{
829
        struct mm_struct *mm = current->mm;
830
        struct vm_area_struct *vma = NULL;
831
        unsigned long size, regcount;
832
        void *smpl_buf;
833
        pfm_smpl_buffer_desc_t *psb;
834
 
835
 
836
        /* note that regcount might be 0, in this case only the header for each
837
         * entry will be recorded.
838
         */
839
        regcount = pfm_smpl_entry_size(which_pmds, 1);
840
 
841
        if ((sizeof(perfmon_smpl_hdr_t)+ entries*sizeof(perfmon_smpl_entry_t)) <= entries) {
842
                DBprintk(("requested entries %lu is too big\n", entries));
843
                return -EINVAL;
844
        }
845
 
846
        /*
847
         * 1 buffer hdr and for each entry a header + regcount PMDs to save
848
         */
849
        size = PAGE_ALIGN(  sizeof(perfmon_smpl_hdr_t)
850
                          + entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));
851
 
852
        DBprintk(("sampling buffer size=%lu bytes\n", size));
853
 
854
        /*
855
         * check requested size to avoid Denial-of-service attacks
856
         * XXX: may have to refine this test
857
         * Check against address space limit.
858
         *
859
         * if ((mm->total_vm << PAGE_SHIFT) + len> current->rlim[RLIMIT_AS].rlim_cur)
860
         *      return -ENOMEM;
861
         */
862
        if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;
863
 
864
        /*
865
         * We do the easy to undo allocations first.
866
         *
867
         * pfm_rvmalloc(), clears the buffer, so there is no leak
868
         */
869
        smpl_buf = pfm_rvmalloc(size);
870
        if (smpl_buf == NULL) {
871
                DBprintk(("Can't allocate sampling buffer\n"));
872
                return -ENOMEM;
873
        }
874
 
875
        DBprintk(("smpl_buf @%p\n", smpl_buf));
876
 
877
        /* allocate sampling buffer descriptor now */
878
        psb = kmalloc(sizeof(*psb), GFP_KERNEL);
879
        if (psb == NULL) {
880
                DBprintk(("Can't allocate sampling buffer descriptor\n"));
881
                goto error_kmalloc;
882
        }
883
 
884
        /* allocate vma */
885
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
886
        if (!vma) {
887
                DBprintk(("Cannot allocate vma\n"));
888
                goto error_kmem;
889
        }
890
        /*
891
         * partially initialize the vma for the sampling buffer
892
         *
893
         * The VM_DONTCOPY flag is very important as it ensures that the mapping
894
         * will never be inherited for any child process (via fork()) which is always
895
         * what we want.
896
         */
897
        vma->vm_mm           = mm;
898
        vma->vm_flags        = VM_READ| VM_MAYREAD |VM_RESERVED|VM_DONTCOPY;
899
        vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
900
        vma->vm_ops          = &pfm_vm_ops; /* necesarry to get the close() callback */
901
        vma->vm_pgoff        = 0;
902
        vma->vm_file         = NULL;
903
        vma->vm_raend        = 0;
904
        vma->vm_private_data = psb;     /* information needed by the pfm_vm_close() function */
905
 
906
        /*
907
         * Now we have everything we need and we can initialize
908
         * and connect all the data structures
909
         */
910
 
911
        psb->psb_hdr     = smpl_buf;
912
        psb->psb_addr    = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t); /* first entry */
913
        psb->psb_size    = size; /* aligned size */
914
        psb->psb_index   = 0;
915
        psb->psb_entries = entries;
916
        psb->psb_refcnt  = 1;
917
        psb->psb_flags   = PSB_HAS_VMA;
918
 
919
        spin_lock_init(&psb->psb_lock);
920
 
921
        /*
922
         * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and
923
         * multitask monitoring.
924
         */
925
        psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);
926
 
927
        DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p refcnt=%lu psb_flags=0x%x\n",
928
                  (void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr,
929
                  (void *)psb->psb_addr, psb->psb_refcnt, psb->psb_flags));
930
 
931
        /* initialize some of the fields of user visible buffer header */
932
        psb->psb_hdr->hdr_version    = PFM_SMPL_VERSION;
933
        psb->psb_hdr->hdr_entry_size = psb->psb_entry_size;
934
        psb->psb_hdr->hdr_pmds[0]    = which_pmds[0];
935
 
936
        /*
937
         * Let's do the difficult operations next.
938
         *
939
         * now we atomically find some area in the address space and
940
         * remap the buffer in it.
941
         */
942
        down_write(&current->mm->mmap_sem);
943
 
944
 
945
        /* find some free area in address space, must have mmap sem held */
946
        vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
947
        if (vma->vm_start == 0UL) {
948
                DBprintk(("Cannot find unmapped area for size %ld\n", size));
949
                up_write(&current->mm->mmap_sem);
950
                goto error;
951
        }
952
        vma->vm_end = vma->vm_start + size;
953
 
954
        DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start));
955
 
956
        /* can only be applied to current, need to have the mm semaphore held when called */
957
        if (pfm_remap_buffer((unsigned long)smpl_buf, vma->vm_start, size)) {
958
                DBprintk(("Can't remap buffer\n"));
959
                up_write(&current->mm->mmap_sem);
960
                goto error;
961
        }
962
 
963
        /*
964
         * now insert the vma in the vm list for the process, must be
965
         * done with mmap lock held
966
         */
967
        insert_vm_struct(mm, vma);
968
 
969
        mm->total_vm  += size >> PAGE_SHIFT;
970
 
971
        up_write(&current->mm->mmap_sem);
972
 
973
        /* store which PMDS to record */
974
        ctx->ctx_smpl_regs[0] = which_pmds[0];
975
 
976
 
977
        /* link to perfmon context */
978
        ctx->ctx_psb        = psb;
979
 
980
        /*
981
         * keep track of user level virtual address
982
         */
983
        ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;
984
 
985
        return 0;
986
 
987
error:
988
        kmem_cache_free(vm_area_cachep, vma);
989
error_kmem:
990
        kfree(psb);
991
error_kmalloc:
992
        pfm_rvfree(smpl_buf, size);
993
        return -ENOMEM;
994
}
995
 
996
static int
997
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
998
{
999
        unsigned long m, undo_mask;
1000
        unsigned int n, i;
1001
 
1002
        /*
1003
         * validy checks on cpu_mask have been done upstream
1004
         */
1005
        LOCK_PFS();
1006
 
1007
        if (is_syswide) {
1008
                /*
1009
                 * cannot mix system wide and per-task sessions
1010
                 */
1011
                if (pfm_sessions.pfs_task_sessions > 0UL) {
1012
                        DBprintk(("system wide not possible, %u conflicting task_sessions\n",
1013
                                pfm_sessions.pfs_task_sessions));
1014
                        goto abort;
1015
                }
1016
 
1017
                m = cpu_mask; undo_mask = 0UL; n = 0;
1018
                DBprintk(("cpu_mask=0x%lx\n", cpu_mask));
1019
                for(i=0; m; i++, m>>=1) {
1020
 
1021
                        if ((m & 0x1) == 0UL) continue;
1022
 
1023
                        if (pfm_sessions.pfs_sys_session[i]) goto undo;
1024
 
1025
                        DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id()));
1026
 
1027
                        pfm_sessions.pfs_sys_session[i] = task;
1028
                        undo_mask |= 1UL << i;
1029
                        n++;
1030
                }
1031
                pfm_sessions.pfs_sys_sessions += n;
1032
        } else {
1033
                if (pfm_sessions.pfs_sys_sessions) goto abort;
1034
                pfm_sessions.pfs_task_sessions++;
1035
        }
1036
        UNLOCK_PFS();
1037
        return 0;
1038
undo:
1039
        DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",
1040
                pfm_sessions.pfs_sys_session[i]->pid, i));
1041
 
1042
        for(i=0; undo_mask; i++, undo_mask >>=1) {
1043
                pfm_sessions.pfs_sys_session[i] = NULL;
1044
        }
1045
abort:
1046
        UNLOCK_PFS();
1047
 
1048
        return -EBUSY;
1049
 
1050
}
1051
 
1052
static int
1053
pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
1054
{
1055
        pfm_context_t *ctx;
1056
        unsigned long m;
1057
        unsigned int n, i;
1058
 
1059
        ctx = task ? task->thread.pfm_context : NULL;
1060
 
1061
        /*
1062
         * validy checks on cpu_mask have been done upstream
1063
         */
1064
        LOCK_PFS();
1065
 
1066
        DBprintk(("[%d] sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu_mask=0x%lx\n",
1067
                task->pid,
1068
                pfm_sessions.pfs_sys_sessions,
1069
                pfm_sessions.pfs_task_sessions,
1070
                pfm_sessions.pfs_sys_use_dbregs,
1071
                is_syswide,
1072
                cpu_mask));
1073
 
1074
 
1075
        if (is_syswide) {
1076
                m = cpu_mask; n = 0;
1077
                for(i=0; m; i++, m>>=1) {
1078
                        if ((m & 0x1) == 0UL) continue;
1079
                        pfm_sessions.pfs_sys_session[i] = NULL;
1080
                        n++;
1081
                }
1082
                /*
1083
                 * would not work with perfmon+more than one bit in cpu_mask
1084
                 */
1085
                if (ctx && ctx->ctx_fl_using_dbreg) {
1086
                        if (pfm_sessions.pfs_sys_use_dbregs == 0) {
1087
                                printk(KERN_DEBUG "perfmon: invalid release for [%d] "
1088
                                       "sys_use_dbregs=0\n", task->pid);
1089
                        } else {
1090
                                pfm_sessions.pfs_sys_use_dbregs--;
1091
                        }
1092
                }
1093
                pfm_sessions.pfs_sys_sessions -= n;
1094
 
1095
                DBprintk(("CPU%d sys_sessions=%u\n",
1096
                        smp_processor_id(), pfm_sessions.pfs_sys_sessions));
1097
        } else {
1098
                pfm_sessions.pfs_task_sessions--;
1099
                DBprintk(("[%d] task_sessions=%u\n",
1100
                        task->pid, pfm_sessions.pfs_task_sessions));
1101
        }
1102
 
1103
        UNLOCK_PFS();
1104
 
1105
        return 0;
1106
}
1107
 
1108
static void
1109
pfm_send_notification_signal(unsigned long data)
1110
{
1111
        pfm_context_t *ctx = (pfm_context_t *)data;
1112
        struct siginfo si;
1113
        int ret;
1114
 
1115
        DBprintk(("[%d] tasklet called\n", current->pid));
1116
 
1117
        LOCK_CTX(ctx);
1118
 
1119
        if (ctx->ctx_notify_task == NULL) {
1120
                printk(KERN_INFO "perfmon: tasklet lost notify_task\n");
1121
                goto nothing_to_do;
1122
        }
1123
        /* no leak */
1124
        memset(&si,0, sizeof(si));
1125
 
1126
        si.si_addr        = NULL;
1127
        si.si_pid         = current->pid; /* irrelevant */
1128
        si.si_signo       = SIGPROF;
1129
        si.si_code        = PROF_OVFL; /* indicates a perfmon SIGPROF signal */
1130
        si.si_pfm_ovfl[0] = ctx->ctx_ovfl_regs[0];
1131
 
1132
        if (ctx->ctx_notify_task != current) read_lock(&tasklist_lock);
1133
 
1134
        DBprintk_ovfl(("[%d] tasklet sending notification to [%d]\n", current->pid, ctx->ctx_notify_task->pid));
1135
 
1136
        ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task);
1137
        if (ret != 0) printk(KERN_ERR "send_sig_info(process %d, SIGPROF)=%d\n", ctx->ctx_notify_task->pid, ret);
1138
 
1139
        /*
1140
         * now undo the protections in order
1141
         */
1142
        if (ctx->ctx_notify_task != current) read_unlock(&tasklist_lock);
1143
nothing_to_do:
1144
        UNLOCK_CTX(ctx);
1145
}
1146
 
1147
/*
1148
 * XXX: do something better here
1149
 */
1150
static int
1151
pfm_bad_permissions(struct task_struct *task)
1152
{
1153
        /* stolen from bad_signal() */
1154
        return (current->session != task->session)
1155
            && (current->euid ^ task->suid) && (current->euid ^ task->uid)
1156
            && (current->uid ^ task->suid) && (current->uid ^ task->uid);
1157
}
1158
 
1159
static int
1160
pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
1161
{
1162
        unsigned long smpl_pmds = pfx->ctx_smpl_regs[0];
1163
        int ctx_flags;
1164
        int cpu;
1165
 
1166
        /* valid signal */
1167
 
1168
        /* cannot send to process 1, 0 means do not notify */
1169
        if (pfx->ctx_notify_pid == 1) {
1170
                DBprintk(("invalid notify_pid %d\n", pfx->ctx_notify_pid));
1171
                return -EINVAL;
1172
        }
1173
        ctx_flags = pfx->ctx_flags;
1174
 
1175
        if ((ctx_flags & PFM_FL_INHERIT_MASK) == (PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)) {
1176
                DBprintk(("invalid inherit mask 0x%x\n",ctx_flags & PFM_FL_INHERIT_MASK));
1177
                return -EINVAL;
1178
        }
1179
 
1180
        if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
1181
                DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask));
1182
                /*
1183
                 * cannot block in this mode
1184
                 */
1185
                if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
1186
                        DBprintk(("cannot use blocking mode when in system wide monitoring\n"));
1187
                        return -EINVAL;
1188
                }
1189
                /*
1190
                 * must only have one bit set in the CPU mask
1191
                 */
1192
                if (hweight64(pfx->ctx_cpu_mask) != 1UL) {
1193
                        DBprintk(("invalid CPU mask specified\n"));
1194
                        return -EINVAL;
1195
                }
1196
                /*
1197
                 * and it must be a valid CPU
1198
                 */
1199
                cpu = ffz(~pfx->ctx_cpu_mask);
1200
                if (cpu_online(cpu) == 0) {
1201
                        DBprintk(("CPU%d is not online\n", cpu));
1202
                        return -EINVAL;
1203
                }
1204
                /*
1205
                 * check for pre-existing pinning, if conflicting reject
1206
                 */
1207
                if (task->cpus_allowed != ~0UL && (task->cpus_allowed & (1UL<<cpu)) == 0) {
1208
                        DBprintk(("[%d] pinned on 0x%lx, mask for CPU%d \n", task->pid,
1209
                                task->cpus_allowed, cpu));
1210
                        return -EINVAL;
1211
                }
1212
 
1213
        } else {
1214
                /*
1215
                 * must provide a target for the signal in blocking mode even when
1216
                 * no counter is configured with PFM_FL_REG_OVFL_NOTIFY
1217
                 */
1218
                if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) {
1219
                        DBprintk(("must have notify_pid when blocking for [%d]\n", task->pid));
1220
                        return -EINVAL;
1221
                }
1222
#if 0
1223
                if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == task->pid) {
1224
                        DBprintk(("cannot notify self when blocking for [%d]\n", task->pid));
1225
                        return -EINVAL;
1226
                }
1227
#endif
1228
        }
1229
        /* verify validity of smpl_regs */
1230
        if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) {
1231
                DBprintk(("invalid smpl_regs 0x%lx\n", smpl_pmds));
1232
                return -EINVAL;
1233
        }
1234
        /* probably more to add here */
1235
 
1236
        return 0;
1237
}
1238
 
1239
static int
1240
pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
1241
                   struct pt_regs *regs)
1242
{
1243
        pfarg_context_t tmp;
1244
        void *uaddr = NULL;
1245
        int ret;
1246
        int ctx_flags;
1247
        pid_t notify_pid;
1248
 
1249
        /* a context has already been defined */
1250
        if (ctx) return -EBUSY;
1251
 
1252
        /*
1253
         * not yet supported
1254
         */
1255
        if (task != current) return -EINVAL;
1256
 
1257
        if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1258
 
1259
        ret = pfx_is_sane(task, &tmp);
1260
        if (ret < 0) return ret;
1261
 
1262
        ctx_flags = tmp.ctx_flags;
1263
 
1264
        ret = pfm_reserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE, tmp.ctx_cpu_mask);
1265
        if (ret) goto abort;
1266
 
1267
        ret = -ENOMEM;
1268
 
1269
        ctx = pfm_context_alloc();
1270
        if (!ctx) goto error;
1271
 
1272
        /* record the creator (important for inheritance) */
1273
        ctx->ctx_owner = current;
1274
 
1275
        notify_pid = tmp.ctx_notify_pid;
1276
 
1277
        spin_lock_init(&ctx->ctx_lock);
1278
 
1279
        if (notify_pid == current->pid) {
1280
 
1281
                ctx->ctx_notify_task = current;
1282
                task->thread.pfm_context = ctx;
1283
 
1284
        } else if (notify_pid!=0) {
1285
                struct task_struct *notify_task;
1286
 
1287
                read_lock(&tasklist_lock);
1288
 
1289
                notify_task = find_task_by_pid(notify_pid);
1290
 
1291
                if (notify_task) {
1292
 
1293
                        ret = -EPERM;
1294
 
1295
                        /*
1296
                         * check if we can send this task a signal
1297
                         */
1298
                        if (pfm_bad_permissions(notify_task)) {
1299
                                read_unlock(&tasklist_lock);
1300
                                goto buffer_error;
1301
                        }
1302
 
1303
                        /*
1304
                         * make visible
1305
                         * must be done inside critical section
1306
                         *
1307
                         * if the initialization does not go through it is still
1308
                         * okay because child will do the scan for nothing which
1309
                         * won't hurt.
1310
                         */
1311
                        task->thread.pfm_context = ctx;
1312
 
1313
                        /*
1314
                         * will cause task to check on exit for monitored
1315
                         * processes that would notify it. see release_thread()
1316
                         * Note: the scan MUST be done in release thread, once the
1317
                         * task has been detached from the tasklist otherwise you are
1318
                         * exposed to race conditions.
1319
                         */
1320
                        atomic_add(1, &ctx->ctx_notify_task->thread.pfm_notifiers_check);
1321
 
1322
                        ctx->ctx_notify_task = notify_task;
1323
                }
1324
                read_unlock(&tasklist_lock);
1325
        }
1326
 
1327
        /*
1328
         * notification process does not exist
1329
         */
1330
        if (notify_pid != 0 && ctx->ctx_notify_task == NULL) {
1331
                ret = -EINVAL;
1332
                goto buffer_error;
1333
        }
1334
 
1335
        if (tmp.ctx_smpl_entries) {
1336
                DBprintk(("sampling entries=%lu\n",tmp.ctx_smpl_entries));
1337
 
1338
                ret = pfm_smpl_buffer_alloc(ctx, tmp.ctx_smpl_regs,
1339
                                                 tmp.ctx_smpl_entries, &uaddr);
1340
                if (ret<0) goto buffer_error;
1341
 
1342
                tmp.ctx_smpl_vaddr = uaddr;
1343
        }
1344
        /* initialization of context's flags */
1345
        ctx->ctx_fl_inherit   = ctx_flags & PFM_FL_INHERIT_MASK;
1346
        ctx->ctx_fl_block     = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
1347
        ctx->ctx_fl_system    = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
1348
        ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
1349
        ctx->ctx_fl_unsecure  = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
1350
        ctx->ctx_fl_frozen    = 0;
1351
        /*
1352
         * setting this flag to 0 here means, that the creator or the task that the
1353
         * context is being attached are granted access. Given that a context can only
1354
         * be created for the calling process this, in effect only allows the creator
1355
         * to access the context. See pfm_protect() for more.
1356
         */
1357
        ctx->ctx_fl_protected = 0;
1358
 
1359
        /* for system wide mode only (only 1 bit set) */
1360
        ctx->ctx_cpu = ffz(~tmp.ctx_cpu_mask);
1361
 
1362
        /* SMP only, means no CPU */
1363
        ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
1364
        SET_LAST_CPU(ctx, -1);
1365
 
1366
        sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
1367
 
1368
        /*
1369
         * initialize tasklet for signal notifications
1370
         *
1371
         * ALL signal-based (or any notification using data structures
1372
         * external to perfmon) MUST use tasklets to avoid lock contentions
1373
         * when a signal has to be sent for overflow interrupt handler.
1374
         */
1375
        tasklet_init(&ctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)ctx);
1376
 
1377
        if (__copy_to_user(req, &tmp, sizeof(tmp))) {
1378
                ret = -EFAULT;
1379
                goto buffer_error;
1380
        }
1381
 
1382
        DBprintk(("context=%p, pid=%d notify_task=%p\n",
1383
                        (void *)ctx, task->pid, ctx->ctx_notify_task));
1384
 
1385
        DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d unsecure=%d\n",
1386
                        (void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit,
1387
                        ctx->ctx_fl_block, ctx->ctx_fl_system,
1388
                        ctx->ctx_fl_excl_idle,
1389
                        ctx->ctx_fl_unsecure));
1390
 
1391
        /*
1392
         * when no notification is required, we can make this visible at the last moment
1393
         */
1394
        if (notify_pid == 0) task->thread.pfm_context = ctx;
1395
        /*
1396
         * pin task to CPU and force reschedule on exit to ensure
1397
         * that when back to user level the task runs on the designated
1398
         * CPU.
1399
         */
1400
        if (ctx->ctx_fl_system) {
1401
                ctx->ctx_saved_cpus_allowed = task->cpus_allowed;
1402
                task->cpus_allowed = tmp.ctx_cpu_mask;
1403
                task->need_resched = 1;
1404
                DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid, task->cpus_allowed));
1405
        }
1406
 
1407
        return 0;
1408
 
1409
buffer_error:
1410
        pfm_context_free(ctx);
1411
error:
1412
        pfm_unreserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE , tmp.ctx_cpu_mask);
1413
abort:
1414
        /* make sure we don't leave anything behind */
1415
        task->thread.pfm_context = NULL;
1416
 
1417
        return ret;
1418
}
1419
 
1420
static inline unsigned long
1421
pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
1422
{
1423
        unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
1424
        unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
1425
        extern unsigned long carta_random32 (unsigned long seed);
1426
 
1427
        if (reg->flags & PFM_REGFL_RANDOM) {
1428
                new_seed = carta_random32(old_seed);
1429
                val -= (old_seed & mask);       /* counter values are negative numbers! */
1430
                if ((mask >> 32) != 0)
1431
                        /* construct a full 64-bit random value: */
1432
                        new_seed |= carta_random32(old_seed >> 32) << 32;
1433
                reg->seed = new_seed;
1434
        }
1435
        reg->lval = val;
1436
        return val;
1437
}
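/*
 * Illustration of the reset arithmetic above (example values, assumed here,
 * not taken from the original source): PMD counters count up and interrupt on
 * overflow, so sampling every N events means loading a reset value of
 * (overflow - N), i.e. a "negative" value. With PFM_REGFL_RANDOM set,
 * (old_seed & mask) is additionally subtracted from the reset value, which
 * lengthens the period by that many events and spreads samples instead of
 * hitting a strictly fixed period.
 */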
1438
 
1439
static void
1440
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
1441
{
1442
        unsigned long mask = ovfl_regs[0];
1443
        unsigned long reset_others = 0UL;
1444
        unsigned long val;
1445
        int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);
1446
 
1447
        /*
1448
         * now restore reset value on sampling overflowed counters
1449
         */
1450
        mask >>= PMU_FIRST_COUNTER;
1451
        for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
1452
                if (mask & 0x1) {
1453
                        val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
1454
                        reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0];
1455
 
1456
                        DBprintk_ovfl(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid,
1457
                                  is_long_reset ? "long" : "short", i, val));
1458
 
1459
                        /* upper part is ignored on rval */
1460
                        pfm_write_soft_counter(ctx, i, val);
1461
                }
1462
        }
1463
 
1464
        /*
1465
         * Now take care of resetting the other registers
1466
         */
1467
        for(i = 0; reset_others; i++, reset_others >>= 1) {
1468
 
1469
                if ((reset_others & 0x1) == 0) continue;
1470
 
1471
                val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
1472
 
1473
                if (PMD_IS_COUNTING(i)) {
1474
                        pfm_write_soft_counter(ctx, i, val);
1475
                } else {
1476
                        ia64_set_pmd(i, val);
1477
                }
1478
                DBprintk_ovfl(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid,
1479
                          is_long_reset ? "long" : "short", i, val));
1480
        }
1481
        ia64_srlz_d();
1482
}
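/*
 * Note on pfm_reset_regs() above: ovfl_regs[0] is a bitmask of overflowed
 * counters starting at PMD4 (PMU_FIRST_COUNTER). Each overflowed counter is
 * reloaded with its short or long reset value, and its reset_pmds[0] bitmask
 * (the extra PMDs the user asked to reset on overflow of that counter, see
 * pfm_write_pmcs() below) is accumulated into reset_others and handled in the
 * second loop, using the soft counter for counting PMDs and ia64_set_pmd()
 * otherwise.
 */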
1483
 
1484
static int
1485
pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1486
{
1487
        struct thread_struct *th = &task->thread;
1488
        pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
1489
        unsigned long value, reset_pmds;
1490
        unsigned int cnum, reg_flags, flags;
1491
        int is_monitor, is_counting;
1492
        int i, ret = -EINVAL;
1493
#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
1494
 
1495
        /* we don't quite support this right now */
1496
        if (task != current) return -EINVAL;
1497
 
1498
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1499
 
1500
 
1501
        /* XXX: ctx locking may be required here */
1502
 
1503
        for (i = 0; i < count; i++, req++) {
1504
 
1505
                if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1506
 
1507
                cnum       = tmp.reg_num;
1508
                reg_flags  = tmp.reg_flags;
1509
                value      = tmp.reg_value;
1510
                reset_pmds = tmp.reg_reset_pmds[0];
1511
                flags      = 0;
1512
 
1513
                is_counting = PMC_IS_COUNTING(cnum);
1514
                is_monitor  = PMC_IS_MONITOR(cnum);
1515
 
1516
                /*
1517
                 * we reject all non-implemented PMCs as well
1518
                 * as attempts to modify PMC[0-3] which are used
1519
                 * as status registers by the PMU
1520
                 */
1521
                if (!PMC_IS_IMPL(cnum) || cnum < 4) {
1522
                        DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum));
1523
                        goto error;
1524
                }
1525
                /*
1526
                 * If the PMC is a monitor, then if the value is not the default:
1527
                 *      - system-wide session: PMCx.pm=1 (privileged monitor)
1528
                 *      - per-task           : PMCx.pm=0 (user monitor)
1529
                 */
1530
                if ((is_monitor || is_counting) && value != PMC_DFL_VAL(cnum) && PFM_CHECK_PMC_PM(ctx, cnum, value)) {
1531
                        DBprintk(("pmc%u pmc_pm=%ld fl_system=%d\n",
1532
                                cnum,
1533
                                PMC_PM(cnum, value),
1534
                                ctx->ctx_fl_system));
1535
                        goto error;
1536
                }
1537
 
1538
                if (is_counting) {
1539
                        pfm_monitor_t *p = (pfm_monitor_t *)&value;
1540
                        /*
1541
                         * enforce generation of overflow interrupt. Necessary on all
1542
                         * CPUs.
1543
                         */
1544
                        p->pmc_oi = 1;
1545
 
1546
                        if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
1547
                                /*
1548
                                 * must have a target for the signal
1549
                                 */
1550
                                if (ctx->ctx_notify_task == NULL) {
1551
                                        DBprintk(("cannot set ovfl_notify: no notify_task\n"));
1552
                                        goto error;
1553
                                }
1554
                                flags |= PFM_REGFL_OVFL_NOTIFY;
1555
                        }
1556
 
1557
                        if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
1558
 
1559
                        /* verify validity of reset_pmds */
1560
                        if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) {
1561
                                DBprintk(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
1562
                                goto error;
1563
                        }
1564
                } else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
1565
                                DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum));
1566
                                goto error;
1567
                }
1568
 
1569
                /*
1570
                 * execute write checker, if any
1571
                 */
1572
                if (PMC_WR_FUNC(cnum)) {
1573
                        ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs);
1574
                        if (ret) goto error;
1575
                        ret = -EINVAL;
1576
                }
1577
 
1578
                /*
1579
                 * no error on this register
1580
                 */
1581
                PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
1582
 
1583
                /*
1584
                 * update register return value, abort all if problem during copy.
1585
                 * we only modify the reg_flags field. no check mode is fine because
1586
                 * access has been verified upfront in sys_perfmonctl().
1587
                 *
1588
                 * If this fails, then the software state is not modified
1589
                 */
1590
                if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
1591
 
1592
                /*
1593
                 * Now we commit the changes to the software state
1594
                 */
1595
 
1596
                /*
1597
                 * full flag update each time a register is programmed
1598
                 */
1599
                ctx->ctx_soft_pmds[cnum].flags = flags;
1600
 
1601
                if (is_counting) {
1602
                        ctx->ctx_soft_pmds[cnum].reset_pmds[0] = reset_pmds;
1603
 
1604
                        /* mark all PMDS to be accessed as used */
1605
                        CTX_USED_PMD(ctx, reset_pmds);
1606
                }
1607
 
1608
                /*
1609
                 * Needed in case the user does not initialize the equivalent
1610
                 * PMD. Clearing is done in reset_pmu() so there is no possible
1611
                 * leak here.
1612
                 */
1613
                CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]);
1614
 
1615
                /*
1616
                 * keep a copy of the pmc, used for register reload
1617
                 */
1618
                th->pmc[cnum] = value;
1619
 
1620
                ia64_set_pmc(cnum, value);
1621
 
1622
                DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n",
1623
                          task->pid, cnum, value,
1624
                          ctx->ctx_soft_pmds[cnum].flags,
1625
                          ctx->ctx_used_pmds[0]));
1626
 
1627
        }
1628
 
1629
        return 0;
1630
 
1631
error:
1632
        PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
1633
 
1634
        if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
1635
 
1636
        DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret));
1637
 
1638
        return ret;
1639
}
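/*
 * pfm_write_pmds() below follows the same per-request protocol as
 * pfm_write_pmcs() above: copy each pfarg_reg_t from user space, validate the
 * register, run the optional write checker, report per-register status via
 * reg_flags, and only then commit the value to the software state and to the
 * hardware register.
 */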
1640
 
1641
static int
1642
pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1643
{
1644
        pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
1645
        unsigned long value, hw_value;
1646
        unsigned int cnum;
1647
        int i;
1648
        int ret = -EINVAL;
1649
 
1650
        /* we don't quite support this right now */
1651
        if (task != current) return -EINVAL;
1652
 
1653
        /*
1654
         * Cannot do anything before PMU is enabled
1655
         */
1656
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1657
 
1658
        /* XXX: ctx locking may be required here */
1659
 
1660
 
1661
        for (i = 0; i < count; i++, req++) {
1662
 
1663
                if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1664
 
1665
                cnum  = tmp.reg_num;
1666
                value = tmp.reg_value;
1667
 
1668
                if (!PMD_IS_IMPL(cnum)) {
1669
                        DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum));
1670
                        goto abort_mission;
1671
                }
1672
 
1673
                /*
1674
                 * execute write checker, if any
1675
                 */
1676
                if (PMD_WR_FUNC(cnum)) {
1677
                        unsigned long v = value;
1678
                        ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs);
1679
                        if (ret) goto abort_mission;
1680
                        value = v;
1681
                        ret = -EINVAL;
1682
                }
1683
                hw_value = value;
1684
                /*
1685
                 * no error on this register
1686
                 */
1687
                PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
1688
 
1689
                if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
1690
 
1691
                /*
1692
                 * now commit changes to software state
1693
                 */
1694
 
1695
                /* update virtualized (64bits) counter */
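                /*
                 * The 64-bit virtual counter is split in two: ctx_soft_pmds[].val keeps
                 * the bits above the hardware counter width (~pmu_conf.ovfl_val), while
                 * only the low bits (hw_value) are programmed into the physical PMD.
                 * pfm_read_pmds() adds the two parts back together.
                 */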
1696
                if (PMD_IS_COUNTING(cnum)) {
1697
                        ctx->ctx_soft_pmds[cnum].lval = value;
1698
                        ctx->ctx_soft_pmds[cnum].val  = value & ~pmu_conf.ovfl_val;
1699
 
1700
                        hw_value = value & pmu_conf.ovfl_val;
1701
 
1702
                        ctx->ctx_soft_pmds[cnum].long_reset  = tmp.reg_long_reset;
1703
                        ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
1704
 
1705
                        ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed;
1706
                        ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask;
1707
                }
1708
 
1709
                /* keep track of what we use */
1710
                CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
1711
 
1712
                /* mark this register as used as well */
1713
                CTX_USED_PMD(ctx, RDEP(cnum));
1714
 
1715
                /* writes to the unimplemented part are ignored, so this is safe */
1716
                ia64_set_pmd(cnum, hw_value);
1717
 
1718
                /* to go away */
1719
                ia64_srlz_d();
1720
 
1721
                DBprintk(("[%d] pmd[%u]: value=0x%lx hw_value=0x%lx soft_pmd=0x%lx  short_reset=0x%lx "
1722
                          "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx psr=%d\n",
1723
                                task->pid, cnum,
1724
                                value, hw_value,
1725
                                ctx->ctx_soft_pmds[cnum].val,
1726
                                ctx->ctx_soft_pmds[cnum].short_reset,
1727
                                ctx->ctx_soft_pmds[cnum].long_reset,
1728
                                ia64_get_pmd(cnum) & pmu_conf.ovfl_val,
1729
                                PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
1730
                                ctx->ctx_used_pmds[0],
1731
                                ctx->ctx_soft_pmds[cnum].reset_pmds[0], ia64_psr(regs)->sp));
1732
        }
1733
 
1734
        return 0;
1735
 
1736
abort_mission:
1737
        /*
1738
         * for now, we have only one possibility for error
1739
         */
1740
        PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
1741
 
1742
        /*
1743
         * we change the return value to EFAULT in case we cannot write the register return code.
1744
         * The caller must first correct this error; a resubmission of the request will then
1745
         * eventually yield the EINVAL.
1746
         */
1747
        if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
1748
 
1749
        DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret));
1750
 
1751
        return ret;
1752
}
1753
 
1754
static int
1755
pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1756
{
1757
        struct thread_struct *th = &task->thread;
1758
        unsigned long val, lval;
1759
        pfarg_reg_t *req = (pfarg_reg_t *)arg;
1760
        unsigned int cnum, reg_flags = 0;
1761
        int i, ret = 0;
1762
#if __GNUC__ < 3
1763
        int foo;
1764
#endif
1765
 
1766
        if (!CTX_IS_ENABLED(ctx)) {
1767
                DBprintk(("context for [%d] is disabled\n", task->pid));
1768
                return -EINVAL;
1769
        }
1770
 
1771
        /*
1772
         * XXX: MUST MAKE SURE WE DON'T HAVE ANY PENDING OVERFLOW BEFORE READING
1773
         * This is required when the monitoring has been stopped by user or kernel.
1774
         * If it is still going on, then that's fine because we are not guaranteed
1775
         * to return an accurate value in this case.
1776
         */
1777
 
1778
        /* XXX: ctx locking may be required here */
1779
 
1780
        /*
1781
         * should we need to access the PMU, serialization is needed
1782
         */
1783
        ia64_srlz_d();
1784
 
1785
        for (i = 0; i < count; i++, req++) {
1786
 
1787
#if __GNUC__ < 3
1788
                foo = __get_user(cnum, &req->reg_num);
1789
                if (foo) return -EFAULT;
1790
                foo = __get_user(reg_flags, &req->reg_flags);
1791
                if (foo) return -EFAULT;
1792
#else
1793
                if (__get_user(cnum, &req->reg_num)) return -EFAULT;
1794
                if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT;
1795
#endif
1796
                lval = 0UL;
1797
 
1798
                if (!PMD_IS_IMPL(cnum)) goto abort_mission;
1799
                /*
1800
                 * we can only read the registers that we use. That includes
1801
                 * the ones we explicitly initialize AND the ones we want included
1802
                 * in the sampling buffer (smpl_regs).
1803
                 *
1804
                 * Having this restriction allows optimization in the ctxsw routine
1805
                 * without compromising security (leaks)
1806
                 */
1807
                if (!CTX_IS_USED_PMD(ctx, cnum)) goto abort_mission;
1808
 
1809
                /*
1810
                 * we can access the registers directly only when task
1811
                 * is the OWNER of the local PMU. In SMP, this can
1812
                 * happen only when task == current. In addition
1813
                 * this can happen when task != current but
1814
                 * only in UP mode.
1815
                 */
1816
                if (task == PMU_OWNER()) {
1817
                        val = ia64_get_pmd(cnum);
1818
                        DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val));
1819
                } else {
1820
                        /* context has been saved */
1821
                        val = th->pmd[cnum];
1822
                }
1823
 
1824
                if (PMD_IS_COUNTING(cnum)) {
1825
                        /*
1826
                         * XXX: need to check for overflow
1827
                         */
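                        /*
                         * Reconstruct the 64-bit virtual value: keep the live hardware bits
                         * and add the software-maintained upper part. For illustration,
                         * assuming a 47-bit wide PMD, a hardware value of 0x10 plus a soft
                         * value of 0x800000000000 yields 0x800000000010.
                         */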
1828
                        val &= pmu_conf.ovfl_val;
1829
                        val += ctx->ctx_soft_pmds[cnum].val;
1830
 
1831
                        lval = ctx->ctx_soft_pmds[cnum].lval;
1832
                }
1833
 
1834
                /*
1835
                 * execute read checker, if any
1836
                 */
1837
                if (PMD_RD_FUNC(cnum)) {
1838
                        unsigned long v = val;
1839
                        ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs);
1840
                        val = v;
1841
                }
1842
 
1843
                PFM_REG_RETFLAG_SET(reg_flags, ret);
1844
 
1845
                DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n",
1846
                                        cnum, ret, val, ia64_get_pmc(cnum)));
1847
 
1848
                /*
1849
                 * update register return value, abort all if problem during copy.
1850
                 * we only modify the reg_flags field. no check mode is fine because
1851
                 * access has been verified upfront in sys_perfmonctl().
1852
                 */
1853
                if (__put_user(cnum, &req->reg_num)) return -EFAULT;
1854
                if (__put_user(val, &req->reg_value)) return -EFAULT;
1855
                if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT;
1856
                if (__put_user(lval, &req->reg_last_reset_value)) return -EFAULT;
1857
        }
1858
 
1859
        return 0;
1860
 
1861
abort_mission:
1862
        PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL);
1863
        /*
1864
         * XXX: if this fails, we stick with the original failure, flag not updated!
1865
         */
1866
        __put_user(reg_flags, &req->reg_flags);
1867
 
1868
        return -EINVAL;
1869
}
1870
 
1871
#ifdef PFM_PMU_USES_DBR
1872
/*
1873
 * Only call this function when a process is trying to
1874
 * write the debug registers (reading is always allowed)
1875
 */
1876
int
1877
pfm_use_debug_registers(struct task_struct *task)
1878
{
1879
        pfm_context_t *ctx = task->thread.pfm_context;
1880
        int ret = 0;
1881
 
1882
        DBprintk(("called for [%d]\n", task->pid));
1883
 
1884
        /*
1885
         * do it only once
1886
         */
1887
        if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
1888
 
1889
        /*
1890
         * Even on SMP, we do not need to use an atomic here because
1891
         * the only way in is via ptrace() and this is possible only when the
1892
         * process is stopped. Even in the case where the ctxsw out is not totally
1893
         * completed by the time we come here, there is no way the 'stopped' process
1894
         * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
1895
         * So this is always safe.
1896
         */
1897
        if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
1898
 
1899
        LOCK_PFS();
1900
 
1901
        /*
1902
         * We cannot allow setting breakpoints when system wide monitoring
1903
         * sessions are using the debug registers.
1904
         */
1905
        if (pfm_sessions.pfs_sys_use_dbregs> 0)
1906
                ret = -1;
1907
        else
1908
                pfm_sessions.pfs_ptrace_use_dbregs++;
1909
 
1910
        DBprintk(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
1911
                  pfm_sessions.pfs_ptrace_use_dbregs,
1912
                  pfm_sessions.pfs_sys_use_dbregs,
1913
                  task->pid, ret));
1914
 
1915
        UNLOCK_PFS();
1916
 
1917
        return ret;
1918
}
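/*
 * The pfs_ptrace_use_dbregs / pfs_sys_use_dbregs counters make the two uses of
 * the debug registers mutually exclusive: ptrace() cannot claim them while a
 * system-wide perfmon session is using them (above), and pfm_write_ibr_dbr()
 * below refuses the reverse combination with -EBUSY.
 */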
1919
 
1920
/*
1921
 * This function is called for every task that exits with the
1922
 * IA64_THREAD_DBG_VALID set. This indicates a task which was
1923
 * able to use the debug registers for debugging purposes via
1924
 * ptrace(). Therefore we know it was not using them for
1925
 * performance monitoring, so we only decrement the number
1926
 * of "ptraced" debug register users to keep the count up to date
1927
 */
1928
int
1929
pfm_release_debug_registers(struct task_struct *task)
1930
{
1931
        int ret;
1932
 
1933
        LOCK_PFS();
1934
        if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
1935
                printk(KERN_DEBUG "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n",
1936
                       task->pid);
1937
                ret = -1;
1938
        }  else {
1939
                pfm_sessions.pfs_ptrace_use_dbregs--;
1940
                ret = 0;
1941
        }
1942
        UNLOCK_PFS();
1943
 
1944
        return ret;
1945
}
1946
#else /* !PFM_PMU_USES_DBR */
1947
/*
1948
 * In case the PMU does not use the debug registers, these two functions are nops.
1949
 * The first function is called from arch/ia64/kernel/ptrace.c.
1950
 * The second function is called from arch/ia64/kernel/process.c.
1951
 */
1952
int
1953
pfm_use_debug_registers(struct task_struct *task)
1954
{
1955
        return 0;
1956
}
1957
 
1958
int
1959
pfm_release_debug_registers(struct task_struct *task)
1960
{
1961
        return 0;
1962
}
1963
#endif /* PFM_PMU_USES_DBR */
1964
 
1965
static int
1966
pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
1967
         struct pt_regs *regs)
1968
{
1969
        void *sem = &ctx->ctx_restart_sem;
1970
 
1971
        /*
1972
         * Cannot do anything before PMU is enabled
1973
         */
1974
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1975
 
1976
        if (task == current) {
1977
                DBprintk(("restarting self %d frozen=%d ovfl_regs=0x%lx\n",
1978
                        task->pid,
1979
                        ctx->ctx_fl_frozen,
1980
                        ctx->ctx_ovfl_regs[0]));
1981
 
1982
                pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
1983
 
1984
                ctx->ctx_ovfl_regs[0] = 0UL;
1985
 
1986
                /*
1987
                 * We ignore block/don't block because we never block
1988
                 * for a self-monitoring process.
1989
                 */
1990
                ctx->ctx_fl_frozen = 0;
1991
 
1992
                if (CTX_HAS_SMPL(ctx)) {
1993
                        ctx->ctx_psb->psb_hdr->hdr_count = 0;
1994
                        ctx->ctx_psb->psb_index = 0;
1995
                }
1996
 
1997
                /* simply unfreeze */
1998
                pfm_unfreeze_pmu();
1999
 
2000
                return 0;
2001
        }
2002
        /* restart on another task */
2003
 
2004
        /*
2005
         * if blocking, then post the semaphore.
2006
         * if non-blocking, then we ensure that the task will go into
2007
         * pfm_overflow_must_block() before returning to user mode.
2008
         * We cannot explicitly reset another task, it MUST always
2009
         * be done by the task itself. This works for system wide because
2010
         * the tool that is controlling the session is doing "self-monitoring".
2011
         *
2012
         * XXX: what if the task never goes back to user?
2013
         *
2014
         */
2015
        if (CTX_OVFL_NOBLOCK(ctx) == 0) {
2016
                DBprintk(("unblocking %d \n", task->pid));
2017
                up(sem);
2018
        } else {
2019
                task->thread.pfm_ovfl_block_reset = 1;
2020
        }
2021
#if 0
2022
        /*
2023
         * in case of non-blocking mode, it's just a matter of
2024
         * resetting the sampling buffer (if any) index. The PMU
2025
         * is already active.
2026
         */
2027
 
2028
        /*
2029
         * must reset the header count first
2030
         */
2031
        if (CTX_HAS_SMPL(ctx)) {
2032
                DBprintk(("resetting sampling indexes for %d \n", task->pid));
2033
                ctx->ctx_psb->psb_hdr->hdr_count = 0;
2034
                ctx->ctx_psb->psb_index = 0;
2035
        }
2036
#endif
2037
        return 0;
2038
}
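/*
 * Restart protocol sketch: a self-monitoring task is reset inline above. For a
 * monitored task in blocking mode, the controlling tool's restart command
 * (index 10 in pfm_cmd_tab below) ups ctx_restart_sem, waking the task blocked
 * in pfm_ovfl_block_reset(); in non-blocking mode the pfm_ovfl_block_reset
 * thread flag is set instead, so the reset happens on the task's way back to
 * user mode.
 */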
2039
 
2040
static int
2041
pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2042
         struct pt_regs *regs)
2043
{
2044
        /* we don't quite support this right now */
2045
        if (task != current) return -EINVAL;
2046
 
2047
        /*
2048
         * Cannot do anything before PMU is enabled
2049
         */
2050
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2051
 
2052
        DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
2053
                                current->pid,
2054
                                ctx->ctx_fl_system, PMU_OWNER(),
2055
                                current));
2056
 
2057
        /* simply stop monitoring but not the PMU */
2058
        if (ctx->ctx_fl_system) {
2059
 
2060
                /* disable dcr pp */
2061
                ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
2062
 
2063
                /* stop monitoring */
2064
                pfm_clear_psr_pp();
2065
                ia64_srlz_i();
2066
 
2067
                PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
2068
 
2069
                ia64_psr(regs)->pp = 0;
2070
 
2071
        } else {
2072
 
2073
                /* stop monitoring */
2074
                pfm_clear_psr_up();
2075
                ia64_srlz_i();
2076
 
2077
                /*
2078
                 * clear user level psr.up
2079
                 */
2080
                ia64_psr(regs)->up = 0;
2081
        }
2082
        return 0;
2083
}
2084
 
2085
static int
2086
pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2087
           struct pt_regs *regs)
2088
{
2089
        /* we don't quite support this right now */
2090
        if (task != current) return -EINVAL;
2091
 
2092
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2093
 
2094
        /*
2095
         * stop monitoring, freeze PMU, and save state in context
2096
         * this call will clear IA64_THREAD_PM_VALID for per-task sessions.
2097
         */
2098
        pfm_flush_regs(task);
2099
 
2100
        if (ctx->ctx_fl_system) {
2101
                ia64_psr(regs)->pp = 0;
2102
        } else {
2103
                ia64_psr(regs)->up = 0;
2104
        }
2105
        /*
2106
         * goes back to default behavior: no user level control
2107
         * no need to change live psr.sp because it is useless at the kernel level
2108
         */
2109
        ia64_psr(regs)->sp = 1;
2110
 
2111
        DBprintk(("enabling psr.sp for [%d]\n", current->pid));
2112
 
2113
        ctx->ctx_flags.state = PFM_CTX_DISABLED;
2114
 
2115
        return 0;
2116
}
2117
 
2118
static int
2119
pfm_context_destroy(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2120
         struct pt_regs *regs)
2121
{
2122
        /* we don't quite support this right now */
2123
        if (task != current) return -EINVAL;
2124
 
2125
        /*
2126
         * if context was never enabled, then there is not much
2127
         * to do
2128
         */
2129
        if (!CTX_IS_ENABLED(ctx)) goto skipped_stop;
2130
 
2131
        /*
2132
         * Disable context: stop monitoring, flush regs to software state (useless here),
2133
         * and freeze PMU
2134
         *
2135
         * The IA64_THREAD_PM_VALID is cleared by pfm_flush_regs() called from pfm_disable()
2136
         */
2137
        pfm_disable(task, ctx, arg, count, regs);
2138
 
2139
        if (ctx->ctx_fl_system) {
2140
                ia64_psr(regs)->pp = 0;
2141
        } else {
2142
                ia64_psr(regs)->up = 0;
2143
        }
2144
 
2145
skipped_stop:
2146
        /*
2147
         * remove sampling buffer mapping, if any
2148
         */
2149
        if (ctx->ctx_smpl_vaddr) {
2150
                pfm_remove_smpl_mapping(task);
2151
                ctx->ctx_smpl_vaddr = 0UL;
2152
        }
2153
        /* now free context and related state */
2154
        pfm_context_exit(task);
2155
 
2156
        return 0;
2157
}
2158
 
2159
/*
2160
 * does nothing at the moment
2161
 */
2162
static int
2163
pfm_context_unprotect(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2164
         struct pt_regs *regs)
2165
{
2166
        return 0;
2167
}
2168
 
2169
static int
2170
pfm_protect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2171
         struct pt_regs *regs)
2172
{
2173
        /*
2174
         * from now on, only the creator of the context has access to it
2175
         */
2176
        ctx->ctx_fl_protected = 1;
2177
 
2178
        /*
2179
         * reinforce secure monitoring: cannot toggle psr.up
2180
         */
2181
        if (ctx->ctx_fl_unsecure == 0) ia64_psr(regs)->sp = 1;
2182
 
2183
        DBprintk(("[%d] protected psr.sp=%d\n", task->pid, ia64_psr(regs)->sp));
2184
 
2185
        return 0;
2186
}
2187
 
2188
static int
2189
pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2190
         struct pt_regs *regs)
2191
{
2192
        unsigned int mode = *(unsigned int *)arg;
2193
 
2194
        pfm_sysctl.debug = mode == 0 ? 0 : 1;
2195
 
2196
        printk(KERN_INFO "perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off");
2197
 
2198
        return 0;
2199
}
2200
 
2201
#ifdef PFM_PMU_USES_DBR
2202
 
2203
typedef struct {
2204
        unsigned long ibr_mask:56;
2205
        unsigned long ibr_plm:4;
2206
        unsigned long ibr_ig:3;
2207
        unsigned long ibr_x:1;
2208
} ibr_mask_reg_t;
2209
 
2210
typedef struct {
2211
        unsigned long dbr_mask:56;
2212
        unsigned long dbr_plm:4;
2213
        unsigned long dbr_ig:2;
2214
        unsigned long dbr_w:1;
2215
        unsigned long dbr_r:1;
2216
} dbr_mask_reg_t;
2217
 
2218
typedef union {
2219
        unsigned long  val;
2220
        ibr_mask_reg_t ibr;
2221
        dbr_mask_reg_t dbr;
2222
} dbreg_t;
2223
 
2224
 
2225
static int
2226
pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs)
2227
{
2228
        struct thread_struct *thread = &task->thread;
2229
        pfm_context_t *ctx = task->thread.pfm_context;
2230
        pfarg_dbreg_t tmp, *req = (pfarg_dbreg_t *)arg;
2231
        dbreg_t dbreg;
2232
        unsigned int rnum;
2233
        int first_time;
2234
        int i, ret = 0;
2235
 
2236
        /*
2237
         * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
2238
         * ensuring that no real breakpoint can be installed via this call.
2239
         */
2240
 
2241
        first_time = ctx->ctx_fl_using_dbreg == 0;
2242
 
2243
        /*
2244
         * check for debug registers in system wide mode
2245
         *
2246
         */
2247
        LOCK_PFS();
2248
        if (ctx->ctx_fl_system && first_time) {
2249
                if (pfm_sessions.pfs_ptrace_use_dbregs)
2250
                        ret = -EBUSY;
2251
                else
2252
                        pfm_sessions.pfs_sys_use_dbregs++;
2253
        }
2254
        UNLOCK_PFS();
2255
 
2256
        if (ret != 0) return ret;
2257
 
2258
        if (ctx->ctx_fl_system) {
2259
                /* we mark ourselves as owner  of the debug registers */
2260
                ctx->ctx_fl_using_dbreg = 1;
2261
                DBprintk(("system-wide setting fl_using_dbreg for [%d]\n", task->pid));
2262
        } else if (first_time) {
2263
                        ret= -EBUSY;
2264
                        if ((thread->flags & IA64_THREAD_DBG_VALID) != 0) {
2265
                                DBprintk(("debug registers already in use for [%d]\n", task->pid));
2266
                                goto abort_mission;
2267
                        }
2268
                        /* we mark ourselves as owner  of the debug registers */
2269
                        ctx->ctx_fl_using_dbreg = 1;
2270
 
2271
                        DBprintk(("setting fl_using_dbreg for [%d]\n", task->pid));
2272
                        /*
2273
                         * Given that debug registers cannot be used for both debugging
2274
                         * and performance monitoring at the same time, we reuse
2275
                         * the storage area to save and restore the registers on ctxsw.
2276
                         */
2277
                        memset(task->thread.dbr, 0, sizeof(task->thread.dbr));
2278
                        memset(task->thread.ibr, 0, sizeof(task->thread.ibr));
2279
        }
2280
 
2281
        if (first_time) {
2282
                DBprintk(("[%d] clearing ibrs,dbrs\n", task->pid));
2283
                /*
2284
                 * clear hardware registers to make sure we don't
2285
                 * pick up stale state.
2286
                 *
2287
                 * for a system wide session, we do not use
2288
                 * thread.dbr, thread.ibr because this process
2289
                 * never leaves the current CPU and the state
2290
                 * is shared by all processes running on it
2291
                 */
2292
                for (i=0; i < pmu_conf.num_ibrs; i++) {
2293
                        ia64_set_ibr(i, 0UL);
2294
                }
2295
                ia64_srlz_i();
2296
                for (i=0; i < pmu_conf.num_dbrs; i++) {
2297
                        ia64_set_dbr(i, 0UL);
2298
                }
2299
                ia64_srlz_d();
2300
        }
2301
 
2302
        ret = -EFAULT;
2303
 
2304
        /*
2305
         * Now install the values into the registers
2306
         */
2307
        for (i = 0; i < count; i++, req++) {
2308
 
2309
                if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission;
2310
 
2311
                rnum      = tmp.dbreg_num;
2312
                dbreg.val = tmp.dbreg_value;
2313
 
2314
                ret = -EINVAL;
2315
 
2316
                if ((mode == 0 && !IBR_IS_IMPL(rnum)) || ((mode == 1) && !DBR_IS_IMPL(rnum))) {
2317
                        DBprintk(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
2318
                                  rnum, dbreg.val, mode, i, count));
2319
 
2320
                        goto abort_mission;
2321
                }
2322
 
2323
                /*
2324
                 * make sure we do not install an enabled breakpoint
2325
                 */
2326
                if (rnum & 0x1) {
2327
                        if (mode == 0)
2328
                                dbreg.ibr.ibr_x = 0;
2329
                        else
2330
                                dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
2331
                }
2332
 
2333
                /*
2334
                 * clear return flags and copy back to user
2335
                 *
2336
                 * XXX: fix once EAGAIN is implemented
2337
                 */
2338
                ret = -EFAULT;
2339
 
2340
                PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0);
2341
 
2342
                if (__copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission;
2343
 
2344
                /*
2345
                 * Debug registers, just like PMC, can only be modified
2346
                 * by a kernel call. Moreover, perfmon() access to those
2347
                 * registers are centralized in this routine. The hardware
2348
                 * does not modify the value of these registers, therefore,
2349
                 * if we save them as they are written, we can avoid having
2350
                 * to save them on context switch out. This is made possible
2351
                 * by the fact that when perfmon uses debug registers, ptrace()
2352
                 * won't be able to modify them concurrently.
2353
                 */
2354
                if (mode == 0) {
2355
                        CTX_USED_IBR(ctx, rnum);
2356
 
2357
                        ia64_set_ibr(rnum, dbreg.val);
2358
                        ia64_srlz_i();
2359
 
2360
                        thread->ibr[rnum] = dbreg.val;
2361
 
2362
                        DBprintk(("write ibr%u=0x%lx used_ibrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_ibrs[0]));
2363
                } else {
2364
                        CTX_USED_DBR(ctx, rnum);
2365
 
2366
                        ia64_set_dbr(rnum, dbreg.val);
2367
                        ia64_srlz_d();
2368
 
2369
                        thread->dbr[rnum] = dbreg.val;
2370
 
2371
                        DBprintk(("write dbr%u=0x%lx used_dbrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_dbrs[0]));
2372
                }
2373
        }
2374
 
2375
        return 0;
2376
 
2377
abort_mission:
2378
        /*
2379
         * in case it was our first attempt, we undo the global modifications
2380
         */
2381
        if (first_time) {
2382
                LOCK_PFS();
2383
                if (ctx->ctx_fl_system) {
2384
                        pfm_sessions.pfs_sys_use_dbregs--;
2385
                }
2386
                UNLOCK_PFS();
2387
                ctx->ctx_fl_using_dbreg = 0;
2388
        }
2389
        /*
2390
         * install error return flag
2391
         */
2392
        if (ret != -EFAULT) {
2393
                /*
2394
                 * XXX: for now we can only come here on EINVAL
2395
                 */
2396
                PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL);
2397
                if (__put_user(tmp.dbreg_flags, &req->dbreg_flags)) ret = -EFAULT;
2398
        }
2399
        return ret;
2400
}
2401
 
2402
static int
2403
pfm_write_ibrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2404
         struct pt_regs *regs)
2405
{
2406
        /* we don't quite support this right now */
2407
        if (task != current) return -EINVAL;
2408
 
2409
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2410
 
2411
        return pfm_write_ibr_dbr(0, task, arg, count, regs);
2412
}
2413
 
2414
static int
2415
pfm_write_dbrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2416
         struct pt_regs *regs)
2417
{
2418
        /* we don't quite support this right now */
2419
        if (task != current) return -EINVAL;
2420
 
2421
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2422
 
2423
        return pfm_write_ibr_dbr(1, task, arg, count, regs);
2424
}
2425
 
2426
#endif /* PFM_PMU_USES_DBR */
2427
 
2428
static int
2429
pfm_get_features(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
2430
{
2431
        pfarg_features_t tmp;
2432
 
2433
        memset(&tmp, 0, sizeof(tmp));
2434
 
2435
        tmp.ft_version      = PFM_VERSION;
2436
        tmp.ft_smpl_version = PFM_SMPL_VERSION;
2437
 
2438
        if (__copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT;
2439
 
2440
        return 0;
2441
}
2442
 
2443
static int
2444
pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2445
          struct pt_regs *regs)
2446
{
2447
        /* we don't quite support this right now */
2448
        if (task != current) return -EINVAL;
2449
 
2450
        /*
2451
         * Cannot do anything before PMU is enabled
2452
         */
2453
        if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2454
 
2455
        DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
2456
                                current->pid,
2457
                                ctx->ctx_fl_system, PMU_OWNER(),
2458
                                current));
2459
 
2460
        if (PMU_OWNER() != task) {
2461
                printk(KERN_DEBUG "perfmon: pfm_start task [%d] not pmu owner\n", task->pid);
2462
                return -EINVAL;
2463
        }
2464
 
2465
        if (ctx->ctx_fl_system) {
2466
 
2467
                PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
2468
 
2469
                /* set user level psr.pp */
2470
                ia64_psr(regs)->pp = 1;
2471
 
2472
                /* start monitoring at kernel level */
2473
                pfm_set_psr_pp();
2474
 
2475
                /* enable dcr pp */
2476
                ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
2477
 
2478
                ia64_srlz_i();
2479
 
2480
        } else {
2481
                if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
2482
                        printk(KERN_DEBUG "perfmon: pfm_start task flag not set for [%d]\n",
2483
                               task->pid);
2484
                        return -EINVAL;
2485
                }
2486
                /* set user level psr.up */
2487
                ia64_psr(regs)->up = 1;
2488
 
2489
                /* start monitoring at kernel level */
2490
                pfm_set_psr_up();
2491
 
2492
                ia64_srlz_i();
2493
        }
2494
 
2495
        return 0;
2496
}
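/*
 * Note on the PSR bits used above and in pfm_enable()/pfm_stop(): per-task
 * monitoring is gated by psr.up, system-wide monitoring by psr.pp together
 * with dcr.pp so that it also covers kernel-level execution, and psr.sp
 * decides whether user code may toggle psr.up itself (pfm_enable() clears
 * psr.sp for per-task sessions to allow user-level control).
 */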
2497
 
2498
static int
2499
pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2500
           struct pt_regs *regs)
2501
{
2502
        /* we don't quite support this right now */
2503
        if (task != current) {
2504
                DBprintk(("task [%d] != current [%d]\n", task->pid, current->pid));
2505
                return -EINVAL;
2506
        }
2507
 
2508
#ifndef CONFIG_SMP
2509
        if (ctx->ctx_fl_system == 0 && PMU_OWNER()  && PMU_OWNER() != current)
2510
                pfm_lazy_save_regs(PMU_OWNER());
2511
#endif
2512
 
2513
        /* reset all registers to stable quiet state */
2514
        pfm_reset_pmu(task);
2515
 
2516
        /* make sure nothing starts */
2517
        if (ctx->ctx_fl_system) {
2518
                ia64_psr(regs)->pp = 0;
2519
                ia64_psr(regs)->up = 0; /* just to make sure! */
2520
 
2521
                /* make sure monitoring is stopped */
2522
                pfm_clear_psr_pp();
2523
                ia64_srlz_i();
2524
 
2525
                PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
2526
                PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
2527
                if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
2528
        } else {
2529
                /*
2530
                 * needed in case the task was a passive task during
2531
                 * a system wide session and now wants to have its own
2532
                 * session
2533
                 */
2534
                ia64_psr(regs)->pp = 0; /* just to make sure! */
2535
                ia64_psr(regs)->up = 0;
2536
 
2537
                /* make sure monitoring is stopped */
2538
                pfm_clear_psr_up();
2539
                ia64_srlz_i();
2540
 
2541
                DBprintk(("clearing psr.sp for [%d]\n", current->pid));
2542
 
2543
                /* allow user level control  */
2544
                ia64_psr(regs)->sp = 0;
2545
 
2546
                /* PMU state will be saved/restored on ctxsw */
2547
                task->thread.flags |= IA64_THREAD_PM_VALID;
2548
        }
2549
 
2550
        SET_PMU_OWNER(task);
2551
 
2552
        ctx->ctx_flags.state = PFM_CTX_ENABLED;
2553
        SET_LAST_CPU(ctx, smp_processor_id());
2554
        INC_ACTIVATION();
2555
        SET_ACTIVATION(ctx);
2556
 
2557
        /* simply unfreeze */
2558
        pfm_unfreeze_pmu();
2559
 
2560
        return 0;
2561
}
2562
 
2563
static int
2564
pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2565
           struct pt_regs *regs)
2566
{
2567
        pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
2568
        unsigned int cnum;
2569
        int i, ret = -EINVAL;
2570
 
2571
        for (i = 0; i < count; i++, req++) {
2572
 
2573
                if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
2574
 
2575
                cnum = tmp.reg_num;
2576
 
2577
                if (!PMC_IS_IMPL(cnum)) goto abort_mission;
2578
 
2579
                tmp.reg_value = PMC_DFL_VAL(cnum);
2580
 
2581
                PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
2582
 
2583
                DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value));
2584
 
2585
                if (__copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
2586
        }
2587
        return 0;
2588
abort_mission:
2589
        PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
2590
        if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
2591
 
2592
        return ret;
2593
}
2594
 
2595
/*
2596
 * functions MUST be listed in the increasing order of their index (see perfmon.h)
2597
 */
2598
static pfm_cmd_desc_t pfm_cmd_tab[]={
2599
/* 0  */{ NULL, 0, 0, 0}, /* not used */
2600
/* 1  */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2601
/* 2  */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2602
/* 3  */{ pfm_read_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2603
/* 4  */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2604
/* 5  */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2605
/* 6  */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2606
/* 7  */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2607
/* 8  */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)},
2608
/* 9  */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2609
/* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
2610
/* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2611
/* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0},
2612
/* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
2613
/* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2614
/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2615
/* 16 */{ NULL, 0, 0, 0}, /* not used */
2616
/* 17 */{ NULL, 0, 0, 0}, /* not used */
2617
/* 18 */{ NULL, 0, 0, 0}, /* not used */
2618
/* 19 */{ NULL, 0, 0, 0}, /* not used */
2619
/* 20 */{ NULL, 0, 0, 0}, /* not used */
2620
/* 21 */{ NULL, 0, 0, 0}, /* not used */
2621
/* 22 */{ NULL, 0, 0, 0}, /* not used */
2622
/* 23 */{ NULL, 0, 0, 0}, /* not used */
2623
/* 24 */{ NULL, 0, 0, 0}, /* not used */
2624
/* 25 */{ NULL, 0, 0, 0}, /* not used */
2625
/* 26 */{ NULL, 0, 0, 0}, /* not used */
2626
/* 27 */{ NULL, 0, 0, 0}, /* not used */
2627
/* 28 */{ NULL, 0, 0, 0}, /* not used */
2628
/* 29 */{ NULL, 0, 0, 0}, /* not used */
2629
/* 30 */{ NULL, 0, 0, 0}, /* not used */
2630
/* 31 */{ NULL, 0, 0, 0}, /* not used */
2631
#ifdef PFM_PMU_USES_DBR
2632
/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
2633
/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
2634
#endif
2635
};
2636
#define PFM_CMD_COUNT   (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
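/*
 * Dispatch sketch: sys_perfmonctl() below validates cmd against this table
 * (argument count and size, access_ok(), optional pid and context lookup) and
 * then calls pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func(). Judging from the
 * CTX_IS_ENABLED() checks above, a typical sequence is: command 8 (create
 * context), 6 (enable), 1/2 (program PMCs/PMDs), 5 (start), and later 4 (stop)
 * and 3 (read PMDs).
 */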
2637
 
2638
static int
2639
check_task_state(struct task_struct *task)
2640
{
2641
        int ret = 0;
2642
#ifdef CONFIG_SMP
2643
        /* We must wait until the state has been completely
2644
         * saved. There can be situations where the reader arrives right
2645
         * after the task is marked as STOPPED but before pfm_save_regs()
2646
         * is completed.
2647
         */
2648
        for (;;) {
2649
 
2650
                task_lock(task);
2651
                DBprintk((" [%d] state=%ld\n", task->pid, task->state));
2652
                if (!task_has_cpu(task)) break;
2653
                task_unlock(task);
2654
 
2655
                do {
2656
                        if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
2657
                                DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
2658
                                return -EBUSY;
2659
                        }
2660
                        barrier();
2661
                        cpu_relax();
2662
                } while (task_has_cpu(task));
2663
        }
2664
        task_unlock(task);
2665
#else
2666
        if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
2667
                DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
2668
                ret = -EBUSY;
2669
        }
2670
#endif
2671
        return ret;
2672
}
2673
 
2674
asmlinkage long
2675
sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
2676
                long arg8, long stack)
2677
{
2678
        struct pt_regs *regs = (struct pt_regs *)&stack;
2679
        struct task_struct *task = current;
2680
        pfm_context_t *ctx;
2681
        size_t sz;
2682
        long ret;
2683
        int narg;
2684
 
2685
        /*
2686
         * reject any call if perfmon was disabled at initialization time
2687
         */
2688
        if (PFM_IS_DISABLED()) return -ENOSYS;
2689
 
2690
        DBprintk(("cmd=%d idx=%d valid=%d narg=0x%x\n", cmd, PFM_CMD_IDX(cmd),
2691
                  PFM_CMD_IS_VALID(cmd), PFM_CMD_NARG(cmd)));
2692
 
2693
        if (PFM_CMD_IS_VALID(cmd) == 0) return -EINVAL;
2694
 
2695
        /* ignore arguments when command has none */
2696
        narg = PFM_CMD_NARG(cmd);
2697
        if ((narg == PFM_CMD_ARG_MANY  && count == 0) || (narg > 0 && narg != count)) return -EINVAL;
2698
 
2699
        sz = PFM_CMD_ARG_SIZE(cmd);
2700
 
2701
        if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT;
2702
 
2703
        if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
2704
 
2705
        if (PFM_CMD_USE_PID(cmd))  {
2706
                /*
2707
                 * XXX: may need to fine tune this one
2708
                 */
2709
                if (pid < 2) return -EPERM;
2710
 
2711
                if (pid != current->pid) {
2712
 
2713
                        ret = -ESRCH;
2714
 
2715
                        read_lock(&tasklist_lock);
2716
 
2717
                        task = find_task_by_pid(pid);
2718
 
2719
                        if (!task) goto abort_call;
2720
 
2721
                        ret = -EPERM;
2722
 
2723
                        if (pfm_bad_permissions(task)) goto abort_call;
2724
 
2725
                        if (PFM_CMD_CHK(cmd)) {
2726
                                ret = check_task_state(task);
2727
                                if (ret != 0) {
2728
                                        DBprintk(("check_task_state=%ld for [%d]\n", ret, task->pid));
2729
                                        goto abort_call;
2730
                                }
2731
                        }
2732
                }
2733
        }
2734
 
2735
        ctx = PFM_GET_CTX(task);
2736
 
2737
        if (PFM_CMD_USE_CTX(cmd)) {
2738
                ret = -EINVAL;
2739
               if (ctx == NULL) {
2740
                        DBprintk(("no context for task %d\n", task->pid));
2741
                        goto abort_call;
2742
               }
2743
 
2744
 
2745
               ret = -EPERM;
2746
               /*
2747
                * we only grant access to the context if:
2748
                *       - the caller is the creator of the context (ctx_owner)
2749
                *  OR   - the context is attached to the caller AND the context IS NOT
2750
                *         in protected mode
2751
                */
2752
               if (ctx->ctx_owner != current && (ctx->ctx_fl_protected || task != current)) {
2753
                                DBprintk(("context protected, no access for [%d]\n", task->pid));
2754
                                goto abort_call;
2755
               }
2756
        }
2757
 
2758
        ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(task, ctx, arg, count, regs);
2759
 
2760
abort_call:
2761
        if (task != current) read_unlock(&tasklist_lock);
2762
 
2763
        return ret;
2764
}
2765
 
2766
void asmlinkage
2767
pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
2768
                      u64 arg6, u64 arg7, long info)
2769
{
2770
        struct thread_struct *th = &current->thread;
2771
        pfm_context_t *ctx = current->thread.pfm_context;
2772
        int ret;
2773
 
2774
        /*
2775
         * clear the flag, to make sure we won't get here
2776
         * again
2777
         */
2778
        th->pfm_ovfl_block_reset = 0;
2779
 
2780
        /*
2781
         * do some sanity checks first
2782
         */
2783
        if (!ctx) {
2784
                printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid);
2785
                return;
2786
        }
2787
 
2788
        if (CTX_OVFL_NOBLOCK(ctx)) goto non_blocking;
2789
 
2790
        DBprintk(("[%d] before sleeping\n", current->pid));
2791
 
2792
        /*
2793
         * may go through without blocking on SMP systems
2794
         * if restart has been received already by the time we call down()
2795
         */
2796
        ret = down_interruptible(&ctx->ctx_restart_sem);
2797
 
2798
        DBprintk(("[%d] after sleeping ret=%d\n", current->pid, ret));
2799
 
2800
        /*
2801
         * in case of interruption of down() we don't restart anything
2802
         */
2803
        if (ret >= 0) {
2804
 
2805
non_blocking:
2806
                /* we reactivate on context switch */
2807
                ctx->ctx_fl_frozen = 0;
2808
                /*
2809
                 * the ovfl_sem is cleared by the restart task and this is safe because we always
2810
                 * use the local reference
2811
                 */
2812
 
2813
                pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
2814
 
2815
                ctx->ctx_ovfl_regs[0] = 0UL;
2816
 
2817
                /*
2818
                 * Unlock sampling buffer and reset index atomically
2819
                 * XXX: not really needed when blocking
2820
                 */
2821
                if (CTX_HAS_SMPL(ctx)) {
2822
                        ctx->ctx_psb->psb_hdr->hdr_count = 0;
2823
                        ctx->ctx_psb->psb_index = 0;
2824
                }
2825
 
2826
                pfm_unfreeze_pmu();
2827
 
2828
                /* state restored, can go back to work (user mode) */
2829
        }
2830
}
2831
 
2832
/*
2833
 * This function will record an entry in the sampling buffer if it is not full already.
2834
 * Input:
2835
 *      ovfl_mask: mask of overflowed PMD. MUST NEVER be 0.
2836
 * Return:
2837
 *      0 : buffer is not full (did not BECOME full: still space or was already full)
2838
 *      1 : buffer is full (recorded the last entry)
2839
 */
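/*
 * Entry layout note: each record starts with a perfmon_smpl_entry_t header
 * (pid, cpu, last_reset_value, interrupt ip, mask of overflowed registers,
 * timestamp) immediately followed by the values of the PMDs selected in
 * ctx_smpl_regs[0], stored in increasing register index order. The entry is
 * published to user space by atomically incrementing psb_hdr->hdr_count.
 */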
2840
static int
2841
pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ovfl_mask, struct pt_regs *regs)
2842
{
2843
        pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
2844
        unsigned long *e, m, idx;
2845
        perfmon_smpl_entry_t *h;
2846
        int j;
2847
 
2848
        idx = ia64_fetch_and_add(1, &psb->psb_index);
2849
        DBprintk_ovfl(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
2850
 
2851
        /*
2852
         * XXX: there is a small chance that we could run out of index space before resetting
2853
         * but index is unsigned long, so it will take some time.....
2854
         * We use > instead of == because fetch_and_add() is off by one (see below)
2855
         *
2856
         * This case can happen in non-blocking mode or with multiple processes.
2857
         * For non-blocking, we need to reload and continue.
2858
         */
2859
        if (idx > psb->psb_entries) return 0;
2860
 
2861
        /* first entry is really entry 0, not 1; the off-by-one comes from fetch_and_add() */
2862
        idx--;
2863
 
2864
        h = (perfmon_smpl_entry_t *)(((char *)psb->psb_addr) + idx*(psb->psb_entry_size));
2865
 
2866
        /*
2867
         * initialize entry header
2868
         */
2869
        h->pid  = ctx->ctx_fl_system ? current->pid : task->pid;
2870
        h->cpu  = smp_processor_id();
2871
        h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL;
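        /*
         * note (added): ffz(~ovfl_mask) above returns the index of the lowest
         * set bit in ovfl_mask, i.e. the first overflowed PMD; its last reset
         * value is what gets recorded in the entry header.
         */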
2872
        h->ip   = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL;
2873
        h->regs = ovfl_mask;                    /* which registers overflowed */
2874
 
2875
        /* guaranteed to monotonically increase on each cpu */
2876
        h->stamp  = pfm_get_stamp();
2877
 
2878
        /* position for first pmd */
2879
        e = (unsigned long *)(h+1);
2880
 
2881
        /*
2882
         * selectively store PMDs in increasing index number
2883
         */
2884
        m = ctx->ctx_smpl_regs[0];
2885
        for (j=0; m; m >>=1, j++) {
2886
 
2887
                if ((m & 0x1) == 0) continue;
2888
 
2889
                if (PMD_IS_COUNTING(j)) {
2890
                        *e  =  pfm_read_soft_counter(ctx, j);
2891
                } else {
2892
                        *e = ia64_get_pmd(j); /* slow */
2893
                }
2894
                DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
2895
                e++;
2896
        }
2897
        pfm_stats[smp_processor_id()].pfm_recorded_samples_count++;
2898
 
2899
        /*
2900
         * make the new entry visible to user, needs to be atomic
2901
         */
2902
        ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count);
2903
 
2904
        DBprintk_ovfl(("index=%ld entries=%ld hdr_count=%ld\n",
2905
                                idx, psb->psb_entries, psb->psb_hdr->hdr_count));
2906
        /*
2907
         * sampling buffer full ?
2908
         */
2909
        if (idx == (psb->psb_entries-1)) {
2910
                DBprintk_ovfl(("sampling buffer full\n"));
2911
                /*
2912
                 * XXX: must reset buffer in blocking mode and lost notified
2913
                 */
2914
                pfm_stats[smp_processor_id()].pfm_full_smpl_buffer_count++;
2915
                return 1;
2916
        }
2917
        return 0;
2918
}
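
/*
 * Illustrative sketch (added): a user-level consumer for the buffer filled
 * by pfm_record_sample() above.  It relies only on what that function
 * writes: a header whose hdr_count field is bumped atomically once an
 * entry is visible, followed by fixed-size entries, each starting with a
 * perfmon_smpl_entry_t and followed by one 64-bit value per PMD selected
 * in ctx_smpl_regs[0].  The header type name, the assumption that entries
 * start right after the header, and the way entry_size is obtained are
 * illustrative only; the authoritative layout is in <asm/perfmon.h>.
 */
#if 0
#include <stdio.h>
#include <perfmon.h>            /* assumed user-level header */

static void
walk_samples(perfmon_smpl_hdr_t *hdr, unsigned long entry_size)
{
        char *pos = (char *)(hdr + 1);  /* assumed: entries follow header */
        unsigned long k;

        for (k = 0; k < hdr->hdr_count; k++, pos += entry_size) {
                perfmon_smpl_entry_t *ent  = (perfmon_smpl_entry_t *)pos;
                unsigned long        *pmds = (unsigned long *)(ent + 1);

                printf("pid=%d cpu=%d ip=0x%lx ovfl_regs=0x%lx pmd[0]=0x%lx\n",
                       ent->pid, ent->cpu, (unsigned long)ent->ip,
                       (unsigned long)ent->regs, pmds[0]);
        }
}
#endif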
2919
 
2920
/*
2921
 * main overflow processing routine.
2922
 * it can be called from the interrupt path or explicitly from the context switch code
2923
 * Return:
2924
 *      new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
2925
 */
2926
static unsigned long
2927
pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
2928
{
2929
        unsigned long mask;
2930
        struct thread_struct *t;
2931
        unsigned long old_val;
2932
        unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
2933
        int i;
2934
        int ret = 1;
2935
        /*
2936
         * It is never safe to access the task for which the overflow interrupt is destined
2937
         * using the current variable as the interrupt may occur in the middle of a context switch
2938
         * where current does not yet point to the task that is actually running.
2939
         *
2940
         * For monitoring, however, we do need to get access to the task which caused the overflow
2941
         * to account for overflow on the counters.
2942
         *
2943
         * We accomplish this by maintaining a current owner of the PMU per CPU. During context
2944
         * switch the ownership is changed in a way such that the reflected owner is always the
2945
         * valid one, i.e. the one that caused the interrupt.
2946
         */
2947
 
2948
        t   = &task->thread;
2949
 
2950
        /*
2951
         * XXX: debug test
2952
         * Don't think this could happen given upfront tests
2953
         */
2954
        if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) {
2955
                printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d not "
2956
                       "using perfmon\n", task->pid);
2957
                return 0x1;
2958
        }
2959
        /*
2960
         * sanity test. Should never happen
2961
         */
2962
        if ((pmc0 & 0x1) == 0) {
2963
                printk(KERN_DEBUG "perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n",
2964
                       task->pid, pmc0);
2965
                return 0x0;
2966
        }
2967
 
2968
        mask = pmc0 >> PMU_FIRST_COUNTER;
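
        /*
         * worked example (added, illustrative): with PMU_FIRST_COUNTER == 4,
         * pmc0 == 0x31 means bit 0 (freeze) is set and bits 4 and 5 report
         * overflows of PMD4 and PMD5; mask is then 0x3 and the loop below
         * visits only i == 4 and i == 5.
         */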
2969
 
2970
        DBprintk_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s"
2971
                  " mode used_pmds=0x%lx used_pmcs=0x%lx reload_pmcs=0x%lx\n",
2972
                        pmc0, task->pid, (regs ? regs->cr_iip : 0),
2973
                        CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
2974
                        ctx->ctx_used_pmds[0],
2975
                        ctx->ctx_used_pmcs[0],
2976
                        ctx->ctx_reload_pmcs[0]));
2977
 
2978
        /*
2979
         * First we update the virtual counters
2980
         */
2981
        for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
2982
 
2983
                /* skip pmd which did not overflow */
2984
                if ((mask & 0x1) == 0) continue;
2985
 
2986
                DBprintk_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n",
2987
                          i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
2988
 
2989
                /*
2990
                 * Note that the pmd is not necessarily 0 at this point as qualified events
2991
                 * may have happened before the PMU was frozen. The residual count is not
2992
                 * taken into consideration here but will be with any read of the pmd via
2993
                 * pfm_read_pmds().
2994
                 */
2995
                old_val                    = ctx->ctx_soft_pmds[i].val;
2996
                ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
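                /*
                 * worked example (added, illustrative): assuming a 47-bit
                 * hardware counter, pmu_conf.ovfl_val == (1UL << 47) - 1,
                 * so each hardware overflow accounts for exactly
                 * 1 + ovfl_val == 2^47 qualified events in the 64-bit
                 * software counter; the residual count still sitting in the
                 * hardware PMD is folded in later by pfm_read_pmds(), as
                 * noted above.
                 */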
2997
 
2998
                /*
2999
                 * check for overflow condition
3000
                 */
3001
                if (old_val > ctx->ctx_soft_pmds[i].val) {
3002
 
3003
                        ovfl_pmds |= 1UL << i;
3004
 
3005
                        if (PMC_OVFL_NOTIFY(ctx, i)) {
3006
                                ovfl_notify |= 1UL << i;
3007
                        }
3008
                } else {
3009
                        /*
3010
                         * clear top bits (maintain counts in lower part, may not always be zero)
3011
                         */
3012
                        ia64_set_pmd(i, ia64_get_pmd(i) & pmu_conf.ovfl_val);
3013
                }
3014
                DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
3015
                          i, ctx->ctx_soft_pmds[i].val, old_val,
3016
                          ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify));
3017
        }
3018
 
3019
        /*
3020
         * check for sampling buffer
3021
         *
3022
         * if present, record sample only when a 64-bit counter has overflowed.
3023
         * We propagate notification ONLY when buffer becomes full.
3024
         */
3025
        if (CTX_HAS_SMPL(ctx) && ovfl_pmds) {
3026
                ret = pfm_record_sample(task, ctx, ovfl_pmds, regs);
3027
                if (ret == 1) {
3028
                        /*
3029
                         * Sampling buffer became full
3030
                         * If no notification was requested, then we reset buffer index
3031
                         * and reset registers (done below) and resume.
3032
                         * If notification requested, then defer reset until pfm_restart()
3033
                         */
3034
                        if (ovfl_notify == 0UL) {
3035
                                ctx->ctx_psb->psb_hdr->hdr_count = 0UL;
3036
                                ctx->ctx_psb->psb_index          = 0UL;
3037
                        }
3038
                } else {
3039
                        /*
3040
                         * sample recorded in buffer, no need to notify user
3041
                         */
3042
                        ovfl_notify = 0UL;
3043
                }
3044
        }
3045
 
3046
        /*
3047
         * No overflow requiring a user level notification
3048
         */
3049
        if (ovfl_notify == 0UL) {
3050
                if (ovfl_pmds)
3051
                        pfm_reset_regs(ctx, &ovfl_pmds, PFM_PMD_SHORT_RESET);
3052
                return 0x0;
3053
        }
3054
 
3055
        /*
3056
         * keep track of what to reset when unblocking
3057
         */
3058
        ctx->ctx_ovfl_regs[0]  = ovfl_pmds;
3059
 
3060
        /*
3061
         * As a consequence of the overflow, we always resume
3062
         * with monitoring turned off. pfm_restart() will
3063
         * reactivate.
3064
         */
3065
        ctx->ctx_fl_frozen = 1;
3066
 
3067
        /*
3068
         * we have come to this point because there was an overflow and that notification
3069
         * was requested. The notify_task may have disappeared, in which case notify_task
3070
         * is NULL.
3071
         */
3072
        LOCK_CTX(ctx);
3073
 
3074
        if (ctx->ctx_notify_task) {
3075
                if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) {
3076
                        t->pfm_ovfl_block_reset = 1; /* will cause blocking */
3077
                } else {
3078
                        t->pfm_ovfl_block_reset = 0;
3079
                }
3080
 
3081
                DBprintk_ovfl(("[%d] scheduling tasklet\n", current->pid));
3082
 
3083
                /*
3084
                 * the tasklet is responsible for sending the notification
3085
                 * not the PMU owner nor the current task.
3086
                 */
3087
                tasklet_schedule(&ctx->ctx_tasklet);
3088
 
3089
        } else {
3090
                DBprintk_ovfl(("notification task has disappeared !\n"));
3091
                t->pfm_ovfl_block_reset = 0;
3092
        }
3093
 
3094
        UNLOCK_CTX(ctx);
3095
 
3096
        DBprintk_ovfl(("return pmc0=0x%x must_block=%ld\n",
3097
                                ctx->ctx_fl_frozen ? 0x1 : 0x0, t->pfm_ovfl_block_reset));
3098
 
3099
        return ctx->ctx_fl_frozen ? 0x1 : 0x0;
3100
}
3101
 
3102
static void
3103
pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
3104
{
3105
        u64 pmc0;
3106
        struct task_struct *task;
3107
        pfm_context_t *ctx;
3108
 
3109
        pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++;
3110
 
3111
        /*
3112
         * if an alternate handler is registered, just bypass the default one
3113
         */
3114
        if (pfm_alternate_intr_handler) {
3115
                (*pfm_alternate_intr_handler->handler)(irq, arg, regs);
3116
                return;
3117
        }
3118
 
3119
        /*
3120
         * srlz.d done before arriving here
3121
         *
3122
         * This is slow
3123
         */
3124
        pmc0 = ia64_get_pmc(0);
3125
        task = PMU_OWNER();
3126
        /*
3127
         * if we have some pending bits set
3128
         * assumes : if any PMC[0].bit[63-1] is set, then PMC[0].fr = 1
3129
         */
3130
        if (PMC0_HAS_OVFL(pmc0) && task) {
3131
                /*
3132
                 * we assume that pmc0.fr is always set here
3133
                 */
3134
                ctx = PFM_GET_CTX(task);
3135
 
3136
                /* sanity check */
3137
                if (!ctx) {
3138
                        printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d has "
3139
                               "no PFM context\n", task->pid);
3140
                        return;
3141
                }
3142
                /*
3143
                 * assume PMC[0].fr = 1 at this point
3144
                 */
3145
                pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
3146
 
3147
                /*
3148
                 * we can only update pmc0 when the overflow
3149
                 * is for the current context or we are in system
3150
                 * wide mode. In UP (per-task) the current
3151
                 * task may not be the one owning the PMU,
3152
                 * same thing for system-wide.
3153
                 */
3154
                if (task == current || ctx->ctx_fl_system) {
3155
                        /*
3156
                         * We always clear the overflow status bits and either unfreeze
3157
                         * or keep the PMU frozen.
3158
                         */
3159
                        ia64_set_pmc(0, pmc0);
3160
                        ia64_srlz_d();
3161
                } else {
3162
                        task->thread.pmc[0] = pmc0;
3163
                }
3164
        } else {
3165
                pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++;
3166
        }
3167
}
3168
 
3169
/* for debug only */
3170
static int
3171
pfm_proc_info(char *page)
3172
{
3173
        char *p = page;
3174
        int i;
3175
 
3176
        p += sprintf(p, "fastctxsw              : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
3177
        p += sprintf(p, "ovfl_mask              : 0x%lx\n", pmu_conf.ovfl_val);
3178
 
3179
        for(i=0; i < NR_CPUS; i++) {
3180
                if (cpu_online(i) == 0) continue;
3181
                p += sprintf(p, "CPU%-2d overflow intrs   : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_count);
3182
                p += sprintf(p, "CPU%-2d spurious intrs   : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count);
3183
                p += sprintf(p, "CPU%-2d recorded samples : %lu\n", i, pfm_stats[i].pfm_recorded_samples_count);
3184
                p += sprintf(p, "CPU%-2d smpl buffer full : %lu\n", i, pfm_stats[i].pfm_full_smpl_buffer_count);
3185
                p += sprintf(p, "CPU%-2d syst_wide        : %d\n", i, cpu_data(i)->pfm_syst_info & PFM_CPUINFO_SYST_WIDE ? 1 : 0);
3186
                p += sprintf(p, "CPU%-2d dcr_pp           : %d\n", i, cpu_data(i)->pfm_syst_info & PFM_CPUINFO_DCR_PP ? 1 : 0);
3187
                p += sprintf(p, "CPU%-2d exclude idle     : %d\n", i, cpu_data(i)->pfm_syst_info & PFM_CPUINFO_EXCL_IDLE ? 1 : 0);
3188
                p += sprintf(p, "CPU%-2d owner            : %d\n", i, pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
3189
                p += sprintf(p, "CPU%-2d activations      : %lu\n", i, pmu_owners[i].activation_number);
3190
        }
3191
 
3192
        LOCK_PFS();
3193
 
3194
        p += sprintf(p, "proc_sessions          : %u\n"
3195
                        "sys_sessions           : %u\n"
3196
                        "sys_use_dbregs         : %u\n"
3197
                        "ptrace_use_dbregs      : %u\n",
3198
                        pfm_sessions.pfs_task_sessions,
3199
                        pfm_sessions.pfs_sys_sessions,
3200
                        pfm_sessions.pfs_sys_use_dbregs,
3201
                        pfm_sessions.pfs_ptrace_use_dbregs);
3202
 
3203
        UNLOCK_PFS();
3204
 
3205
        return p - page;
3206
}
3207
 
3208
/* /proc interface, for debug only */
3209
static int
3210
perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
3211
{
3212
        int len = pfm_proc_info(page);
3213
 
3214
        if (len <= off+count) *eof = 1;
3215
 
3216
        *start = page + off;
3217
        len   -= off;
3218
 
3219
        if (len>count) len = count;
3220
        if (len<0) len = 0;
3221
 
3222
        return len;
3223
}
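
/*
 * Illustrative sketch (added): reading the statistics produced by
 * pfm_proc_info() from user level.  The "/proc/perfmon" path is an
 * assumption; it depends on the name under which this read handler is
 * registered at initialization time.
 */
#if 0
#include <stdio.h>

static void
dump_perfmon_stats(void)
{
        char line[256];
        FILE *f = fopen("/proc/perfmon", "r");  /* assumed entry name */

        if (f == NULL) return;
        while (fgets(line, sizeof(line), f)) fputs(line, stdout);
        fclose(f);
}
#endif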
3224
 
3225
/*
3226
 * we come here as soon as local_cpu_data->pfm_syst_wide is set. This happens
3227
 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
3228
 * is active or inactive based on mode. We must rely on the value in
3229
 * local_cpu_data->pfm_syst_info
3230
 */
3231
void
3232
pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
3233
{
3234
        struct pt_regs *regs;
3235
        unsigned long dcr;
3236
        unsigned long dcr_pp;
3237
 
3238
        dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
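
        /*
         * note (added): dcr_pp reflects whether system-wide monitoring is
         * currently active on this CPU (PFM_CPUINFO_DCR_PP).  For any task
         * other than idle, or when idle is not excluded, we simply mirror
         * it into psr.pp of the incoming task and clear it on the way out;
         * the code further down only deals with the idle task when the
         * session asked to exclude idle time.
         */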
3239
 
3240
        /*
3241
         * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
3242
         * on every CPU, so we can rely on the pid to identify the idle task.
3243
         */
3244
        if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
3245
                regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
3246
                regs--;
3247
                ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
3248
                return;
3249
        }
3250
        /*
3251
         * we are the idle task and there is exclusion.
3252
         *
3253
         * if monitoring has started
3254
         */
3255
        if (dcr_pp) {
3256
                dcr = ia64_get_dcr();
3257
                /*
3258
                 * context switching in?
3259
                 */
3260
                if (is_ctxswin) {
3261
                        /* mask monitoring for the idle task */
3262
                        ia64_set_dcr(dcr & ~IA64_DCR_PP);
3263
                        pfm_clear_psr_pp();
3264
                        ia64_srlz_i();
3265
                        return;
3266
                }
3267
                /*
3268
                 * context switching out
3269
                 * restore normal kernel level settings
3270
                 *
3271
                 * Due to inlining this odd if-then-else construction generates
3272
                 * better code.
3273
                 */
3274
                ia64_set_dcr(dcr | IA64_DCR_PP);
3275
                pfm_set_psr_pp();
3276
                ia64_srlz_i();
3277
        }
3278
}
3279
 
3280
#ifdef CONFIG_SMP
3281
void
3282
pfm_save_regs(struct task_struct *task)
3283
{
3284
        pfm_context_t *ctx;
3285
        struct thread_struct *t;
3286
        u64 psr;
3287
 
3288
        ctx = PFM_GET_CTX(task);
3289
        if (ctx == NULL) goto save_error;
3290
        t = &task->thread;
3291
 
3292
        /*
3293
         * sanity check
3294
         */
3295
        if (ctx->ctx_last_activation != GET_ACTIVATION()) {
3296
                DBprintk(("ctx_activation=%lu activation=%lu: no save\n",
3297
                        ctx->ctx_last_activation, GET_ACTIVATION()));
3298
                return;
3299
        }
3300
 
3301
        /*
3302
         * save current PSR: needed because we modify it
3303
         */
3304
        psr = pfm_get_psr();
3305
 
3306
        /*
3307
         * stop monitoring:
3308
         * This is the last instruction which may generate an overflow
3309
         *
3310
         * We do not need to set psr.sp because it is irrelevant in the kernel.
3311
         * It will be restored from ipsr when going back to user level
3312
         */
3313
        pfm_clear_psr_up();
3314
 
3315
        /*
3316
         * keep a copy of the saved psr (for reload)
3317
         */
3318
        ctx->ctx_saved_psr = psr;
3319
 
3320
        /*
3321
         * release ownership of this PMU.
3322
         */
3323
        SET_PMU_OWNER(NULL);
3324
 
3325
        /*
3326
         * we systematically save the PMD as we have no
3327
         * guarantee we will be scheduled on that same
3328
         * CPU again.
3329
         */
3330
        pfm_save_pmds(t->pmd, ctx->ctx_used_pmds[0]);
3331
 
3332
        /*
3333
         * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
3334
         * we will need it on the restore path to check
3335
         * for pending overflow.
3336
         */
3337
        t->pmc[0] = ia64_get_pmc(0);
3338
 
3339
        return;
3340
 
3341
save_error:
3342
        printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
3343
                smp_processor_id(), task->pid,
3344
                task->thread.flags & IA64_THREAD_PM_VALID);
3345
}
3346
 
3347
#else /* !CONFIG_SMP */
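
/*
 * note (added): on UP, a context switch out only stops monitoring and
 * saves the psr; the PMD registers are left in the PMU and are flushed
 * lazily by pfm_lazy_save_regs() below, and only when another task
 * actually needs the PMU (see the UP version of pfm_load_regs()).
 */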
3348
 
3349
void
3350
pfm_save_regs(struct task_struct *task)
3351
{
3352
        pfm_context_t *ctx;
3353
        u64 psr;
3354
 
3355
        ctx = PFM_GET_CTX(task);
3356
        if (ctx == NULL) goto save_error;
3357
        /*
3358
         * save current PSR: needed because we modify it
3359
         */
3360
        psr = pfm_get_psr();
3361
 
3362
        /*
3363
         * stop monitoring:
3364
         * This is the last instruction which may generate an overflow
3365
         *
3366
         * We do not need to set psr.sp because it is irrelevant in the kernel.
3367
         * It will be restored from ipsr when going back to user level
3368
         */
3369
        pfm_clear_psr_up();
3370
 
3371
        /*
3372
         * keep a copy of the saved psr (for reload)
3373
         */
3374
        ctx->ctx_saved_psr = psr;
3375
 
3376
        return;
3377
save_error:
3378
        printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
3379
                smp_processor_id(), task->pid,
3380
                task->thread.flags & IA64_THREAD_PM_VALID);
3381
}
3382
 
3383
static unsigned long
3384
pfm_lazy_save_regs (struct task_struct *task)
3385
{
3386
        pfm_context_t *ctx;
3387
        struct thread_struct *t;
3388
 
3389
        ctx = PFM_GET_CTX(task);
3390
        t   = &task->thread;
3391
 
3392
        DBprintk(("on [%d] used_pmds=0x%lx\n", task->pid, ctx->ctx_used_pmds[0]));
3393
 
3394
        /*
3395
         * release ownership of this PMU.
3396
         * must be done before we save the registers.
3397
         *
3398
         * after this call any PMU interrupt is treated
3399
         * as spurious.
3400
         */
3401
        SET_PMU_OWNER(NULL);
3402
 
3403
        /*
3404
         * save all the pmds we use
3405
         */
3406
        pfm_save_pmds(t->pmd, ctx->ctx_used_pmds[0]);
3407
 
3408
        /*
3409
         * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
3410
         * it is needed to check for pending overflow
3411
         * on the restore path
3412
         */
3413
        t->pmc[0] = ia64_get_pmc(0);
3414
 
3415
        return t->pmc[0];
3416
}
3417
#endif /* CONFIG_SMP */
3418
 
3419
#ifdef CONFIG_SMP
3420
void
3421
pfm_load_regs (struct task_struct *task)
3422
{
3423
        pfm_context_t *ctx;
3424
        struct thread_struct *t;
3425
        struct task_struct *owner;
3426
        unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
3427
        u64 psr;
3428
 
3429
        ctx = PFM_GET_CTX(task);
3430
        if (unlikely(ctx == NULL)) {
3431
                printk(KERN_ERR "perfmon: pfm_load_regs() null context\n");
3432
                return;
3433
        }
3434
 
3435
        owner = PMU_OWNER();
3436
        t     = &task->thread;
3437
 
3438
        /*
3439
         * possible on unload
3440
         */
3441
        if ((t->flags & IA64_THREAD_PM_VALID) == 0) {
3442
                DBprintk(("[%d] PM_VALID=0, nothing to do\n", task->pid));
3443
                return;
3444
        }
3445
 
3446
        /*
3447
         * we restore ALL the debug registers to avoid picking up
3448
         * stale state.
3449
         *
3450
         * This must be done even when the task is still the owner
3451
         * as the registers may have been modified via ptrace()
3452
         * (not perfmon) by the previous task.
3453
         */
3454
        if (ctx->ctx_fl_using_dbreg) {
3455
                pfm_restore_ibrs(t->ibr, pmu_conf.num_ibrs);
3456
                pfm_restore_dbrs(t->dbr, pmu_conf.num_dbrs);
3457
        }
3458
 
3459
        /*
3460
         * retrieve saved psr
3461
         */
3462
        psr = ctx->ctx_saved_psr;
3463
 
3464
        /*
3465
         * if we were the last user of the PMU on that CPU,
3466
         * then nothing to do except restore psr
3467
         */
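        /*
         * note (added): the activation number is a per-CPU counter bumped
         * by INC_ACTIVATION() each time a context is loaded on this CPU
         * (see below).  If the number recorded in the context still matches
         * and we are on the same CPU, no other context has touched the PMU
         * in between, so the live register state is still ours.
         */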
3468
        if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
3469
                /*
3470
                 * retrieve partial reload masks (due to user modifications)
3471
                 */
3472
                pmc_mask = 0UL;
3473
                pmd_mask = 0UL;
3474
 
3475
                if (pmc_mask || pmd_mask) DBprintk(("partial reload [%d] pmd_mask=0x%lx pmc_mask=0x%lx\n", task->pid, pmd_mask, pmc_mask));
3476
        } else {
3477
                /*
3478
                 * To avoid leaking information to the user level when psr.sp=0,
3479
                 * we must reload ALL implemented pmds (even the ones we don't use).
3480
                 * In the kernel we only allow PFM_READ_PMDS on registers which
3481
                 * we initialized or requested (sampling) so there is no risk there.
3482
                 */
3483
                pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
3484
 
3485
                /*
3486
                 * ALL accessible PMCs are systematically reloaded, unused registers
3487
                 * get their default (from pfm_reset_pmu_state()) values to avoid picking
3488
                 * up stale configuration.
3489
                 *
3490
                 * PMC0 is never in the mask. It is always restored separately.
3491
                 */
3492
                pmc_mask = ctx->ctx_reload_pmcs[0];
3493
 
3494
                DBprintk(("full reload for [%d] owner=%d activation=%lu last_activation=%lu last_cpu=%d pmd_mask=0x%lx pmc_mask=0x%lx\n",
3495
                        task->pid, owner ? owner->pid : -1,
3496
                        GET_ACTIVATION(), ctx->ctx_last_activation,
3497
                        GET_LAST_CPU(ctx), pmd_mask, pmc_mask));
3498
 
3499
        }
3500
 
3501
        if (pmd_mask) pfm_restore_pmds(t->pmd, pmd_mask);
3502
        if (pmc_mask) pfm_restore_pmcs(t->pmc, pmc_mask);
3503
 
3504
        /*
3505
         * check for pending overflow at the time the state
3506
         * was saved.
3507
         */
3508
        if (PMC0_HAS_OVFL(t->pmc[0])) {
3509
                struct pt_regs *regs = TASK_PTREGS(task);
3510
                pfm_overflow_handler(task, ctx, t->pmc[0], regs);
3511
        }
3512
 
3513
        /*
3514
         * fl_frozen==1 when we are in blocking mode waiting for restart
3515
         */
3516
        if (ctx->ctx_fl_frozen == 0) {
3517
                pfm_unfreeze_pmu();
3518
        }
3519
 
3520
        SET_LAST_CPU(ctx, smp_processor_id());
3521
 
3522
        /*
3523
         * dump activation value for this PMU
3524
         */
3525
        INC_ACTIVATION();
3526
        /*
3527
         * record current activation for this context
3528
         */
3529
        SET_ACTIVATION(ctx);
3530
 
3531
        /*
3532
         * establish new ownership. Interrupts
3533
         * are still masked at this point.
3534
         */
3535
        SET_PMU_OWNER(task);
3536
 
3537
        /*
3538
         * restore the psr we changed
3539
         */
3540
        pfm_set_psr_l(psr);
3541
 
3542
}
3543
#else /*  !CONFIG_SMP */
3544
/*
3545
 * reload PMU state for UP kernels
3546
 */
3547
void
3548
pfm_load_regs (struct task_struct *task)
3549
{
3550
        struct thread_struct *t;
3551
        pfm_context_t *ctx;
3552
        struct task_struct *owner;
3553
        unsigned long pmd_mask, pmc_mask;
3554
        unsigned long prev_pmc0 = ~0UL;
3555
        u64 psr;
3556
 
3557
        owner      = PMU_OWNER();
3558
        ctx        = PFM_GET_CTX(task);
3559
        t          = &task->thread;
3560
 
3561
        /*
3562
         * we restore ALL the debug registers to avoid picking up
3563
         * stale state.
3564
         *
3565
         * This must be done even when the task is still the owner
3566
         * as the registers may have been modified via ptrace()
3567
         * (not perfmon) by the previous task.
3568
         */
3569
        if (ctx->ctx_fl_using_dbreg) {
3570
                pfm_restore_ibrs(t->ibr, pmu_conf.num_ibrs);
3571
                pfm_restore_dbrs(t->dbr, pmu_conf.num_dbrs);
3572
        }
3573
 
3574
        /*
3575
         * retrieve saved psr
3576
         */
3577
        psr = ctx->ctx_saved_psr;
3578
 
3579
        /*
3580
         * short path, our state is still there, just
3581
         * need to restore psr and we go
3582
         *
3583
         * we do not touch either PMC nor PMD. the psr is not touched
3584
         * by the overflow_handler. So we are safe w.r.t. to interrupt
3585
         * concurrency even without interrupt masking.
3586
         */
3587
        if (owner == task) {
3588
                pfm_set_psr_l(psr);
3589
                return;
3590
        }
3591
 
3592
        DBprintk(("reload for [%d] owner=%d\n", task->pid, owner ? owner->pid : -1));
3593
 
3594
        /*
3595
         * someone else is still using the PMU, first push it out and
3596
         * then we'll be able to install our stuff !
3597
         *
3598
         * Upon return, there will be no owner for the current PMU
3599
         */
3600
        if (owner) prev_pmc0 = pfm_lazy_save_regs(owner);
3601
        /*
3602
         * To avoid leaking information to the user level when psr.sp=0,
3603
         * we must reload ALL implemented pmds (even the ones we don't use).
3604
         * In the kernel we only allow PFM_READ_PMDS on registers which
3605
         * we initialized or requested (sampling) so there is no risk there.
3606
         */
3607
        pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
3608
 
3609
        /*
3610
         * ALL accessible PMCs are systematically reloaded, unused registers
3611
         * get their default (from pfm_reset_pmu_state()) values to avoid picking
3612
         * up stale configuration.
3613
         *
3614
         * PMC0 is never in the mask. It is always restored separately.
3615
         */
3616
        pmc_mask = ctx->ctx_reload_pmcs[0];
3617
 
3618
        pfm_restore_pmds(t->pmd, pmd_mask);
3619
        pfm_restore_pmcs(t->pmc, pmc_mask);
3620
 
3621
        /*
3622
         * Check for pending overflow when state was last saved.
3623
         * The handler is invoked if overflow status bits are set.
3624
         *
3625
         * Any PMU overflow in flight at this point, will still
3626
         * be treated as spurious because we have no declared
3627
         * owner. Note that the first level interrupt handler
3628
         * DOES NOT TOUCH any PMC except PMC0 for which we have
3629
         * a copy already.
3630
         */
3631
        if (PMC0_HAS_OVFL(t->pmc[0])) {
3632
                struct pt_regs *regs = TASK_PTREGS(task);
3633
                pfm_overflow_handler(task, ctx, t->pmc[0], regs);
3634
        }
3635
 
3636
 
3637
 
3638
        /*
3639
         * fl_frozen==1 when we are in blocking mode waiting for restart
3640
         */
3641
        if (ctx->ctx_fl_frozen == 0) {
3642
                pfm_unfreeze_pmu();
3643
        } else if (prev_pmc0 == 0UL && ctx->ctx_fl_frozen) {
3644
                /*
3645
                 * owner is still NULL at this point.
3646
                 *
3647
                 * if the previous owner (from lazy_save_regs())
3648
                 * was not in frozen state, then we need to freeze
3649
                 * the PMU if the new context is frozen.
3650
                 *
3651
                 * on McKinley this will generate a spurious interrupt
3652
                 * but we have no other way.
3653
                 */
3654
                pfm_freeze_pmu();
3655
        }
3656
 
3657
        /*
3658
         * establish new ownership. If there was an in-flight
3659
         * overflow interrupt, it will be treated as spurious
3660
         * before and after the call, because no overflow
3661
         * status bit can possibly be set. No new overflow
3662
         * can be generated because, at this point, psr.up
3663
         * is still cleared.
3664
         */
3665
        SET_PMU_OWNER(task);
3666
 
3667
        /*
3668
         * restore the psr. This is the point at which
3669
         * new overflow interrupts can be generated again.
3670
         */
3671
        pfm_set_psr_l(psr);
3672
}
3673
#endif /* CONFIG_SMP */
3674
 
3675
/*
3676
 * XXX: make this routine able to work with a non-current context
3677
 */
3678
static void
3679
pfm_reset_pmu(struct task_struct *task)
3680
{
3681
        struct thread_struct *t = &task->thread;
3682
        pfm_context_t *ctx = t->pfm_context;
3683
        int i;
3684
 
3685
        if (task != current) {
3686
                printk("perfmon: invalid task in pfm_reset_pmu()\n");
3687
                return;
3688
        }
3689
 
3690
        /* Let's make sure the PMU is frozen */
3691
        pfm_freeze_pmu();
3692
 
3693
        /*
3694
         * install reset values for PMC. We skip PMC0 (done above)
3695
         * XXX: good up to 64 PMCs
3696
         */
3697
        for (i=1; (pmu_conf.pmc_desc[i].type & PFM_REG_END) == 0; i++) {
3698
                if ((pmu_conf.pmc_desc[i].type & PFM_REG_IMPL) == 0) continue;
3699
                ia64_set_pmc(i, PMC_DFL_VAL(i));
3700
                /*
3701
                 * When restoring context, we must restore ALL pmcs, even the ones
3702
                 * that the task does not use to avoid leaks and possibly corruption
3703
                 * of the session because of configuration conflicts. So here, we
3704
                 * initialize the entire set used in the context switch restore routine.
3705
                 */
3706
                t->pmc[i] = PMC_DFL_VAL(i);
3707
                DBprintk(("pmc[%d]=0x%lx\n", i, t->pmc[i]));
3708
        }
3709
 
3710
        /*
3711
         * clear reset values for PMD.
3712
         * XXX: good up to 64 PMDS.
3713
         */
3714
        for (i=0; (pmu_conf.pmd_desc[i].type & PFM_REG_END) == 0; i++) {
3715
                if ((pmu_conf.pmd_desc[i].type & PFM_REG_IMPL) == 0) continue;
3716
                ia64_set_pmd(i, 0UL);
3717
                t->pmd[i] = 0UL;
3718
        }
3719
 
3720
        /*
3721
         * On context switched restore, we must restore ALL pmc and ALL pmd even
3722
         * when they are not actively used by the task. In UP, the incoming process
3723
         * may otherwise pick up left over PMC, PMD state from the previous process.
3724
         * As opposed to PMD, stale PMC can cause harm to the incoming
3725
         * process because they may change what is being measured.
3726
         * Therefore, we must systematically reinstall the entire
3727
         * PMC state. In SMP, the same thing is possible on the
3728
         * same CPU but also between 2 CPUs.
3729
         *
3730
         * The problem with PMD is information leaking especially
3731
         * to user level when psr.sp=0
3732
         *
3733
         * There is unfortunately no easy way to avoid this problem
3734
         * on either UP or SMP. This definitely slows down the
3735
         * pfm_load_regs() function.
3736
         */
3737
 
3738
         /*
3739
          * We must include all the PMC in this mask to make sure we don't
3740
          * see any side effect of a stale state, such as opcode matching
3741
          * or range restrictions, for instance.
3742
          *
3743
          * We never directly restore PMC0 so we do not include it in the mask.
3744
          */
3745
        ctx->ctx_reload_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1;
3746
        /*
3747
         * We must include all the PMD in this mask to avoid picking
3748
         * up stale value and leak information, especially directly
3749
         * at the user level when psr.sp=0
3750
         */
3751
        ctx->ctx_reload_pmds[0] = pmu_conf.impl_pmds[0];
3752
 
3753
        /*
3754
         * Keep track of the pmds we want to sample
3755
         * XXX: maybe we don't need to save/restore the DEAR/IEAR pmds
3756
         * but we do need the BTB for sure. This is because of a hardware
3757
         * buffer of 1 only for non-BTB pmds.
3758
         *
3759
         * We ignore the unimplemented pmds specified by the user
3760
         */
3761
        ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0];
3762
        ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */
3763
 
3764
        /*
3765
         * useful in case of re-enable after disable
3766
         */
3767
        ctx->ctx_used_ibrs[0] = 0UL;
3768
        ctx->ctx_used_dbrs[0] = 0UL;
3769
 
3770
        ia64_srlz_d();
3771
}
3772
 
3773
/*
3774
 * This function is called when a thread exits (from exit_thread()).
3775
 * This is a simplified pfm_save_regs() that simply flushes the current
3776
 * register state into the save area taking into account any pending
3777
 * overflow. This time no notification is sent because the task is dying
3778
 * anyway. The inline processing of overflows avoids losing some counts.
3779
 * The PMU is frozen on exit from this call and is to never be reenabled
3780
 * again for this task.
3781
 *
3782
 */
3783
void
3784
pfm_flush_regs (struct task_struct *task)
3785
{
3786
        pfm_context_t *ctx;
3787
        u64 pmc0;
3788
        unsigned long mask2, val;
3789
        int i;
3790
 
3791
        ctx = task->thread.pfm_context;
3792
 
3793
        if (ctx == NULL) return;
3794
 
3795
        /*
3796
         * that's it if context already disabled
3797
         */
3798
        if (ctx->ctx_flags.state == PFM_CTX_DISABLED) return;
3799
 
3800
        /*
3801
         * stop monitoring:
3802
         * This is the only way to stop monitoring without destroying overflow
3803
         * information in PMC[0].
3804
         * This is the last instruction which can cause overflow when monitoring
3805
         * in kernel.
3806
         * By now, we could still have an overflow interrupt in-flight.
3807
         */
3808
        if (ctx->ctx_fl_system) {
3809
 
3810
                /* disable dcr pp */
3811
                ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
3812
 
3813
                /* stop monitoring */
3814
                pfm_clear_psr_pp();
3815
                ia64_srlz_i();
3816
 
3817
                PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
3818
                PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
3819
                PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
3820
        } else  {
3821
 
3822
                /* stop monitoring */
3823
                pfm_clear_psr_up();
3824
                ia64_srlz_i();
3825
 
3826
                /* no more save/restore on ctxsw */
3827
                current->thread.flags &= ~IA64_THREAD_PM_VALID;
3828
        }
3829
 
3830
        /*
3831
         * Mark the PMU as not owned
3832
         * This will cause the interrupt handler to do nothing in case an overflow
3833
         * interrupt was in-flight
3834
         * This also guarantees that pmc0 will contain the final state
3835
         * It virtually gives us full control on overflow processing from that point
3836
         * on.
3837
         * It must be an atomic operation.
3838
         */
3839
        SET_PMU_OWNER(NULL);
3840
 
3841
        /*
3842
         * read current overflow status:
3843
         *
3844
         * we are guaranteed to read the final stable state
3845
         */
3846
        ia64_srlz_d();
3847
        pmc0 = ia64_get_pmc(0); /* slow */
3848
 
3849
        /*
3850
         * freeze PMU:
3851
         *
3852
         * This destroys the overflow information. This is required to make sure
3853
         * next process does not start with monitoring on if not requested
3854
         */
3855
        pfm_freeze_pmu();
3856
 
3857
        /*
3858
         * We don't need to restore psr, because we are on our way out
3859
         */
3860
 
3861
        /*
3862
         * This loop flushes the PMD into the PFM context.
3863
         * It also processes overflow inline.
3864
         *
3865
         * IMPORTANT: No notification is sent at this point as the process is dying.
3866
         * The implicit notification will come from a SIGCHILD or a return from a
3867
         * waitpid().
3868
         *
3869
         */
3870
#ifdef CONFIG_SMP
3871
        if (GET_LAST_CPU(ctx) != smp_processor_id())
3872
                printk(KERN_DEBUG "perfmon: [%d] last_cpu=%d\n",
3873
                       task->pid, GET_LAST_CPU(ctx));
3874
#endif
3875
 
3876
        /*
3877
         * we save all the used pmds
3878
         * we take care of overflows for pmds used as counters
3879
         */
3880
        mask2 = ctx->ctx_used_pmds[0];
3881
        for (i = 0; mask2; i++, mask2>>=1) {
3882
 
3883
                /* skip non used pmds */
3884
                if ((mask2 & 0x1) == 0) continue;
3885
 
3886
                val = ia64_get_pmd(i);
3887
 
3888
                if (PMD_IS_COUNTING(i)) {
3889
                        DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n",
3890
                                task->pid,
3891
                                i,
3892
                                ctx->ctx_soft_pmds[i].val,
3893
                                val & pmu_conf.ovfl_val));
3894
 
3895
                        /* collect latest results */
3896
                        ctx->ctx_soft_pmds[i].val += val & pmu_conf.ovfl_val;
3897
 
3898
                        /*
3899
                         * now everything is in ctx_soft_pmds[] and we need
3900
                         * to clear the saved context from save_regs() such that
3901
                         * pfm_read_pmds() gets the correct value
3902
                         */
3903
                        task->thread.pmd[i] = 0;
3904
 
3905
                        /*
3906
                         * take care of overflow inline
3907
                         */
3908
                        if (pmc0 & (1UL << i)) {
3909
                                ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
3910
                                DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n",
3911
                                        task->pid, i, ctx->ctx_soft_pmds[i].val));
3912
                        }
3913
                } else {
3914
                        DBprintk(("[%d] pmd[%d] hw_pmd=0x%lx\n", task->pid, i, val));
3915
                        /*
3916
                         * not a counter, just save value as is
3917
                         */
3918
                        task->thread.pmd[i] = val;
3919
                }
3920
        }
3921
        SET_LAST_CPU(ctx, -1);
3922
}
3923
 
3924
 
3925
/*
3926
 * task is the newly created task, pt_regs for new child
3927
 */
3928
int
3929
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
3930
{
3931
        pfm_context_t *ctx;
3932
        pfm_context_t *nctx;
3933
        struct thread_struct *thread;
3934
        unsigned long m;
3935
        int i;
3936
 
3937
        /*
3938
         * the new task was copied from parent and therefore points
3939
         * to the parent's context at this point
3940
         */
3941
        ctx    = task->thread.pfm_context;
3942
        thread = &task->thread;
3943
 
3944
        /*
3945
         * for secure sessions, make sure child cannot mess up
3946
         * the monitoring session.
3947
         */
3948
        if (ctx->ctx_fl_unsecure == 0) {
3949
                ia64_psr(regs)->sp = 1;
3950
                DBprintk(("enabling psr.sp for [%d]\n", task->pid));
3951
        } else {
3952
                DBprintk(("psr.sp=%d [%d]\n", ia64_psr(regs)->sp, task->pid));
3953
        }
3954
 
3955
 
3956
        /*
3957
         * if there was a virtual mapping for the sampling buffer
3958
         * the mapping is NOT inherited across fork() (see VM_DONTCOPY),
3959
         * so we don't have to explicitly remove it here.
3960
         *
3961
         *
3962
         * Part of the clearing of fields is also done in
3963
         * copy_thread() because the fields are outside the
3964
         * pfm_context structure and can affect tasks not
3965
         * using perfmon.
3966
         */
3967
 
3968
        /* clear pending notification */
3969
        task->thread.pfm_ovfl_block_reset = 0;
3970
 
3971
        /*
3972
         * clear cpu pinning restriction for child
3973
         */
3974
        if (ctx->ctx_fl_system) {
3975
                task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
3976
                task->need_resched = 1;
3977
 
3978
                DBprintk(("setting cpus_allowed for [%d] to 0x%lx from 0x%lx\n",
3979
                        task->pid,
3980
                        ctx->ctx_saved_cpus_allowed,
3981
                        current->cpus_allowed));
3982
        }
3983
 
3984
        /*
3985
         * takes care of easiest case first
3986
         */
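        /*
         * note (added), summary of the inheritance modes handled below:
         *   PFM_FL_INHERIT_NONE : the child loses the context altogether
         *   PFM_FL_INHERIT_ONCE : the child gets a copy, but both the copy
         *                         and the parent are downgraded to
         *                         PFM_FL_INHERIT_NONE afterwards
         *   PFM_FL_INHERIT_ALL  : the copy keeps propagating to further
         *                         children
         */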
3987
        if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_NONE) {
3988
 
3989
                DBprintk(("removing PFM context for [%d]\n", task->pid));
3990
 
3991
                task->thread.pfm_context = NULL;
3992
 
3993
                /*
3994
                 * we must clear psr.up because the new child does
3995
                 * not have a context and the PM_VALID flag is cleared
3996
                 * in copy_thread().
3997
                 *
3998
                 * we do not clear psr.pp because it is always
3999
                 * controlled by the system wide logic and we should
4000
                 * never be here when system wide is running anyway
4001
                 */
4002
                ia64_psr(regs)->up = 0;
4003
 
4004
                /* copy_thread() clears IA64_THREAD_PM_VALID */
4005
                return 0;
4006
        }
4007
        nctx = pfm_context_alloc();
4008
        if (nctx == NULL) return -ENOMEM;
4009
 
4010
        /* copy content */
4011
        *nctx = *ctx;
4012
 
4013
        if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_ONCE) {
4014
                nctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
4015
                DBprintk(("downgrading to INHERIT_NONE for [%d]\n", task->pid));
4016
                /*
4017
                 * downgrade parent: once means only first child!
4018
                 */
4019
                ctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
4020
        }
4021
        /*
4022
         * task is not yet visible in the tasklist, so we do
4023
         * not need to lock the newly created context.
4024
         * However, we must grab the tasklist_lock to ensure
4025
         * that the ctx_owner or ctx_notify_task do not disappear
4026
         * while we increment their check counters.
4027
         */
4028
        read_lock(&tasklist_lock);
4029
 
4030
        if (nctx->ctx_notify_task)
4031
                atomic_inc(&nctx->ctx_notify_task->thread.pfm_notifiers_check);
4032
 
4033
        if (nctx->ctx_owner)
4034
                atomic_inc(&nctx->ctx_owner->thread.pfm_owners_check);
4035
 
4036
        read_unlock(&tasklist_lock);
4037
 
4038
 
4039
        LOCK_PFS();
4040
        pfm_sessions.pfs_task_sessions++;
4041
        UNLOCK_PFS();
4042
 
4043
        /* initialize counters in new context */
4044
        m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
4045
        for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
4046
                if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) {
4047
                        nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.ovfl_val;
4048
                        thread->pmd[i]             = nctx->ctx_soft_pmds[i].lval & pmu_conf.ovfl_val;
4049
                } else {
4050
                        thread->pmd[i]             = 0UL; /* reset to initial state */
4051
                }
4052
        }
4053
 
4054
        nctx->ctx_fl_frozen    = 0;
4055
        nctx->ctx_ovfl_regs[0] = 0UL;
4056
        SET_LAST_CPU(nctx, -1);
4057
 
4058
        /*
4059
         * here nctx->ctx_psb == ctx->ctx_psb
4060
         *
4061
         * increment reference count to sampling
4062
         * buffer, if any. Note that this is independent
4063
         * from the virtual mapping. The latter is never
4064
         * inherited while the former will be if context
4065
         * is setup to something different from PFM_FL_INHERIT_NONE
4066
         */
4067
        if (nctx->ctx_psb) {
4068
                LOCK_PSB(nctx->ctx_psb);
4069
 
4070
                nctx->ctx_psb->psb_refcnt++;
4071
 
4072
                DBprintk(("updated smpl @ %p refcnt=%lu psb_flags=0x%x\n",
4073
                        ctx->ctx_psb->psb_hdr,
4074
                        ctx->ctx_psb->psb_refcnt,
4075
                        ctx->ctx_psb->psb_flags));
4076
 
4077
                UNLOCK_PSB(nctx->ctx_psb);
4078
 
4079
                /*
4080
                 * remove any pointer to sampling buffer mapping
4081
                 */
4082
                nctx->ctx_smpl_vaddr = 0;
4083
        }
4084
 
4085
        sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */
4086
 
4087
        /*
4088
         * propagate kernel psr in new context (used for first ctxsw in)
4089
         */
4090
        nctx->ctx_saved_psr = pfm_get_psr();
4091
 
4092
        /*
4093
         * force a full reload on ctxsw in
4094
         */
4095
        nctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
4096
        SET_LAST_CPU(nctx, -1);
4097
 
4098
        /*
4099
         * initialize tasklet for signal notifications
4100
         *
4101
         * ALL signal-based (or any notification using data structures
4102
         * external to perfmon) MUST use tasklets to avoid lock contentions
4103
         * when a signal has to be sent from the overflow interrupt handler.
4104
         */
4105
        tasklet_init(&nctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)nctx);
4106
 
4107
        /* link with new task */
4108
        thread->pfm_context = nctx;
4109
 
4110
        DBprintk(("nctx=%p for process [%d]\n", (void *)nctx, task->pid));
4111
 
4112
        /*
4113
         * the copy_thread routine automatically clears
4114
         * IA64_THREAD_PM_VALID, so we need to re-enable it if it was used by the caller
4115
         */
4116
        if (current->thread.flags & IA64_THREAD_PM_VALID) {
4117
                DBprintk(("setting PM_VALID for [%d]\n", task->pid));
4118
                thread->flags |= IA64_THREAD_PM_VALID;
4119
        }
4120
        return 0;
4121
}
4122
 
4123
/*
4124
 *
4125
 * We cannot touch any of the PMU registers at this point as we may
4126
 * not be running on the same CPU the task was last run on.  Therefore
4127
 * it is assumed that the PMU has been stopped appropriately in
4128
 * pfm_flush_regs() called from exit_thread().
4129
 *
4130
 * The function is called in the context of the parent via a release_thread()
4131
 * and wait4(). The task is not in the tasklist anymore.
4132
 */
4133
void
4134
pfm_context_exit(struct task_struct *task)
4135
{
4136
        pfm_context_t *ctx = task->thread.pfm_context;
4137
 
4138
        /*
4139
         * check sampling buffer
4140
         */
4141
        if (ctx->ctx_psb) {
4142
                pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
4143
 
4144
                LOCK_PSB(psb);
4145
 
4146
                DBprintk(("sampling buffer from [%d] @%p size %ld refcnt=%lu psb_flags=0x%x\n",
4147
                        task->pid,
4148
                        psb->psb_hdr, psb->psb_size, psb->psb_refcnt, psb->psb_flags));
4149
 
4150
                /*
4151
                 * in the case where we are the last user, we may be able to free
4152
                 * the buffer
4153
                 */
4154
                psb->psb_refcnt--;
4155
 
4156
                if (psb->psb_refcnt == 0) {
4157
 
4158
                        /*
4159
                         * The flag is cleared in pfm_vm_close(), which gets
4160
                         * called from do_exit() via exit_mm().
4161
                         * By the time we come here, the task has no more mm context.
4162
                         *
4163
                         * We can only free the psb and buffer here after the vm area
4164
                         * describing the buffer has been removed. This normally happens
4165
                         * as part of do_exit() but the entire mm context is ONLY removed
4166
                         * once its reference count goes to zero. This is typically
4167
                         * the case except for multi-threaded (several tasks) processes.
4168
                         *
4169
                         * See pfm_vm_close() and pfm_cleanup_smpl_buf() for more details.
4170
                         */
4171
                        if ((psb->psb_flags & PSB_HAS_VMA) == 0) {
4172
 
4173
                                DBprintk(("cleaning sampling buffer from [%d] @%p size %ld\n",
4174
                                        task->pid,
4175
                                        psb->psb_hdr, psb->psb_size));
4176
 
4177
                                /*
4178
                                 * free the buffer and psb
4179
                                 */
4180
                                pfm_rvfree(psb->psb_hdr, psb->psb_size);
4181
                                kfree(psb);
4182
                                psb = NULL;
4183
                        }
4184
                }
4185
                /* psb may have been deleted */
4186
                if (psb) UNLOCK_PSB(psb);
4187
        }
4188
 
4189
        DBprintk(("cleaning [%d] pfm_context @%p notify_task=%p check=%d mm=%p\n",
4190
                task->pid, ctx,
4191
                ctx->ctx_notify_task,
4192
                atomic_read(&task->thread.pfm_notifiers_check), task->mm));
4193
 
4194
        /*
4195
         * To avoid getting the notified task or owner task scan the entire process
4196
         * list when they exit, we decrement notifiers_check and owners_check respectively.
4197
         *
4198
         * Of course, there is a race condition between decreasing the value and the
4199
         * task exiting. The danger comes from the fact that, in both cases, we have a
4200
         * direct pointer to a task structure thereby bypassing the tasklist.
4201
         * We must make sure that, if we have task != NULL, the target task is still
4202
         * present and is identical to the initial task specified
4203
         * during pfm_context_create(). It may already be detached from the tasklist but
4204
         * that's okay. Note that it is okay if we miss the deadline and the task scans
4205
         * the list for nothing; it will affect performance but not correctness.
4206
         * The correctness is ensured by using the ctx_lock which prevents the
4207
         * notify_task from changing the fields in our context.
4208
         * Once holding this lock, if we see task != NULL, then it will stay like
4209
         * that until we release the lock. If it is NULL already then we came too late.
4210
         */
4211
        LOCK_CTX(ctx);
4212
 
4213
        if (ctx->ctx_notify_task != NULL) {
4214
                DBprintk(("[%d], [%d] atomic_sub on [%d] notifiers=%u\n", current->pid,
4215
                        task->pid,
4216
                        ctx->ctx_notify_task->pid,
4217
                        atomic_read(&ctx->ctx_notify_task->thread.pfm_notifiers_check)));
4218
 
4219
                atomic_dec(&ctx->ctx_notify_task->thread.pfm_notifiers_check);
4220
        }
4221
 
4222
        if (ctx->ctx_owner != NULL) {
4223
                DBprintk(("[%d], [%d] atomic_sub on [%d] owners=%u\n",
4224
                         current->pid,
4225
                         task->pid,
4226
                         ctx->ctx_owner->pid,
4227
                         atomic_read(&ctx->ctx_owner->thread.pfm_owners_check)));
4228
 
4229
                atomic_dec(&ctx->ctx_owner->thread.pfm_owners_check);
4230
        }
4231
 
4232
        UNLOCK_CTX(ctx);
4233
 
4234
        pfm_unreserve_session(task, ctx->ctx_fl_system, 1UL << ctx->ctx_cpu);
4235
 
4236
        if (ctx->ctx_fl_system) {
4237
                /*
4238
                 * remove any CPU pinning
4239
                 */
4240
                task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
4241
                task->need_resched = 1;
4242
        }
4243
 
4244
        pfm_context_free(ctx);
4245
        /*
4246
         *  clean pfm state in thread structure,
4247
         */
4248
        task->thread.pfm_context          = NULL;
4249
        task->thread.pfm_ovfl_block_reset = 0;
4250
 
4251
        /* pfm_notifiers is cleaned in pfm_cleanup_notifiers() */
4252
}
4253
 
/*
 * function invoked from release_thread when pfm_smpl_buf_list is not NULL
 */
int
pfm_cleanup_smpl_buf(struct task_struct *task)
{
        pfm_smpl_buffer_desc_t *tmp, *psb = task->thread.pfm_smpl_buf_list;

        if (psb == NULL) {
                printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid);
                return -1;
        }
        /*
         * Walk through the list and free the sampling buffer and psb
         */
        while (psb) {
                DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size));

                pfm_rvfree(psb->psb_hdr, psb->psb_size);
                tmp = psb->psb_next;
                kfree(psb);
                psb = tmp;
        }

        /* just in case */
        task->thread.pfm_smpl_buf_list = NULL;

        return 0;
}
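/*
 * Illustrative call site, as a sketch only (per the comment above, the actual
 * caller is release_thread() in the ia64 process-teardown path, which is not
 * part of this file and may differ in detail):
 *
 *      if (task->thread.pfm_smpl_buf_list)
 *              pfm_cleanup_smpl_buf(task);
 */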
 
/*
 * function invoked from release_thread to make sure that the ctx_owner field does not
 * point to a task that no longer exists.
 */
void
pfm_cleanup_owners(struct task_struct *task)
{
        struct task_struct *p;
        pfm_context_t *ctx;

        DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));

        read_lock(&tasklist_lock);

        for_each_task(p) {
                /*
                 * It is safe to do the 2-step test here, because thread.ctx
                 * is cleaned up only in release_thread() and at that point
                 * the task has been detached from the tasklist, an operation
                 * which takes the write_lock() on the tasklist_lock and
                 * therefore cannot run concurrently with this loop. So we have
                 * the guarantee that if we find p and it has a perfmon ctx then
                 * it is going to stay like this for the entire execution of this
                 * loop.
                 */
                ctx = p->thread.pfm_context;

                //DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));

                if (ctx && ctx->ctx_owner == task) {
                        DBprintk(("trying for owner [%d] in [%d]\n", task->pid, p->pid));
                        /*
                         * the spinlock is required to take care of a race condition
                         * with the send_sig_info() call. We must make sure that
                         * either the send_sig_info() completes using a valid task,
                         * or the notify_task is cleared before the send_sig_info()
                         * can pick up a stale value. Note that by the time this
                         * function is executed the 'task' is already detached from the
                         * tasklist. The problem is that the notifiers have a direct
                         * pointer to it. It is okay to send a signal to a task in this
                         * stage, it simply will have no effect. But it is better than sending
                         * to a completely destroyed task or worse to a new task using the same
                         * task_struct address.
                         */
                        LOCK_CTX(ctx);

                        ctx->ctx_owner = NULL;

                        UNLOCK_CTX(ctx);

                        DBprintk(("done for owner [%d] in [%d]\n", task->pid, p->pid));
                }
        }
        read_unlock(&tasklist_lock);

        atomic_set(&task->thread.pfm_owners_check, 0);
}
 

/*
 * function called from release_thread to make sure that the ctx_notify_task field is not
 * pointing to a task that no longer exists.
 */
void
pfm_cleanup_notifiers(struct task_struct *task)
{
        struct task_struct *p;
        pfm_context_t *ctx;

        DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));

        read_lock(&tasklist_lock);

        for_each_task(p) {
                /*
                 * It is safe to do the 2-step test here, because thread.ctx
                 * is cleaned up only in release_thread() and at that point
                 * the task has been detached from the tasklist, an operation
                 * which takes the write_lock() on the tasklist_lock and
                 * therefore cannot run concurrently with this loop. So we have
                 * the guarantee that if we find p and it has a perfmon ctx then
                 * it is going to stay like this for the entire execution of this
                 * loop.
                 */
                ctx = p->thread.pfm_context;

                //DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));

                if (ctx && ctx->ctx_notify_task == task) {
                        DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid));
                        /*
                         * the spinlock is required to take care of a race condition
                         * with the send_sig_info() call. We must make sure that
                         * either the send_sig_info() completes using a valid task,
                         * or the notify_task is cleared before the send_sig_info()
                         * can pick up a stale value. Note that by the time this
                         * function is executed the 'task' is already detached from the
                         * tasklist. The problem is that the notifiers have a direct
                         * pointer to it. It is okay to send a signal to a task in this
                         * stage, it simply will have no effect. But it is better than sending
                         * to a completely destroyed task or worse to a new task using the same
                         * task_struct address.
                         */
                        LOCK_CTX(ctx);

                        ctx->ctx_notify_task = NULL;

                        UNLOCK_CTX(ctx);

                        DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid));
                }
        }
        read_unlock(&tasklist_lock);

        atomic_set(&task->thread.pfm_notifiers_check, 0);
}
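/*
 * Sketch of how the cleanup helpers above are expected to be driven on task
 * exit. The real sequence lives in release_thread() outside this file and may
 * differ in detail; the names below simply mirror the fields used above:
 *
 *      if (atomic_read(&task->thread.pfm_notifiers_check) > 0)
 *              pfm_cleanup_notifiers(task);
 *      if (atomic_read(&task->thread.pfm_owners_check) > 0)
 *              pfm_cleanup_owners(task);
 */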
 
static struct irqaction perfmon_irqaction = {
        .handler = pfm_interrupt_handler,
        .flags   = SA_INTERRUPT,
        .name    = "perfmon"
};
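/*
 * The irqaction above is registered for IA64_PERFMON_VECTOR from
 * pfm_init_percpu() below; the registration is done once, on the boot CPU.
 */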
 
int
pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
{
        int ret;

        /* some sanity checks */
        if (hdl == NULL || hdl->handler == NULL) return -EINVAL;

        /* do the easy test first */
        if (pfm_alternate_intr_handler) return -EBUSY;

        /* reserve our session */
        ret = pfm_reserve_session(NULL, 1, cpu_online_map);
        if (ret) return ret;

        if (pfm_alternate_intr_handler) {
                printk(KERN_DEBUG "perfmon: install_alternate, intr_handler not NULL "
                       "after reserve\n");
                return -EINVAL;
        }

        pfm_alternate_intr_handler = hdl;

        return 0;
}
 
int
pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
{
        if (hdl == NULL) return -EINVAL;

        /* cannot remove someone else's handler! */
        if (pfm_alternate_intr_handler != hdl) return -EINVAL;

        pfm_alternate_intr_handler = NULL;

        /*
         * XXX: assume cpu_online_map has not changed since reservation
         */
        pfm_unreserve_session(NULL, 1, cpu_online_map);

        return 0;
}
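/*
 * Usage sketch for the alternate system-wide handler interface. Only the
 * .handler member of pfm_intr_handler_desc_t is referenced in this file, so
 * the rest of the descriptor and the handler prototype are assumptions here,
 * and "my_desc"/"my_pmu_handler" are hypothetical client names:
 *
 *      static pfm_intr_handler_desc_t my_desc = { .handler = my_pmu_handler };
 *
 *      ret = pfm_install_alternate_syswide_subsystem(&my_desc);
 *      ... PMU interrupts are then expected to be handed to my_pmu_handler ...
 *      ret = pfm_remove_alternate_syswide_subsystem(&my_desc);
 *
 * Installation returns -EBUSY if another handler is already installed, and
 * propagates the pfm_reserve_session() error if the CPUs cannot be reserved.
 */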
 
/*
 * perfmon initialization routine, called from the initcall() table
 */
int __init
pfm_init(void)
{
        unsigned int n, n_counters, i;

        pmu_conf.disabled = 1;

        printk(KERN_INFO "perfmon: version %u.%u IRQ %u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN,
               IA64_PERFMON_VECTOR);

        /*
         * compute the number of implemented PMD/PMC from the
         * description tables
         */
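        /*
         * impl_pmcs/impl_pmds are arrays of 64-bit words used as bitmaps of the
         * implemented registers: register i is recorded in word i>>6, bit i&63.
         * For example, PMC 10 sets bit 10 of impl_pmcs[0], and PMC 70 would set
         * bit 6 of impl_pmcs[1].
         */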
        n = 0;
        for (i=0; PMC_IS_LAST(i) == 0;  i++) {
                if (PMC_IS_IMPL(i) == 0) continue;
                pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63);
                n++;
        }
        pmu_conf.num_pmcs = n;

        n = 0; n_counters = 0;
        for (i=0; PMD_IS_LAST(i) == 0;  i++) {
                if (PMD_IS_IMPL(i) == 0) continue;
                pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63);
                n++;
                if (PMD_IS_COUNTING(i)) n_counters++;
        }
        pmu_conf.num_pmds      = n;
        pmu_conf.num_counters  = n_counters;

        printk(KERN_INFO "perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n",
               pmu_conf.num_pmcs,
               pmu_conf.num_pmds,
               pmu_conf.num_counters,
               ffz(pmu_conf.ovfl_val));

        /* sanity check */
        if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
                printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
                return -1;
        }

        /*
         * for now here for debug purposes
         */
        perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
        if (perfmon_dir == NULL) {
                printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
                return -1;
        }

        /*
         * create /proc/sys/kernel/perfmon
         */
        pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);

        /*
         * initialize all our spinlocks
         */
        spin_lock_init(&pfm_sessions.pfs_lock);

        /* we are all set */
        pmu_conf.disabled = 0;

        return 0;
}
 
__initcall(pfm_init);

void
pfm_init_percpu(void)
{
        int i;

        if (smp_processor_id() == 0)
                register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);

        ia64_set_pmv(IA64_PERFMON_VECTOR);
        ia64_srlz_d();

        /*
         * we first initialize the PMU to a stable state.
         * the values may have been changed from their power-up
         * values by software executed before the kernel took over.
         *
         * At this point, pmu_conf has not yet been initialized
         *
         * On McKinley, this code is ineffective until PMC4 is initialized.
         */
        for (i=1; PMC_IS_LAST(i) == 0;  i++) {
                if (PMC_IS_IMPL(i) == 0) continue;
                ia64_set_pmc(i, PMC_DFL_VAL(i));
        }

        for (i=0; PMD_IS_LAST(i) == 0; i++) {
                if (PMD_IS_IMPL(i) == 0) continue;
                ia64_set_pmd(i, 0UL);
        }
        pfm_freeze_pmu();
}
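/*
 * Ordering note: as the comment inside pfm_init_percpu() says, pmu_conf has
 * not yet been fully initialized when this per-CPU path runs; it only relies
 * on the PMC/PMD description tables (which appear to be set up statically),
 * while the derived state (impl_pmcs/impl_pmds, the register counts, /proc and
 * sysctl entries) is filled in later by pfm_init().
 */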
 
#else /* !CONFIG_PERFMON */

asmlinkage long
sys_perfmonctl (int pid, int cmd, void *req, int count, long arg5, long arg6,
                long arg7, long arg8, long stack)
{
        return -ENOSYS;
}

#endif /* !CONFIG_PERFMON */
