/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i is the time period over which the differential is taken. So d/dt_{-i}
 * is the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the
 *   wrap-around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
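
/*
 * Worked example of the lazy catch-up above (illustrative numbers only):
 * suppose x_{j} = 12 was last folded at period count c_{j} = 5 and the
 * global period count has meanwhile advanced to c = 7. Two periods were
 * missed, so on the next access we do
 *
 *   x_{j} /= 2^(7 - 5);   (12 -> 3)
 *   c_{j} = 7;
 *
 * which is exactly what applying 'x_{j} /= 2' once per elapsed period would
 * have produced, without ever touching x_{j} while it was idle.
 */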

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
        int err;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
        err = percpu_counter_init_irq(&pd->pg[0].events, 0);
        if (err)
                goto out;

        err = percpu_counter_init_irq(&pd->pg[1].events, 0);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);

out:
        return err;
}
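
/*
 * Usage sketch (illustrative only; the names below are made up): a subsystem
 * typically embeds one descriptor and initialises it once, propagating the
 * allocation error:
 *
 *   static struct prop_descriptor my_prop;
 *
 *   static int __init my_subsys_init(void)
 *   {
 *           return prop_descriptor_init(&my_prop, 10);
 *   }
 *
 * The shift argument picks the period length (2^shift events) and is clamped
 * to PROP_MAX_SHIFT above.
 */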

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
        int index;
        int offset;
        u64 events;
        unsigned long flags;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        mutex_lock(&pd->mutex);

        index = pd->index ^ 1;
        offset = pd->pg[pd->index].shift - shift;
        if (!offset)
                goto out;

        pd->pg[index].shift = shift;

        local_irq_save(flags);
        events = percpu_counter_sum(&pd->pg[pd->index].events);
        if (offset < 0)
                events <<= -offset;
        else
                events >>= offset;
        percpu_counter_set(&pd->pg[index].events, events);

        /*
         * ensure the new pg is fully written before the switch
         */
        smp_wmb();
        pd->index = index;
        local_irq_restore(flags);

        synchronize_rcu();

out:
        mutex_unlock(&pd->mutex);
}
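
/*
 * Example of the rescaling above (illustrative numbers): shrinking the shift
 * from 10 to 8 gives offset = 2, so a summed events count of 1000 becomes
 * 1000 >> 2 = 250; growing the shift gives a negative offset and the count
 * is shifted left instead, so the copied value matches the new bit layout
 * before the index is flipped over to it.
 */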

/*
 * Wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
        int index;

        rcu_read_lock();
        index = pd->index;
        /*
         * match the wmb from prop_change_shift()
         */
        smp_rmb();
        return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
        rcu_read_unlock();
}
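
/*
 * Callers pair the two as an RCU read-side section around any use of the
 * returned prop_global, e.g. (sketch):
 *
 *   struct prop_global *pg = prop_get_global(pd);
 *
 *   ... read pg->shift / pg->events ...
 *   prop_put_global(pd, pg);
 */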

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
        int offset = *pl_shift - new_shift;

        if (!offset)
                return;

        if (offset < 0)
                *pl_period <<= -offset;
        else
                *pl_period >>= offset;

        *pl_shift = new_shift;
}
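
/*
 * Example (illustrative numbers): a local that last folded while the global
 * shift was 8 has *pl_shift == 8; if the shift has since grown to 10, then
 * offset = -2 and the stored period is scaled up by 4, keeping it comparable
 * with the global period computed under the new shift in prop_norm_*().
 */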

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
        percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

        /*
         * For each missed period, we halve the local counter.
         * Basically:
         *   pl->events >> (global_period - pl->period);
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (period < BITS_PER_LONG) {
                s64 val = percpu_counter_read(&pl->events);

                if (val < (nr_cpu_ids * PROP_BATCH))
                        val = percpu_counter_sum(&pl->events);

                __percpu_counter_add(&pl->events, -val + (val >> period),
                                        PROP_BATCH);
        } else
                percpu_counter_set(&pl->events, 0);

        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
}
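
/*
 * Worked example (illustrative numbers): with pg->shift = 4 the half-period
 * is 8 events. If the global counter reads 35, global_period = 32; a local
 * last folded at pl->period = 16 has missed (32 - 16) >> 3 = 2 periods, so
 * its event count is replaced by val >> 2. Once the lag reaches
 * BITS_PER_LONG periods the remaining value would be zero anyway (and the
 * shift undefined), so the counter is simply reset.
 */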

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);
        __percpu_counter_add(&pl->events, 1, PROP_BATCH);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}
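
/*
 * Usage sketch (illustrative; my_prop/my_local are made-up names): an event
 * source holding a prop_local_percpu simply does
 *
 *   __prop_inc_percpu(&my_prop, &my_local);
 *
 * for every event. The local counter is normalised lazily on entry, so
 * locals that stop generating events cost nothing until they are used again.
 */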

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
                struct prop_local_percpu *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_percpu(pg, pl);
        *numerator = percpu_counter_read_positive(&pl->events);

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
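
/*
 * Usage sketch (illustrative): turning the fraction into a share of some
 * resource, roughly the way a caller might scale an available budget:
 *
 *   long num, den;
 *
 *   prop_fraction_percpu(&my_prop, &my_local, &num, &den);
 *   my_share = budget * num / den;
 *
 * num/den approximates p_{j} from the derivation at the top of this file.
 */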

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        pl->events = 0;
        return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we halve the local counter.
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (likely(period < BITS_PER_LONG))
                pl->events >>= period;
        else
                pl->events = 0;
        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_single(pg, pl);
        pl->events++;
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
                struct prop_local_single *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_single(pg, pl);
        *numerator = pl->events;

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
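
/*
 * Usage sketch for the single-counter variant (illustrative; names are made
 * up). It mirrors the percpu API but keeps the local count in a single
 * counter instead of a percpu one:
 *
 *   struct prop_local_single my_local;
 *   long num, den;
 *
 *   prop_local_init_single(&my_local);
 *   __prop_inc_single(&my_prop, &my_local);
 *   prop_fraction_single(&my_prop, &my_local, &num, &den);
 */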
