OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [java/] [text/] [Bidi.java] - Blame information for rev 771

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 771 jeremybenn
/* Bidi.java -- Bidirectional Algorithm implementation
2
   Copyright (C) 2005, 2006  Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.text;
40
 
41
import java.awt.font.NumericShaper;
42
import java.awt.font.TextAttribute;
43
import java.util.ArrayList;
44
 
45
 
46
/**
47
 * Bidirectional Algorithm implementation.
48
 *
49
 * The full algorithm is
50
 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
51
 * Annex #9: The Bidirectional Algorithm</a>.
52
 *
53
 * @since 1.4
54
 */
55
public final class Bidi
56
{
57
  /**
58
   * This indicates that a strongly directional character in the text should
59
   * set the initial direction, but if no such character is found, then the
60
   * initial direction will be left-to-right.
61
   */
62
  public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
63
 
64
  /**
65
   * This indicates that a strongly directional character in the text should
66
   * set the initial direction, but if no such character is found, then the
67
   * initial direction will be right-to-left.
68
   */
69
  public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
70
 
71
  /**
72
   * This indicates that the initial direction should be left-to-right.
73
   */
74
  public static final int DIRECTION_LEFT_TO_RIGHT = 0;
75
 
76
  /**
77
   * This indicates that the initial direction should be right-to-left.
78
   */
79
  public static final int DIRECTION_RIGHT_TO_LEFT = 1;
80
 
81
  // Flags used when computing the result.
82
  private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
83
  private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
84
 
85
  // The text we are examining, and the starting offset.
86
  // If we had a better way to handle createLineBidi, we wouldn't
87
  // need this at all -- which for the String case would be an
88
  // efficiency win.
89
  private char[] text;
90
  private int textOffset;
91
  // The embeddings corresponding to the text, and the starting offset.
92
  private byte[] embeddings;
93
  private int embeddingOffset;
94
  // The length of the text (and embeddings) to use.
95
  private int length;
96
  // The flags.
97
  private int flags;
98
 
99
  // All instance fields following this point are initialized
100
  // during analysis.  Fields before this must be set by the constructor.
101
 
102
  // The initial embedding level.
103
  private int baseEmbedding;
104
  // The type of each character in the text.
105
  private byte[] types;
106
  // The levels we compute.
107
  private byte[] levels;
108
 
109
  // A list of indices where a formatting code was found.  These
110
  // are indicies into the original text -- not into the text after
111
  // the codes have been removed.
112
  private ArrayList formatterIndices;
113
 
114
  // Indices of the starts of runs in the text.
115
  private int[] runs;
116
 
117
  // A convenience field where we keep track of what kinds of runs
118
  // we've seen.
119
  private int resultFlags;
120
 
121
  /**
122
   * Create a new Bidi object given an attributed character iterator.
123
   * This constructor will examine various attributes of the text:
124
   * <ul>
125
   * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
126
   * paragraph's base embedding level.  This constructor will recognize
127
   * either {@link TextAttribute#RUN_DIRECTION_LTR} or
128
   * {@link TextAttribute#RUN_DIRECTION_RTL}.  If neither is given,
129
   * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
130
   * </li>
131
   *
132
   * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
133
   * shaping will be done before the Bidi algorithm is run.
134
   * </li>
135
   *
136
   * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
137
   * character, then the value of this attribute will be used as an
138
   * embedding level override.
139
   * </li>
140
   * </ul>
141
   * @param iter the attributed character iterator to use
142
   */
143
  public Bidi(AttributedCharacterIterator iter)
144
  {
145
    // If set, this attribute should be set on all characters.
146
    // We don't check this (should we?) but we do assume that we
147
    // can simply examine the first character.
148
    Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
149
    if (val == TextAttribute.RUN_DIRECTION_LTR)
150
      this.flags = DIRECTION_LEFT_TO_RIGHT;
151
    else if (val == TextAttribute.RUN_DIRECTION_RTL)
152
      this.flags = DIRECTION_RIGHT_TO_LEFT;
153
    else
154
      this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
155
 
156
    // Likewise this attribute should be specified on the whole text.
157
    // We read it here and then, if it is set, we apply the numeric shaper
158
    // to the text before processing it.
159
    NumericShaper shaper = null;
160
    val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
161
    if (val instanceof NumericShaper)
162
      shaper = (NumericShaper) val;
163
 
164
    char[] text = new char[iter.getEndIndex() - iter.getBeginIndex()];
165
    this.embeddings = new byte[this.text.length];
166
    this.embeddingOffset = 0;
167
    this.length = text.length;
168
    for (int i = 0; i < this.text.length; ++i)
169
      {
170
        this.text[i] = iter.current();
171
 
172
        val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
173
        if (val instanceof Integer)
174
          {
175
            int ival = ((Integer) val).intValue();
176
            byte bval;
177
            if (ival < -62 || ival > 62)
178
              bval = 0;
179
            else
180
              bval = (byte) ival;
181
            this.embeddings[i] = bval;
182
          }
183
      }
184
 
185
    // Invoke the numeric shaper, if specified.
186
    if (shaper != null)
187
      shaper.shape(this.text, 0, this.length);
188
 
189
    runBidi();
190
  }
191
 
192
  /**
193
   * Create a new Bidi object with the indicated text and, possibly, explicit
194
   * embedding settings.
195
   *
196
   * If the embeddings array is null, it is ignored.  Otherwise it is taken to
197
   * be explicit embedding settings corresponding to the text.  Positive values
198
   * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
199
   * embedding overrides.  (FIXME: not at all clear what this really means.)
200
   *
201
   * @param text the text to use
202
   * @param offset the offset of the first character of the text
203
   * @param embeddings the explicit embeddings, or null if there are none
204
   * @param embedOffset the offset of the first embedding value to use
205
   * @param length the length of both the text and the embeddings
206
   * @param flags a flag indicating the base embedding direction
207
   */
208
  public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
209
              int length, int flags)
210
  {
211
    if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
212
        && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
213
        && flags != DIRECTION_LEFT_TO_RIGHT
214
        && flags != DIRECTION_RIGHT_TO_LEFT)
215
      throw new IllegalArgumentException("unrecognized 'flags' argument: "
216
                                         + flags);
217
    this.text = text;
218
    this.textOffset = offset;
219
    this.embeddings = embeddings;
220
    this.embeddingOffset = embedOffset;
221
    this.length = length;
222
    this.flags = flags;
223
 
224
    runBidi();
225
  }
226
 
227
  /**
228
   * Create a new Bidi object using the contents of the given String
229
   * as the text.
230
   * @param text the text to use
231
   * @param flags a flag indicating the base embedding direction
232
   */
233
  public Bidi(String text, int flags)
234
  {
235
    if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
236
        && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
237
        && flags != DIRECTION_LEFT_TO_RIGHT
238
        && flags != DIRECTION_RIGHT_TO_LEFT)
239
      throw new IllegalArgumentException("unrecognized 'flags' argument: "
240
                                         + flags);
241
 
242
    // This is inefficient, but it isn't clear whether it matters.
243
    // If it does we can change our implementation a bit to allow either
244
    // a String or a char[].
245
    this.text = text.toCharArray();
246
    this.textOffset = 0;
247
    this.embeddings = null;
248
    this.embeddingOffset = 0;
249
    this.length = text.length();
250
    this.flags = flags;
251
 
252
    runBidi();
253
  }
254
 
255
  /**
256
   * Implementation function which computes the initial type of
257
   * each character in the input.
258
   */
259
  private void computeTypes()
260
  {
261
    types = new byte[length];
262
    for (int i = 0; i < length; ++i)
263
      types[i] = Character.getDirectionality(text[textOffset + i]);
264
  }
265
 
266
  /**
267
   * An internal function which implements rules P2 and P3.
268
   * This computes the base embedding level.
269
   * @return the paragraph's base embedding level
270
   */
271
  private int computeParagraphEmbeddingLevel()
272
  {
273
    // First check to see if the user supplied a directionality override.
274
    if (flags == DIRECTION_LEFT_TO_RIGHT
275
        || flags == DIRECTION_RIGHT_TO_LEFT)
276
      return flags;
277
 
278
    // This implements rules P2 and P3.
279
    // (Note that we don't need P1, as the user supplies
280
    // a paragraph.)
281
    for (int i = 0; i < length; ++i)
282
      {
283
        int dir = types[i];
284
        if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
285
          return DIRECTION_LEFT_TO_RIGHT;
286
        if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
287
            || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
288
          return DIRECTION_RIGHT_TO_LEFT;
289
      }
290
    return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
291
            ? DIRECTION_LEFT_TO_RIGHT
292
            : DIRECTION_RIGHT_TO_LEFT);
293
  }
294
 
295
  /**
296
   * An internal function which implements rules X1 through X9.
297
   * This computes the initial levels for the text, handling
298
   * explicit overrides and embeddings.
299
   */
300
  private void computeExplicitLevels()
301
  {
302
    levels = new byte[length];
303
    byte currentEmbedding = (byte) baseEmbedding;
304
    // The directional override is a Character directionality
305
    // constant.  -1 means there is no override.
306
    byte directionalOverride = -1;
307
    // The stack of pushed embeddings, and the stack pointer.
308
    // Note that because the direction is inherent in the depth,
309
    // and because we have a bit left over in a byte, we can encode
310
    // the override, if any, directly in this value on the stack.
311
    final int MAX_DEPTH = 62;
312
    byte[] embeddingStack = new byte[MAX_DEPTH];
313
    int sp = 0;
314
 
315
    for (int i = 0; i < length; ++i)
316
      {
317
        // If we see an explicit embedding, we use that, even if
318
        // the current character is itself a directional override.
319
        if (embeddings != null && embeddings[embeddingOffset + i] != 0)
320
          {
321
            // It isn't at all clear what we're supposed to do here.
322
            // What does a negative value really mean?
323
            // Should we push on the embedding stack here?
324
            currentEmbedding = embeddings[embeddingOffset + i];
325
            if (currentEmbedding < 0)
326
              {
327
                currentEmbedding = (byte) -currentEmbedding;
328
                directionalOverride
329
                  = (((currentEmbedding % 2) == 0)
330
                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
331
                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
332
              }
333
            else
334
              directionalOverride = -1;
335
            continue;
336
          }
337
        // No explicit embedding.
338
        boolean isLtoR = false;
339
        boolean isSpecial = true;
340
        switch (types[i])
341
          {
342
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
343
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
344
              isLtoR = true;
345
              // Fall through.
346
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
347
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
348
              {
349
                byte newEmbedding;
350
                if (isLtoR)
351
                  {
352
                    // Least greater even.
353
                    newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
354
                  }
355
                else
356
                  {
357
                    // Least greater odd.
358
                    newEmbedding = (byte) ((currentEmbedding + 1) | 1);
359
                  }
360
                // FIXME: we don't properly handle invalid pushes.
361
                if (newEmbedding < MAX_DEPTH)
362
                  {
363
                    // The new level is valid.  Push the old value.
364
                    // See above for a comment on the encoding here.
365
                    if (directionalOverride != -1)
366
                      currentEmbedding |= Byte.MIN_VALUE;
367
                    embeddingStack[sp++] = currentEmbedding;
368
                    currentEmbedding = newEmbedding;
369
                    if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
370
                      directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
371
                    else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
372
                      directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
373
                    else
374
                      directionalOverride = -1;
375
                  }
376
                }
377
              break;
378
            case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
379
              {
380
                // FIXME: we don't properly handle a pop with a corresponding
381
                // invalid push.
382
                if (sp == 0)
383
                  {
384
                    // We saw a pop without a push.  Just ignore it.
385
                    break;
386
                  }
387
                byte newEmbedding = embeddingStack[--sp];
388
                currentEmbedding = (byte) (newEmbedding & 0x7f);
389
                if (newEmbedding < 0)
390
                  directionalOverride
391
                  = (((newEmbedding & 1) == 0)
392
                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
393
                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
394
                else
395
                  directionalOverride = -1;
396
              }
397
              break;
398
            default:
399
              isSpecial = false;
400
              break;
401
          }
402
        levels[i] = currentEmbedding;
403
        if (isSpecial)
404
          {
405
            // Mark this character for removal.
406
            if (formatterIndices == null)
407
              formatterIndices = new ArrayList();
408
            formatterIndices.add(Integer.valueOf(i));
409
          }
410
        else if (directionalOverride != -1)
411
          types[i] = directionalOverride;
412
      }
413
 
414
    // Remove the formatting codes and update both the arrays
415
    // and 'length'.  It would be more efficient not to remove
416
    // these codes, but it is also more complicated.  Also, the
417
    // Unicode algorithm reference does not properly describe
418
    // how this is to be done -- from what I can tell, their suggestions
419
    // in this area will not yield the correct results.
420
    if (formatterIndices == null)
421
      return;
422
    int output = 0, input = 0;
423
    final int size = formatterIndices.size();
424
    for (int i = 0; i <= size; ++i)
425
      {
426
        int nextFmt;
427
        if (i == size)
428
          nextFmt = length;
429
        else
430
          nextFmt = ((Integer) formatterIndices.get(i)).intValue();
431
        // Non-formatter codes are from 'input' to 'nextFmt'.
432
        int len = nextFmt - input;
433
        System.arraycopy(levels, input, levels, output, len);
434
        System.arraycopy(types, input, types, output, len);
435
        output += len;
436
        input = nextFmt + 1;
437
      }
438
    length -= formatterIndices.size();
439
  }
440
 
441
  /**
442
   * An internal function to compute the boundaries of runs
443
   * in the text.  It isn't strictly necessary to do this, but
444
   * it lets us write some following passes in a less complicated
445
   * way.  Also it lets us efficiently implement some of the public
446
   * methods.  A run is simply a sequence of characters at the
447
   * same level.
448
   */
449
  private void computeRuns()
450
  {
451
    int runCount = 0;
452
    int currentEmbedding = baseEmbedding;
453
    for (int i = 0; i < length; ++i)
454
      {
455
        if (levels[i] != currentEmbedding)
456
          {
457
            currentEmbedding = levels[i];
458
            ++runCount;
459
          }
460
      }
461
 
462
    // This may be called multiple times.  If so, and if
463
    // the number of runs has not changed, then don't bother
464
    // allocating a new array.
465
    if (runs == null || runs.length != runCount + 1)
466
      runs = new int[runCount + 1];
467
    int where = 0;
468
    int lastRunStart = 0;
469
    currentEmbedding = baseEmbedding;
470
    for (int i = 0; i < length; ++i)
471
      {
472
        if (levels[i] != currentEmbedding)
473
          {
474
            runs[where++] = lastRunStart;
475
            lastRunStart = i;
476
            currentEmbedding = levels[i];
477
          }
478
      }
479
    runs[where++] = lastRunStart;
480
  }
481
 
482
  /**
483
   * An internal method to resolve weak types.  This implements
484
   * rules W1 through W7.
485
   */
486
  private void resolveWeakTypes()
487
  {
488
    final int runCount = getRunCount();
489
 
490
    int previousLevel = baseEmbedding;
491
    for (int run = 0; run < runCount; ++run)
492
      {
493
        int start = getRunStart(run);
494
        int end = getRunLimit(run);
495
        int level = getRunLevel(run);
496
 
497
        // These are the names used in the Bidi algorithm.
498
        byte sor = (((Math.max(previousLevel, level) % 2) == 0)
499
                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
500
                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
501
        int nextLevel;
502
        if (run == runCount - 1)
503
          nextLevel = baseEmbedding;
504
        else
505
          nextLevel = getRunLevel(run + 1);
506
        byte eor = (((Math.max(level, nextLevel) % 2) == 0)
507
                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
508
                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
509
 
510
        byte prevType = sor;
511
        byte prevStrongType = sor;
512
        for (int i = start; i < end; ++i)
513
          {
514
            final byte nextType = (i == end - 1) ? eor : types[i + 1];
515
 
516
            // Rule W1: change NSM to the prevailing direction.
517
            if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
518
              types[i] = prevType;
519
            else
520
              prevType = types[i];
521
 
522
            // Rule W2: change EN to AN in some cases.
523
            if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
524
              {
525
                if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
526
                  types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
527
              }
528
            else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
529
                     || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
530
                     || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
531
              prevStrongType = types[i];
532
 
533
            // Rule W3: change AL to R.
534
            if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
535
              types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
536
 
537
            // Rule W4: handle separators between two numbers.
538
            if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
539
                && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
540
              {
541
                if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
542
                    || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
543
                  types[i] = nextType;
544
              }
545
            else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
546
                     && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
547
                     && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
548
              types[i] = nextType;
549
 
550
            // Rule W5: change a sequence of european terminators to
551
            // european numbers, if they are adjacent to european numbers.
552
            // We also include BN characters in this.
553
            if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
554
                || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
555
              {
556
                if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
557
                  types[i] = prevType;
558
                else
559
                  {
560
                    // Look ahead to see if there is an EN terminating this
561
                    // sequence of ETs.
562
                    int j = i + 1;
563
                    while (j < end
564
                           && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
565
                               || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
566
                      ++j;
567
                    if (j < end
568
                        && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
569
                      {
570
                        // Change them all to EN now.
571
                        for (int k = i; k < j; ++k)
572
                          types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
573
                      }
574
                  }
575
              }
576
 
577
            // Rule W6: separators and terminators change to ON.
578
            // Again we include BN.
579
            if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
580
                || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
581
                || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
582
                || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
583
              types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
584
 
585
            // Rule W7: change european number types.
586
            if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
587
                && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
588
              types[i] = prevStrongType;
589
          }
590
 
591
        previousLevel = level;
592
      }
593
  }
594
 
595
  /**
596
   * An internal method to resolve neutral types.  This implements
597
   * rules N1 and N2.
598
   */
599
  private void resolveNeutralTypes()
600
  {
601
    // This implements rules N1 and N2.
602
    final int runCount = getRunCount();
603
 
604
    int previousLevel = baseEmbedding;
605
    for (int run = 0; run < runCount; ++run)
606
      {
607
        int start = getRunStart(run);
608
        int end = getRunLimit(run);
609
        int level = getRunLevel(run);
610
 
611
        byte embeddingDirection
612
          = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
613
                                : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
614
        // These are the names used in the Bidi algorithm.
615
        byte sor = (((Math.max(previousLevel, level) % 2) == 0)
616
                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
617
                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
618
        int nextLevel;
619
        if (run == runCount - 1)
620
          nextLevel = baseEmbedding;
621
        else
622
          nextLevel = getRunLevel(run + 1);
623
        byte eor = (((Math.max(level, nextLevel) % 2) == 0)
624
                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
625
                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
626
 
627
        byte prevStrong = sor;
628
        int neutralStart = -1;
629
        for (int i = start; i <= end; ++i)
630
          {
631
            byte newStrong = -1;
632
            byte thisType = i == end ? eor : types[i];
633
            switch (thisType)
634
              {
635
              case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
636
                newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
637
                break;
638
              case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
639
              case Character.DIRECTIONALITY_ARABIC_NUMBER:
640
              case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
641
                newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
642
                break;
643
              case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
644
              case Character.DIRECTIONALITY_OTHER_NEUTRALS:
645
              case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
646
              case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
647
              case Character.DIRECTIONALITY_WHITESPACE:
648
                if (neutralStart == -1)
649
                  neutralStart = i;
650
                break;
651
              }
652
            // If we see a strong character, update all the neutrals.
653
            if (newStrong != -1)
654
              {
655
                if (neutralStart != -1)
656
                  {
657
                    byte override = (prevStrong == newStrong
658
                                     ? prevStrong
659
                                     : embeddingDirection);
660
                    for (int j = neutralStart; j < i; ++j)
661
                      types[j] = override;
662
                  }
663
                prevStrong = newStrong;
664
                neutralStart = -1;
665
              }
666
          }
667
 
668
        previousLevel = level;
669
      }
670
  }
671
 
672
  /**
673
   * An internal method to resolve implicit levels.
674
   * This implements rules I1 and I2.
675
   */
676
  private void resolveImplicitLevels()
677
  {
678
    // This implements rules I1 and I2.
679
    for (int i = 0; i < length; ++i)
680
      {
681
        if ((levels[i] & 1) == 0)
682
          {
683
            if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
684
              ++levels[i];
685
            else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
686
                     || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
687
              levels[i] += 2;
688
          }
689
        else
690
          {
691
            if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
692
                || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
693
                || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
694
              ++levels[i];
695
          }
696
 
697
        // Update the result flags.
698
        resultFlags |= 1 << (levels[i] & 1);
699
      }
700
    // One final update of the result flags, using the base level.
701
    resultFlags |= 1 << baseEmbedding;
702
  }
703
 
704
  /**
705
   * This reinserts the formatting codes that we removed early on.
706
   * Actually it does not insert formatting codes per se, but rather
707
   * simply inserts new levels at the appropriate locations in the
708
   * 'levels' array.
709
   */
710
  private void reinsertFormattingCodes()
711
  {
712
    if (formatterIndices == null)
713
      return;
714
    int input = length;
715
    int output = levels.length;
716
    // Process from the end as we are copying the array over itself here.
717
    for (int index = formatterIndices.size() - 1; index >= 0; --index)
718
      {
719
        int nextFmt = ((Integer) formatterIndices.get(index)).intValue();
720
 
721
        // nextFmt points to a location in the original array.  So,
722
        // nextFmt+1 is the target of our copying.  output is the location
723
        // to which we last copied, thus we can derive the length of the
724
        // copy from it.
725
        int len = output - nextFmt - 1;
726
        output = nextFmt;
727
        input -= len;
728
        // Note that we no longer need 'types' at this point, so we
729
        // only edit 'levels'.
730
        if (nextFmt + 1 < levels.length)
731
          System.arraycopy(levels, input, levels, nextFmt + 1, len);
732
 
733
        // Now set the level at the reinsertion point.
734
        int rightLevel;
735
        if (output == levels.length - 1)
736
          rightLevel = baseEmbedding;
737
        else
738
          rightLevel = levels[output + 1];
739
        int leftLevel;
740
        if (input == 0)
741
          leftLevel = baseEmbedding;
742
        else
743
          leftLevel = levels[input];
744
        levels[output] = (byte) Math.max(leftLevel, rightLevel);
745
      }
746
    length = levels.length;
747
  }
748
 
749
  /**
750
   * This is the main internal entry point.  After a constructor
751
   * has initialized the appropriate local state, it will call
752
   * this method to do all the work.
753
   */
754
  private void runBidi()
755
  {
756
    computeTypes();
757
    baseEmbedding = computeParagraphEmbeddingLevel();
758
    computeExplicitLevels();
759
    computeRuns();
760
    resolveWeakTypes();
761
    resolveNeutralTypes();
762
    resolveImplicitLevels();
763
    // We're done with the types.  Let the GC clean up.
764
    types = null;
765
    reinsertFormattingCodes();
766
    // After resolving the implicit levels, the number
767
    // of runs may have changed.
768
    computeRuns();
769
  }
770
 
771
  /**
772
   * Return true if the paragraph base embedding is left-to-right,
773
   * false otherwise.
774
   */
775
  public boolean baseIsLeftToRight()
776
  {
777
    return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
778
  }
779
 
780
  /**
781
   * Create a new Bidi object for a single line of text, taken
782
   * from the text used when creating the current Bidi object.
783
   * @param start the index of the first character of the line
784
   * @param end the index of the final character of the line
785
   * @return a new Bidi object for the indicated line of text
786
   */
787
  public Bidi createLineBidi(int start, int end)
788
  {
789
    // This isn't the most efficient implementation possible.
790
    // This probably does not matter, so we choose simplicity instead.
791
    int level = getLevelAt(start);
792
    int flag = (((level % 2) == 0)
793
                ? DIRECTION_LEFT_TO_RIGHT
794
                : DIRECTION_RIGHT_TO_LEFT);
795
    return new Bidi(text, textOffset + start,
796
                    embeddings, embeddingOffset + start,
797
                    end - start, flag);
798
  }
799
 
800
  /**
801
   * Return the base embedding level of the paragraph.
802
   */
803
  public int getBaseLevel()
804
  {
805
    return baseEmbedding;
806
  }
807
 
808
  /**
809
   * Return the length of the paragraph, in characters.
810
   */
811
  public int getLength()
812
  {
813
    return length;
814
  }
815
 
816
  /**
817
   * Return the level at the indicated character.  If the
818
   * supplied index is less than zero or greater than the length
819
   * of the text, then the paragraph's base embedding level will
820
   * be returned.
821
   * @param offset the character to examine
822
   * @return the level of that character
823
   */
824
  public int getLevelAt(int offset)
825
  {
826
    if (offset < 0 || offset >= length)
827
      return getBaseLevel();
828
    return levels[offset];
829
  }
830
 
831
  /**
832
   * Return the number of runs in the result.  A run is
833
   * a sequence of characters at the same embedding level.
834
   */
835
  public int getRunCount()
836
  {
837
    return runs.length;
838
  }
839
 
840
  /**
841
   * Return the level of the indicated run.
842
   * @param which the run to examine
843
   * @return the level of that run
844
   */
845
  public int getRunLevel(int which)
846
  {
847
    return levels[runs[which]];
848
  }
849
 
850
  /**
851
   * Return the index of the character just following the end
852
   * of the indicated run.
853
   * @param which the run to examine
854
   * @return the index of the character after the final character
855
   * of the run
856
   */
857
  public int getRunLimit(int which)
858
  {
859
    if (which == runs.length - 1)
860
      return length;
861
    return runs[which + 1];
862
  }
863
 
864
  /**
865
   * Return the index of the first character in the indicated run.
866
   * @param which the run to examine
867
   * @return the index of the first character of the run
868
   */
869
  public int getRunStart(int which)
870
  {
871
    return runs[which];
872
  }
873
 
874
  /**
875
   * Return true if the text is entirely left-to-right, and the
876
   * base embedding is also left-to-right.
877
   */
878
  public boolean isLeftToRight()
879
  {
880
    return resultFlags == LTOR;
881
  }
882
 
883
  /**
884
   * Return true if the text consists of mixed left-to-right and
885
   * right-to-left runs, or if the text consists of one kind of run
886
   * which differs from the base embedding direction.
887
   */
888
  public boolean isMixed()
889
  {
890
    return resultFlags == (LTOR | RTOL);
891
  }
892
 
893
  /**
894
   * Return true if the text is entirely right-to-left, and the
895
   * base embedding is also right-to-left.
896
   */
897
  public boolean isRightToLeft()
898
  {
899
    return resultFlags == RTOL;
900
  }
901
 
902
  /**
903
   * Return a String describing the internal state of this object.
904
   * This is only useful for debugging.
905
   */
906
  public String toString()
907
  {
908
    return "Bidi Bidi Bidi I like you, Buck!";
909
  }
910
 
911
  /**
912
   * Reorder objects according to the levels passed in.  This implements
913
   * reordering as defined by the Unicode bidirectional layout specification.
914
   * The levels are integers from 0 to 62; even numbers represent left-to-right
915
   * runs, and odd numbers represent right-to-left runs.
916
   *
917
   * @param levels the levels associated with each object
918
   * @param levelOffset the index of the first level to use
919
   * @param objs the objects to reorder according to the levels
920
   * @param objOffset the index of the first object to use
921
   * @param count the number of objects (and levels) to manipulate
922
   */
923
  public static void reorderVisually(byte[] levels, int levelOffset,
924
                                     Object[] objs, int objOffset, int count)
925
  {
926
    // We need a copy of the 'levels' array, as we are going to modify it.
927
    // This is unfortunate but difficult to avoid.
928
    byte[] levelCopy = new byte[count];
929
    // Do this explicitly so we can also find the maximum depth at the
930
    // same time.
931
    int max = 0;
932
    int lowestOdd = 63;
933
    for (int i = 0; i < count; ++i)
934
      {
935
        levelCopy[i] = levels[levelOffset + i];
936
        max = Math.max(levelCopy[i], max);
937
        if (levelCopy[i] % 2 != 0)
938
          lowestOdd = Math.min(lowestOdd, levelCopy[i]);
939
      }
940
 
941
    // Reverse the runs starting with the deepest.
942
    for (int depth = max; depth >= lowestOdd; --depth)
943
      {
944
        int start = 0;
945
        while (start < count)
946
          {
947
            // Find the start of a run >= DEPTH.
948
            while (start < count && levelCopy[start] < depth)
949
              ++start;
950
            if (start == count)
951
              break;
952
            // Find the end of the run.
953
            int end = start + 1;
954
            while (end < count && levelCopy[end] >= depth)
955
              ++end;
956
 
957
            // Reverse this run.
958
            for (int i = 0; i < (end - start) / 2; ++i)
959
              {
960
                byte tmpb = levelCopy[end - i - 1];
961
                levelCopy[end - i - 1] = levelCopy[start + i];
962
                levelCopy[start + i] = tmpb;
963
                Object tmpo = objs[objOffset + end - i - 1];
964
                objs[objOffset + end - i - 1] = objs[objOffset + start + i];
965
                objs[objOffset + start + i] = tmpo;
966
              }
967
 
968
            // Handle the next run.
969
            start = end + 1;
970
          }
971
      }
972
  }
973
 
974
  /**
975
   * Returns false if all characters in the text between start and end
976
   * are all left-to-right text. This implementation is just calls
977
   * <code>Character.getDirectionality(char)</code> on all characters
978
   * and makes sure all characters are either explicitly left-to-right
979
   * or neutral in directionality (character types L, EN, ES, ET, AN,
980
   * CS, S and WS).
981
   */
982
  public static boolean requiresBidi(char[] text, int start, int end)
983
  {
984
    for (int i = start; i < end; i++)
985
      {
986
        byte dir = Character.getDirectionality(text[i]);
987
        if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
988
            && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
989
            && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
990
            && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
991
            && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
992
            && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
993
            && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
994
            && dir != Character.DIRECTIONALITY_WHITESPACE
995
            && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR)
996
          return true;
997
      }
998
 
999
    return false;
1000
  }
1001
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.