OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [gnu/] [gcj/] [convert/] [Input_UTF8.java] - Blame information for rev 801

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 756 jeremybenn
/* Copyright (C) 1999, 2000  Free Software Foundation
2
 
3
   This file is part of libgcj.
4
 
5
This software is copyrighted work licensed under the terms of the
6
Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
7
details.  */
8
 
9
package gnu.gcj.convert;
10
 
11
/**
12
 * Convert UTF8 to Unicode.
13
 * @author Per Bothner <bothner@cygnus.com>
14
 * @date March 1999.
15
 */
16
 
17
public class Input_UTF8 extends BytesToUnicode
18
{
19
  public String getName() { return "UTF8"; }
20
 
21
  int partial = 0;
22
  int partial_bytes_expected = 0;
23
  //int suggogate_second = -1;
24
 
25
  public int read (char[] outbuffer, int outpos, int count)
26
  {
27
    int origpos = outpos;
28
    for (;;)
29
      {
30
        if (outpos - origpos >= count)
31
          break;
32
        if (inpos >= inlength)
33
          break;
34
        int b = inbuffer[inpos++];
35
        if (b >= 0)
36
          outbuffer[outpos++] = (char) b;
37
        else
38
          {
39
            if ((b & 0xC0) == 0x80) // Continuation byte
40
              {
41
                partial = (partial << 6) | (b & 0x3F);
42
                --partial_bytes_expected;
43
                if (partial_bytes_expected == 1)
44
                  {
45
                    if (partial > (0xFFFF>>6))
46
                      {
47
                        // The next continuation byte will cause the result
48
                        // to exceed 0xFFFF, so we must use a surrogate pair.
49
                        // The "Unicode scalar value" (see D28 in section 3.7
50
                        // of the Unicode Standard 2.0) is defined as:
51
                        // value == (hi-0xD800)*0x400+(lo-0xDC00)+0x10000,
52
                        // where (hi, lo) is the Unicode surrogate pair.
53
                        // After reading the first three bytes, we have:
54
                        // partial == (value >> 6).
55
                        // Substituting and simplifying, we get:
56
                        // partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400.
57
                        // The definition lo>=0xDC00 && lo<=0xDFFF implies
58
                        // that (lo-0xDC00)>>6 is in the range 0..15.
59
                        // Hence we can solve for `hi' and we can emit
60
                        // the high-surrogate without waiting for the
61
                        // final byte:
62
                        outbuffer[outpos++]
63
                          = (char) (0xD800 + ((partial - 0x400) >> 4));
64
 
65
                        // Now we want to set it up so that when we read
66
                        // the final byte on the next iteration, we will
67
                        // get the low-surrogate without special handling.
68
                        // I.e. we want:
69
                        // lo == (next_partial << 6) | (next & 0x3F)
70
                        // where next is the next input byte and next_partial
71
                        // is the value of partial at the end of this
72
                        // iteration.  This implies:  next_partial == lo >> 6.
73
                        // We can simplify the previous:
74
                        // partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400,
75
                        // to: partial == (hi-0xD800)*0x10+(lo>>6)+0x90.
76
                        // Inserting the values of hi and next_partial,
77
                        // and simplifying, we get:  partial ==
78
                        // ( (partial-0x400)&~0xF) + next_partial + 0x90.
79
                        // Solving for next_partial, we get:
80
                        // next_partial = partial+0x400-0x90-(partial&~0xF):
81
                        // or: next_partial = (partial&0xF) + 0x370.  Hence:
82
                        partial = (partial & 0xF) + 0x370;
83
                      }
84
                  }
85
                else if (partial_bytes_expected == 0)
86
                  {
87
                    outbuffer[outpos++] = (char) partial;
88
                    partial = 0;
89
                    partial_bytes_expected = 0;
90
                  }
91
              }
92
            else // prefix byte
93
              {
94
                if ((b & 0xE0) == 0xC0)
95
                  {
96
                    partial = b & 0x1F;
97
                    partial_bytes_expected = 1;
98
                  }
99
                else if ((b & 0xF0) == 0xE0)
100
                  {
101
                    partial = b & 0xF;
102
                    partial_bytes_expected = 2;
103
                  }
104
                else
105
                  {
106
                    partial = b & 7;
107
                    partial_bytes_expected = 3;
108
                  }
109
              }
110
          }
111
      }
112
    return outpos - origpos;
113
  }
114
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.