OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [gnu/] [gcj/] [convert/] [Output_UTF8.java] - Blame information for rev 776

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 756 jeremybenn
/* Copyright (C) 1999, 2000, 2003, 2006  Free Software Foundation
2
 
3
   This file is part of libgcj.
4
 
5
This software is copyrighted work licensed under the terms of the
6
Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
7
details.  */
8
 
9
package gnu.gcj.convert;
10
 
11
/**
12
 * Convert Unicode to UTF8.
13
 * @author Per Bothner <bothner@cygnus.com>
14
 * @date March 1999.
15
 */
16
 
17
public class Output_UTF8 extends UnicodeToBytes
18
{
19
  public String getName() { return "UTF8"; }
20
 
21
  /** True if a surrogate pair should be emitted as a single UTF8 sequence.
22
   * Otherwise, a surrogate pair is treated as two separate characters.
23
   * Also, '\0' is emitted as {0} if true, and as {0xC0,0x80} if false. */
24
  public boolean standardUTF8 = true;
25
 
26
  // Saves the previous char if it was a high-surrogate.
27
  char hi_part;
28
  // Value of incomplete character.
29
  int value;
30
  // Number of continuation bytes still to emit.
31
  int bytes_todo;
32
 
33
  public int write (char[] inbuffer, int inpos, int inlength)
34
  {
35
    int start_pos = inpos;
36
    int avail = buf.length - count;
37
    for (;;)
38
      {
39
        if (avail == 0 || (inlength == 0 && bytes_todo == 0 && hi_part == 0))
40
          break;
41
        // The algorithm is made more complicated because we want to write
42
        // at least one byte in the output buffer, if there is room for
43
        // that byte, and at least one input character is available.
44
        // This makes the code more robust, since client code will
45
        // always "make progress", even in the complicated cases,
46
        // where the output buffer only has room for only *part* of a
47
        // multi-byte sequence, or the input char buffer only has half
48
        // of a surrogate pair (when standardUTF8 is set), or both.
49
 
50
        // Handle continuation characters we did not have room for before.
51
        if (bytes_todo > 0)
52
          {
53
            do
54
              {
55
                bytes_todo--;
56
                buf[count++] = (byte)
57
                  (((value >> (bytes_todo * 6)) & 0x3F) | 0x80);
58
                avail--;
59
              }
60
            while (bytes_todo > 0 && avail > 0);
61
            continue;
62
          }
63
 
64
        // Handle a high surrogate at the end of the input stream.
65
        if (inlength == 0 && hi_part != 0)
66
          {
67
            buf[count++] = (byte) (0xE0 | (hi_part >> 12));
68
            value = hi_part;
69
            hi_part = 0;
70
            avail--;
71
            bytes_todo = 2;
72
            continue;
73
          }
74
 
75
        char ch = inbuffer[inpos++];
76
        inlength--;
77
 
78
        if (hi_part != 0 && (ch <= 0xDBFF || ch > 0xDFFF))
79
          {
80
            // If the previous character was a high surrogate, and we
81
            // don't now have a low surrogate, we print the high
82
            // surrogate as an isolated character.
83
            --inpos;
84
            ++inlength;
85
            buf[count++] = (byte) (0xE0 | (hi_part >> 12));
86
            value = hi_part;
87
            hi_part = 0;
88
            avail--;
89
            bytes_todo = 2;
90
          }
91
        else if (hi_part == 0 && ch >= 0xDC00 && ch <= 0xDFFF)
92
          {
93
            // If this character is a low surrogate and we didn't
94
            // previously see a high surrogate, we do the same thing
95
            // as above.
96
            buf[count++] = (byte) (0xE0 | (ch >> 12));
97
            value = ch;
98
            avail--;
99
            bytes_todo = 2;
100
          }
101
        else if (ch < 128 && (ch != 0 || standardUTF8))
102
          {
103
            avail--;
104
            buf[count++] = (byte) ch;
105
          }
106
        else if (ch <= 0x07FF)
107
          {
108
            buf[count++] = (byte) (0xC0 | (ch >> 6));
109
            avail--;
110
            value = ch;
111
            bytes_todo = 1;
112
          }
113
        else if (ch >= 0xD800 && ch <= 0xDFFF && standardUTF8)
114
          {
115
            if (ch <= 0xDBFF)  // High surrogates
116
              {
117
                // Just save the high surrogate until the next
118
                // character comes along.
119
                hi_part = ch;
120
              }
121
            else // Low surrogates
122
              {
123
                value = (hi_part - 0xD800) * 0x400 + (ch - 0xDC00) + 0x10000;
124
                buf[count++] = (byte) (0xF0 | (value >> 18));
125
                avail--;
126
                bytes_todo = 3;
127
                hi_part = 0;
128
              }
129
          }
130
        else
131
          {
132
            buf[count++] = (byte) (0xE0 | (ch >> 12));
133
            value = ch;
134
            avail--;
135
            bytes_todo = 2;
136
          }
137
      }
138
    return inpos - start_pos;
139
  }
140
 
141
  public boolean havePendingBytes()
142
  {
143
    return bytes_todo > 0 || hi_part != 0;
144
  }
145
 
146
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.