1 |
769 |
jeremybenn |
/* CharGlyphMap.java -- Manages the 'cmap' table of TrueType fonts
|
2 |
|
|
Copyright (C) 2006 Free Software Foundation, Inc.
|
3 |
|
|
|
4 |
|
|
This file is part of GNU Classpath.
|
5 |
|
|
|
6 |
|
|
GNU Classpath is free software; you can redistribute it and/or modify
|
7 |
|
|
it under the terms of the GNU General Public License as published by
|
8 |
|
|
the Free Software Foundation; either version 2, or (at your option)
|
9 |
|
|
any later version.
|
10 |
|
|
|
11 |
|
|
GNU Classpath is distributed in the hope that it will be useful, but
|
12 |
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
General Public License for more details.
|
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU General Public License
|
17 |
|
|
along with GNU Classpath; see the file COPYING. If not, write to the
|
18 |
|
|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 |
|
|
02110-1301 USA.
|
20 |
|
|
|
21 |
|
|
Linking this library statically or dynamically with other modules is
|
22 |
|
|
making a combined work based on this library. Thus, the terms and
|
23 |
|
|
conditions of the GNU General Public License cover the whole
|
24 |
|
|
combination.
|
25 |
|
|
|
26 |
|
|
As a special exception, the copyright holders of this library give you
|
27 |
|
|
permission to link this library with independent modules to produce an
|
28 |
|
|
executable, regardless of the license terms of these independent
|
29 |
|
|
modules, and to copy and distribute the resulting executable under
|
30 |
|
|
terms of your choice, provided that you also meet, for each linked
|
31 |
|
|
independent module, the terms and conditions of the license of that
|
32 |
|
|
module. An independent module is a module which is not derived from
|
33 |
|
|
or based on this library. If you modify this library, you may extend
|
34 |
|
|
this exception to your version of the library, but you are not
|
35 |
|
|
obligated to do so. If you do not wish to do so, delete this
|
36 |
|
|
exception statement from your version. */
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
package gnu.java.awt.font.opentype;
|
40 |
|
|
|
41 |
|
|
import java.nio.ByteBuffer;
|
42 |
|
|
import java.nio.CharBuffer;
|
43 |
|
|
import java.nio.ShortBuffer;
|
44 |
|
|
import java.nio.IntBuffer;
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
/**
|
48 |
|
|
* A mapping from Unicode codepoints to glyphs. This mapping
|
49 |
|
|
* does not perform any re-ordering or decomposition, so it
|
50 |
|
|
* is not everything that is needed to support Unicode.
|
51 |
|
|
*
|
52 |
|
|
* <p>This class manages the <code>cmap</code> table of
|
53 |
|
|
* OpenType and TrueType fonts.
|
54 |
|
|
*
|
55 |
|
|
* @see <a href="http://partners.adobe.com/asn/tech/type/opentype/cmap.jsp">
|
56 |
|
|
* the <code>cmap</code> part of Adobe’s OpenType Specification</a>
|
57 |
|
|
*
|
58 |
|
|
* @see <a href="http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html">
|
59 |
|
|
* the <code>cmap</code> section of Apple’s TrueType Reference
|
60 |
|
|
* Manual</a>
|
61 |
|
|
*
|
62 |
|
|
* @author Sascha Brawer (brawer@dandelis.ch)
|
63 |
|
|
*/
|
64 |
|
|
public abstract class CharGlyphMap
|
65 |
|
|
{
|
66 |
|
|
private static final int PLATFORM_UNICODE = 0;
|
67 |
|
|
private static final int PLATFORM_MACINTOSH = 1;
|
68 |
|
|
private static final int PLATFORM_MICROSOFT = 3;
|
69 |
|
|
|
70 |
|
|
|
71 |
|
|
/**
|
72 |
|
|
* Determines the glyph index for a given Unicode codepoint. Users
|
73 |
|
|
* should be aware that the character-to-glyph mapping is not
|
74 |
|
|
* everything that is needed for full Unicode support. For example,
|
75 |
|
|
* the <code>cmap</code> table is not able to synthesize accented
|
76 |
|
|
* glyphs from the canonical decomposition sequence, even if the
|
77 |
|
|
* font would contain a glyph for the composed form.
|
78 |
|
|
*
|
79 |
|
|
* @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates
|
80 |
|
|
* (U+D800 to U+DFFF) cannot be passed, they must be mapped to
|
81 |
|
|
* UCS-4 first.
|
82 |
|
|
*
|
83 |
|
|
* @return the glyph index, or 0 if the font does not contain
|
84 |
|
|
* a glyph for this codepoint.
|
85 |
|
|
*/
|
86 |
|
|
public abstract int getGlyph(int ucs4);
|
87 |
|
|
|
88 |
|
|
|
89 |
|
|
/**
|
90 |
|
|
* Reads a CharGlyphMap from an OpenType or TrueType <code>cmap</code>
|
91 |
|
|
* table. The current implementation works as follows:
|
92 |
|
|
*
|
93 |
|
|
* <p><ol><li>If the font has a type 4 cmap for the Unicode platform
|
94 |
|
|
* (encoding 0, 1, 2, 3 or 4), or a type 4 cmap for the Microsoft
|
95 |
|
|
* platform (encodings 1 or 10), that table is used to map Unicode
|
96 |
|
|
* codepoints to glyphs. Most recent fonts, both for Macintosh and
|
97 |
|
|
* Windows, should provide such a table.</li>
|
98 |
|
|
*
|
99 |
|
|
* <li>Otherwise, if the font has any type 0 cmap for the Macintosh
|
100 |
|
|
* platform, a Unicode-to-glyph mapping is synthesized from certain
|
101 |
|
|
* type 0 cmaps. The current implementation collects mappings from
|
102 |
|
|
* Roman, Icelandic, Turkish, Croatian, Romanian, Eastern European,
|
103 |
|
|
* Cyrillic, Greek, Hebrew, Arabic and Farsi cmaps.</li>.</ol>
|
104 |
|
|
*
|
105 |
|
|
* @param buf a buffer whose position is right at the start
|
106 |
|
|
* of the entire <code>cmap</code> table, and whose limit
|
107 |
|
|
* is at its end.
|
108 |
|
|
*
|
109 |
|
|
* @return a concrete subclass of <code>CharGlyphMap</code>
|
110 |
|
|
* that performs the mapping.
|
111 |
|
|
*
|
112 |
|
|
* @see <a href=
|
113 |
|
|
* "http://partners.adobe.com/asn/tech/type/opentype/cmap.jsp"
|
114 |
|
|
* >the <code>cmap</code> part of Adobe’ OpenType Specification</a>
|
115 |
|
|
*
|
116 |
|
|
* @see <a href=
|
117 |
|
|
* "http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html"
|
118 |
|
|
* >the <code>cmap</code> section of Apple’s TrueType Reference
|
119 |
|
|
* Manual</a>
|
120 |
|
|
*/
|
121 |
|
|
public static CharGlyphMap forTable(ByteBuffer buf)
|
122 |
|
|
{
|
123 |
|
|
boolean hasType0 = false;
|
124 |
|
|
int start4 = -1, platform4 = 0, encoding4 = 0;
|
125 |
|
|
int start12 = -1, platform12 = 0, encoding12 = 0;
|
126 |
|
|
int version;
|
127 |
|
|
int numTables;
|
128 |
|
|
int tableStart = buf.position();
|
129 |
|
|
int limit = buf.limit();
|
130 |
|
|
int format, platform, language, encoding, length, offset;
|
131 |
|
|
|
132 |
|
|
version = buf.getChar();
|
133 |
|
|
if (version != 0)
|
134 |
|
|
return null;
|
135 |
|
|
|
136 |
|
|
numTables = buf.getChar();
|
137 |
|
|
for (int i = 0; i < numTables; i++)
|
138 |
|
|
{
|
139 |
|
|
buf.limit(limit).position(tableStart + 4 + i * 8);
|
140 |
|
|
platform = buf.getChar();
|
141 |
|
|
encoding = buf.getChar();
|
142 |
|
|
offset = tableStart + buf.getInt();
|
143 |
|
|
|
144 |
|
|
buf.position(offset);
|
145 |
|
|
format = buf.getChar();
|
146 |
|
|
|
147 |
|
|
switch (format)
|
148 |
|
|
{
|
149 |
|
|
case 0:
|
150 |
|
|
hasType0 = true;
|
151 |
|
|
break;
|
152 |
|
|
|
153 |
|
|
case 4:
|
154 |
|
|
length = buf.getChar();
|
155 |
|
|
language = buf.getChar();
|
156 |
|
|
if ((start4 == -1)
|
157 |
|
|
&& Type4.isSupported(platform, language, encoding))
|
158 |
|
|
{
|
159 |
|
|
start4 = offset;
|
160 |
|
|
platform4 = platform;
|
161 |
|
|
encoding4 = encoding;
|
162 |
|
|
}
|
163 |
|
|
break;
|
164 |
|
|
|
165 |
|
|
case 12:
|
166 |
|
|
if ((start12 == -1) && Type12.isSupported(platform, encoding))
|
167 |
|
|
{
|
168 |
|
|
start12 = offset;
|
169 |
|
|
platform12 = platform;
|
170 |
|
|
encoding12 = encoding;
|
171 |
|
|
}
|
172 |
|
|
break;
|
173 |
|
|
}
|
174 |
|
|
}
|
175 |
|
|
|
176 |
|
|
|
177 |
|
|
if (start12 >= 0)
|
178 |
|
|
{
|
179 |
|
|
try
|
180 |
|
|
{
|
181 |
|
|
buf.limit(limit).position(start12);
|
182 |
|
|
return new Type12(buf, platform12, encoding12);
|
183 |
|
|
}
|
184 |
|
|
catch (Exception ex)
|
185 |
|
|
{
|
186 |
|
|
ex.printStackTrace();
|
187 |
|
|
}
|
188 |
|
|
}
|
189 |
|
|
|
190 |
|
|
if (start4 >= 0)
|
191 |
|
|
{
|
192 |
|
|
try
|
193 |
|
|
{
|
194 |
|
|
buf.limit(limit).position(start4);
|
195 |
|
|
return Type4.readTable(buf, platform4, encoding4);
|
196 |
|
|
}
|
197 |
|
|
catch (Exception ex)
|
198 |
|
|
{
|
199 |
|
|
}
|
200 |
|
|
}
|
201 |
|
|
|
202 |
|
|
if (hasType0)
|
203 |
|
|
{
|
204 |
|
|
try
|
205 |
|
|
{
|
206 |
|
|
buf.limit(limit).position(tableStart);
|
207 |
|
|
return new Type0(buf);
|
208 |
|
|
}
|
209 |
|
|
catch (Exception ex)
|
210 |
|
|
{
|
211 |
|
|
}
|
212 |
|
|
}
|
213 |
|
|
|
214 |
|
|
return new Dummy();
|
215 |
|
|
}
|
216 |
|
|
|
217 |
|
|
|
218 |
|
|
/**
|
219 |
|
|
* A dummy mapping that maps anything to the undefined glyph.
|
220 |
|
|
* Used if no other cmap is understood in a font.
|
221 |
|
|
*
|
222 |
|
|
* @author Sascha Brawer (brawer@dandelis.ch)
|
223 |
|
|
*/
|
224 |
|
|
private static final class Dummy
|
225 |
|
|
extends CharGlyphMap
|
226 |
|
|
{
|
227 |
|
|
public int getGlyph(int ucs4)
|
228 |
|
|
{
|
229 |
|
|
return 0;
|
230 |
|
|
}
|
231 |
|
|
}
|
232 |
|
|
|
233 |
|
|
|
234 |
|
|
/**
|
235 |
|
|
* A mapping from Unicode code points to glyph IDs through CMAP Type
|
236 |
|
|
* 0 tables. These tables have serious limitations: Only the first
|
237 |
|
|
* 256 glyphs can be addressed, and the source of the mapping is not
|
238 |
|
|
* Unicode, but an encoding used on the Macintosh.
|
239 |
|
|
*
|
240 |
|
|
* <p>However, some fonts have only a Type 0 cmap. In this case, we
|
241 |
|
|
* process all the Type 0 tables we understand, and establish
|
242 |
|
|
* a reversed glyph-to-Unicode mapping. When a glyph is requested
|
243 |
|
|
* for a given Unicode character, we perform a linear search on the
|
244 |
|
|
* reversed table to find the glyph which maps to the requested
|
245 |
|
|
* character. While not blazingly fast, this gives a reasonable
|
246 |
|
|
* fallback for old fonts.
|
247 |
|
|
*
|
248 |
|
|
* @author Sascha Brawer (brawer@dandelis.ch)
|
249 |
|
|
*/
|
250 |
|
|
private static final class Type0
|
251 |
|
|
extends CharGlyphMap
|
252 |
|
|
{
|
253 |
|
|
/**
|
254 |
|
|
* An array whose <code>i</code>-th element indicates the
|
255 |
|
|
* Unicode code point of glyph <code>i</code> in the font.
|
256 |
|
|
*/
|
257 |
|
|
private char[] glyphToUCS2 = new char[256];
|
258 |
|
|
|
259 |
|
|
|
260 |
|
|
/**
|
261 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
262 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
263 |
|
|
* MacOS Arabic encoding.
|
264 |
|
|
*
|
265 |
|
|
* @see <a href=
|
266 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ARABIC.TXT"
|
267 |
|
|
* >the Unicode mapping table for the MacOS Arabic encoding</a>
|
268 |
|
|
*/
|
269 |
|
|
private static final String UPPER_ARABIC
|
270 |
|
|
= "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
271 |
|
|
+ "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb"
|
272 |
|
|
+ "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7"
|
273 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a"
|
274 |
|
|
+ "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f"
|
275 |
|
|
+ "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669"
|
276 |
|
|
+ "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623"
|
277 |
|
|
+ "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d"
|
278 |
|
|
+ "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637"
|
279 |
|
|
+ "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641"
|
280 |
|
|
+ "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b"
|
281 |
|
|
+ "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686"
|
282 |
|
|
+ "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2";
|
283 |
|
|
|
284 |
|
|
|
285 |
|
|
/**
|
286 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
287 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
288 |
|
|
* MacOS East European Roman encoding.
|
289 |
|
|
*
|
290 |
|
|
* @see <a href=
|
291 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CENTEURO.TXT"
|
292 |
|
|
* >the Unicode mapping table for the MacOS Central European
|
293 |
|
|
* encoding</a>
|
294 |
|
|
*/
|
295 |
|
|
private static final String UPPER_EAST_EUROPEAN_ROMAN
|
296 |
|
|
= "\u007e\u0000\u00c4\u0100\u0101\u00c9\u0104\u00d6\u00dc\u00e1"
|
297 |
|
|
+ "\u0105\u010c\u00e4\u010d\u0106\u0107\u00e9\u0179\u017a\u010e"
|
298 |
|
|
+ "\u00ed\u010f\u0112\u0113\u0116\u00f3\u0117\u00f4\u00f6\u00f5"
|
299 |
|
|
+ "\u00fa\u011a\u011b\u00fc\u2020\u00b0\u0118\u00a3\u00a7\u2022"
|
300 |
|
|
+ "\u00b6\u00df\u00ae\u00a9\u2122\u0119\u00a8\u2260\u0123\u012e"
|
301 |
|
|
+ "\u012f\u012a\u2264\u2265\u012b\u0136\u2202\u2211\u0142\u013b"
|
302 |
|
|
+ "\u013c\u013d\u013e\u0139\u013a\u0145\u0146\u0143\u00ac\u221a"
|
303 |
|
|
+ "\u0144\u0147\u2206\u00ab\u00bb\u2026\u00a0\u0148\u0150\u00d5"
|
304 |
|
|
+ "\u0151\u014c\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
|
305 |
|
|
+ "\u014d\u0154\u0155\u0158\u2039\u203a\u0159\u0156\u0157\u0160"
|
306 |
|
|
+ "\u201a\u201e\u0161\u015a\u015b\u00c1\u0164\u0165\u00cd\u017d"
|
307 |
|
|
+ "\u017e\u016a\u00d3\u00d4\u016b\u016e\u00da\u016f\u0170\u0171"
|
308 |
|
|
+ "\u0172\u0173\u00dd\u00fd\u0137\u017b\u0141\u017c\u0122\u02c7";
|
309 |
|
|
|
310 |
|
|
|
311 |
|
|
/**
|
312 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
313 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
314 |
|
|
* MacOS Roman encoding for the Croatian language.
|
315 |
|
|
*
|
316 |
|
|
* @see <a href=
|
317 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CROATIAN.TXT"
|
318 |
|
|
* >the Unicode mapping table for the MacOS Croatian encoding</a>
|
319 |
|
|
*/
|
320 |
|
|
private static final String UPPER_CROATIAN
|
321 |
|
|
= "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
322 |
|
|
+ "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
|
323 |
|
|
+ "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
|
324 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
|
325 |
|
|
+ "\u00b6\u00df\u00ae\u0160\u2122\u00b4\u00a8\u2260\u017d\u00d8"
|
326 |
|
|
+ "\u221e\u00b1\u2264\u2265\u2206\u00b5\u2202\u2211\u220f\u0161"
|
327 |
|
|
+ "\u222b\u00aa\u00ba\u03a9\u017e\u00f8\u00bf\u00a1\u00ac\u221a"
|
328 |
|
|
+ "\u0192\u2248\u0106\u00ab\u010c\u2026\u00a0\u00c0\u00c3\u00d5"
|
329 |
|
|
+ "\u0152\u0153\u0110\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
|
330 |
|
|
+ "\uf8ff\u00a9\u2044\u20ac\u2039\u203a\u00c6\u00bb\u2013\u00b7"
|
331 |
|
|
+ "\u201a\u201e\u2030\u00c2\u0107\u00c1\u010d\u00c8\u00cd\u00ce"
|
332 |
|
|
+ "\u00cf\u00cc\u00d3\u00d4\u0111\u00d2\u00da\u00db\u00d9\u0131"
|
333 |
|
|
+ "\u02c6\u02dc\u00af\u03c0\u00cb\u02da\u00b8\u00ca\u00e6\u02c7";
|
334 |
|
|
|
335 |
|
|
|
336 |
|
|
/**
|
337 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
338 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
339 |
|
|
* MacOS Cyrillic encoding.
|
340 |
|
|
*
|
341 |
|
|
* @see <a href=
|
342 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT"
|
343 |
|
|
* >the Unicode mapping table for the MacOS Cyrillic encoding</a>
|
344 |
|
|
*/
|
345 |
|
|
private static final String UPPER_CYRILLIC
|
346 |
|
|
= "\u007e\u0000\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417"
|
347 |
|
|
+ "\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421"
|
348 |
|
|
+ "\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b"
|
349 |
|
|
+ "\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022"
|
350 |
|
|
+ "\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453"
|
351 |
|
|
+ "\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454"
|
352 |
|
|
+ "\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a"
|
353 |
|
|
+ "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c"
|
354 |
|
|
+ "\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e"
|
355 |
|
|
+ "\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431"
|
356 |
|
|
+ "\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b"
|
357 |
|
|
+ "\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445"
|
358 |
|
|
+ "\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac";
|
359 |
|
|
|
360 |
|
|
|
361 |
|
|
/**
|
362 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
363 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
364 |
|
|
* MacOS Arabic encoding with the Farsi language.
|
365 |
|
|
*
|
366 |
|
|
* @see <a href=
|
367 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/FARSI.TXT"
|
368 |
|
|
* >the Unicode mapping table for the MacOS Farsi encoding</a>
|
369 |
|
|
*/
|
370 |
|
|
private static final String UPPER_FARSI
|
371 |
|
|
= "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
372 |
|
|
+ "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb"
|
373 |
|
|
+ "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7"
|
374 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a"
|
375 |
|
|
+ "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f"
|
376 |
|
|
+ "\u06f0\u06f1\u06f2\u06f3\u06f4\u06f5\u06f6\u06f7\u06f8\u06f9"
|
377 |
|
|
+ "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623"
|
378 |
|
|
+ "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d"
|
379 |
|
|
+ "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637"
|
380 |
|
|
+ "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641"
|
381 |
|
|
+ "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b"
|
382 |
|
|
+ "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686"
|
383 |
|
|
+ "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2";
|
384 |
|
|
|
385 |
|
|
|
386 |
|
|
/**
|
387 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
388 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
389 |
|
|
* MacOS Greek encoding.
|
390 |
|
|
*
|
391 |
|
|
* @see <a
|
392 |
|
|
* href="http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GREEK.TXT"
|
393 |
|
|
* >the Unicode mapping table for the MacOS Greek encoding</a>
|
394 |
|
|
*/
|
395 |
|
|
private static final String UPPER_GREEK
|
396 |
|
|
= "\u007e\u0000\u00c4\u00b9\u00b2\u00c9\u00b3\u00d6\u00dc\u0385"
|
397 |
|
|
+ "\u00e0\u00e2\u00e4\u0384\u00a8\u00e7\u00e9\u00e8\u00ea\u00eb"
|
398 |
|
|
+ "\u00a3\u2122\u00ee\u00ef\u2022\u00bd\u2030\u00f4\u00f6\u00a6"
|
399 |
|
|
+ "\u20ac\u00f9\u00fb\u00fc\u2020\u0393\u0394\u0398\u039b\u039e"
|
400 |
|
|
+ "\u03a0\u00df\u00ae\u00a9\u03a3\u03aa\u00a7\u2260\u00b0\u00b7"
|
401 |
|
|
+ "\u0391\u00b1\u2264\u2265\u00a5\u0392\u0395\u0396\u0397\u0399"
|
402 |
|
|
+ "\u039a\u039c\u03a6\u03ab\u03a8\u03a9\u03ac\u039d\u00ac\u039f"
|
403 |
|
|
+ "\u03a1\u2248\u03a4\u00ab\u00bb\u2026\u00a0\u03a5\u03a7\u0386"
|
404 |
|
|
+ "\u0388\u0153\u2013\u2015\u201c\u201d\u2018\u2019\u00f7\u0389"
|
405 |
|
|
+ "\u038a\u038c\u038e\u03ad\u03ae\u03af\u03cc\u038f\u03cd\u03b1"
|
406 |
|
|
+ "\u03b2\u03c8\u03b4\u03b5\u03c6\u03b3\u03b7\u03b9\u03be\u03ba"
|
407 |
|
|
+ "\u03bb\u03bc\u03bd\u03bf\u03c0\u03ce\u03c1\u03c3\u03c4\u03b8"
|
408 |
|
|
+ "\u03c9\u03c2\u03c7\u03c5\u03b6\u03ca\u03cb\u0390\u03b0\u00ad";
|
409 |
|
|
|
410 |
|
|
|
411 |
|
|
/**
|
412 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
413 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
414 |
|
|
* MacOS Hebrew encoding.
|
415 |
|
|
*
|
416 |
|
|
* <p>The codepoint 0x81 (HEBREW LIGATURE YIDDISH YOD YOD PATAH)
|
417 |
|
|
* has no composed Unicode equivalent, but is expressed as the
|
418 |
|
|
* sequence U+05F2 U+05B7 in Unicode. A similar situation exists
|
419 |
|
|
* with the codepoint 0xC0 (HEBREW LIGATURE LAMED HOLAM), which
|
420 |
|
|
* MacOS converts to U+F86A U+05DC U+05B9. To correctly deal
|
421 |
|
|
* with these sequences, we probably should synthesize a ligature
|
422 |
|
|
* table if a Hebrew font only provides a Type 0 CMAP.
|
423 |
|
|
*
|
424 |
|
|
* @see <a href=
|
425 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/HEBREW.TXT"
|
426 |
|
|
* >the Unicode mapping table for the MacOS Hebrew encoding</a>
|
427 |
|
|
*/
|
428 |
|
|
private static final String UPPER_HEBREW
|
429 |
|
|
= "\u007e\u0000\u00c4\u0000\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
430 |
|
|
+ "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
|
431 |
|
|
+ "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
|
432 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u0025"
|
433 |
|
|
+ "\u20aa\u0027\u0029\u0028\u002a\u002b\u002c\u002d\u002e\u002f"
|
434 |
|
|
+ "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039"
|
435 |
|
|
+ "\u003a\u003b\u003c\u003d\u003e\u003f\u0000\u201e\uf89b\uf89c"
|
436 |
|
|
+ "\uf89d\uf89e\u05bc\ufb4b\ufb35\u2026\u00a0\u05b8\u05b7\u05b5"
|
437 |
|
|
+ "\u05b6\u05b4\u2013\u2014\u201c\u201d\u2018\u2019\ufb2a\ufb2b"
|
438 |
|
|
+ "\u05bf\u05b0\u05b2\u05b1\u05bb\u05b9\u0000\u05b3\u05d0\u05d1"
|
439 |
|
|
+ "\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db"
|
440 |
|
|
+ "\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5"
|
441 |
|
|
+ "\u05e6\u05e7\u05e8\u05e9\u05ea\u007d\u005d\u007b\u005b\u007c";
|
442 |
|
|
|
443 |
|
|
|
444 |
|
|
/**
|
445 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
446 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
447 |
|
|
* MacOS Roman encoding with the Icelandic language.
|
448 |
|
|
*
|
449 |
|
|
* @see <a href=
|
450 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ICELAND.TXT"
|
451 |
|
|
* >the Unicode mapping table for the MacOS Icelandic encoding</a>
|
452 |
|
|
*/
|
453 |
|
|
private static final String UPPER_ICELANDIC
|
454 |
|
|
= "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
455 |
|
|
+ "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
|
456 |
|
|
+ "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
|
457 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u00dd\u00b0\u00a2\u00a3\u00a7\u2022"
|
458 |
|
|
+ "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8"
|
459 |
|
|
+ "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
|
460 |
|
|
+ "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a"
|
461 |
|
|
+ "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
|
462 |
|
|
+ "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
|
463 |
|
|
+ "\u00ff\u0178\u2044\u20ac\u00d0\u00f0\u00de\u00fe\u00fd\u00b7"
|
464 |
|
|
+ "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
|
465 |
|
|
+ "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131"
|
466 |
|
|
+ "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
|
467 |
|
|
|
468 |
|
|
|
469 |
|
|
/**
|
470 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
471 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
472 |
|
|
* MacOS Roman encoding for most languages. Exceptions include
|
473 |
|
|
* Croatian, Icelandic, Romanian, and Turkish.
|
474 |
|
|
*
|
475 |
|
|
* @see <a
|
476 |
|
|
* href="http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT"
|
477 |
|
|
* >the Unicode mapping table for the MacOS Roman encoding</a>
|
478 |
|
|
*/
|
479 |
|
|
private static final String UPPER_ROMAN
|
480 |
|
|
= "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
481 |
|
|
+ "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
|
482 |
|
|
+ "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
|
483 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
|
484 |
|
|
+ "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8"
|
485 |
|
|
+ "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
|
486 |
|
|
+ "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a"
|
487 |
|
|
+ "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
|
488 |
|
|
+ "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
|
489 |
|
|
+ "\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7"
|
490 |
|
|
+ "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
|
491 |
|
|
+ "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131"
|
492 |
|
|
+ "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
|
493 |
|
|
|
494 |
|
|
|
495 |
|
|
/**
|
496 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
497 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
498 |
|
|
* MacOS Roman encoding with the Romanian language.
|
499 |
|
|
*
|
500 |
|
|
* @see <a href=
|
501 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT"
|
502 |
|
|
* >the Unicode mapping table for the MacOS Romanian encoding</a>
|
503 |
|
|
*/
|
504 |
|
|
private static final String UPPER_ROMANIAN
|
505 |
|
|
= "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
506 |
|
|
+ "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
|
507 |
|
|
+ "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
|
508 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
|
509 |
|
|
+ "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u0102\u0218"
|
510 |
|
|
+ "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
|
511 |
|
|
+ "\u222b\u00aa\u00ba\u03a9\u0103\u0219\u00bf\u00a1\u00ac\u221a"
|
512 |
|
|
+ "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
|
513 |
|
|
+ "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
|
514 |
|
|
+ "\u00ff\u0178\u2044\u20ac\u2039\u203a\u021a\u021b\u2021\u00b7"
|
515 |
|
|
+ "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
|
516 |
|
|
+ "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131"
|
517 |
|
|
+ "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
|
518 |
|
|
|
519 |
|
|
|
520 |
|
|
/**
|
521 |
|
|
* A String whose <code>charAt(i)</code> is the Unicode character
|
522 |
|
|
* that corresponds to the codepoint <code>i + 126</code> in the
|
523 |
|
|
* MacOS Roman encoding with the Turkish language.
|
524 |
|
|
*
|
525 |
|
|
* @see <a href=
|
526 |
|
|
* "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/TURKISH.TXT"
|
527 |
|
|
* >the Unicode mapping table for the MacOS Turkish encoding</a>
|
528 |
|
|
*/
|
529 |
|
|
private static final String UPPER_TURKISH
|
530 |
|
|
= "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
|
531 |
|
|
+ "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
|
532 |
|
|
+ "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
|
533 |
|
|
+ "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
|
534 |
|
|
+ "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8"
|
535 |
|
|
+ "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
|
536 |
|
|
+ "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a"
|
537 |
|
|
+ "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
|
538 |
|
|
+ "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
|
539 |
|
|
+ "\u00ff\u0178\u011e\u011f\u0130\u0131\u015e\u015f\u2021\u00b7"
|
540 |
|
|
+ "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
|
541 |
|
|
+ "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\uf8a0"
|
542 |
|
|
+ "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
|
543 |
|
|
|
544 |
|
|
|
545 |
|
|
/**
|
546 |
|
|
* Constructs a CharGlyphMap.Type0 from all type 0 cmaps provided
|
547 |
|
|
* by the font. The implementation is able to fuse multiple type
|
548 |
|
|
* 0 cmaps, such as the MacRoman, Turkish, Icelandic and Croatian
|
549 |
|
|
* encoding, into a single map from Unicode characters to glyph
|
550 |
|
|
* indices.
|
551 |
|
|
*
|
552 |
|
|
* @param buf a ByteBuffer whose position is right at the
|
553 |
|
|
* beginning of the entire cmap table of the font (<i>not</i>
|
554 |
|
|
* at some subtable).
|
555 |
|
|
*/
|
556 |
|
|
public Type0(ByteBuffer buf)
|
557 |
|
|
{
|
558 |
|
|
int numTables;
|
559 |
|
|
int tableStart = buf.position();
|
560 |
|
|
int limit = buf.limit();
|
561 |
|
|
|
562 |
|
|
/* The CMAP version must be 0. */
|
563 |
|
|
if (buf.getChar() != 0)
|
564 |
|
|
throw new IllegalStateException();
|
565 |
|
|
|
566 |
|
|
numTables = buf.getChar();
|
567 |
|
|
for (int i = 0; i < numTables; i++)
|
568 |
|
|
{
|
569 |
|
|
buf.limit(limit).position(tableStart + 4 + i * 8);
|
570 |
|
|
int platform = buf.getChar();
|
571 |
|
|
int encoding = buf.getChar();
|
572 |
|
|
int offset = tableStart + buf.getInt();
|
573 |
|
|
|
574 |
|
|
buf.position(offset);
|
575 |
|
|
int format = buf.getChar();
|
576 |
|
|
int length = buf.getChar();
|
577 |
|
|
buf.limit(offset + length);
|
578 |
|
|
int language = buf.getChar();
|
579 |
|
|
|
580 |
|
|
if (format == 0)
|
581 |
|
|
readSingleTable(buf, platform, language, encoding);
|
582 |
|
|
}
|
583 |
|
|
}
|
584 |
|
|
|
585 |
|
|
|
586 |
|
|
/**
|
587 |
|
|
* Processes a CMAP Type 0 table whose platform, encoding and
|
588 |
|
|
* language are already known.
|
589 |
|
|
*
|
590 |
|
|
* @param buf the buffer to read the table from, positioned
|
591 |
|
|
* right after the language tag.
|
592 |
|
|
*/
|
593 |
|
|
private void readSingleTable(ByteBuffer buf,
|
594 |
|
|
int platform, int language,
|
595 |
|
|
int encoding)
|
596 |
|
|
{
|
597 |
|
|
String upper = getUpper129(platform, encoding, language);
|
598 |
|
|
if (upper == null)
|
599 |
|
|
return;
|
600 |
|
|
|
601 |
|
|
/* Skip the MacOS codepoints [0 .. 31] because they do not
|
602 |
|
|
* correspond to any Unicode codepoint.
|
603 |
|
|
*/
|
604 |
|
|
buf.position(buf.position() + 32);
|
605 |
|
|
|
606 |
|
|
/* Irrespective of script and language, the MacOS codepoints
|
607 |
|
|
* [32 .. 126] correspond to the same Unicode codepoint.
|
608 |
|
|
*/
|
609 |
|
|
for (int i = 32; i < 126; i++)
|
610 |
|
|
glyphToUCS2[buf.get() & 0xff] = (char) i;
|
611 |
|
|
|
612 |
|
|
for (int i = 127; i < 256; i++)
|
613 |
|
|
glyphToUCS2[buf.get() & 0xff] = upper.charAt(i - 127);
|
614 |
|
|
|
615 |
|
|
/* Glyph 0 is always the undefined character, which has
|
616 |
|
|
* no codepoint in Unicode.
|
617 |
|
|
*/
|
618 |
|
|
glyphToUCS2[0] = 0;
|
619 |
|
|
}
|
620 |
|
|
|
621 |
|
|
|
622 |
|
|
/**
|
623 |
|
|
* Determines the glyph index for a given Unicode codepoint.
|
624 |
|
|
*
|
625 |
|
|
* @param ucs4 the Unicode codepoint in UCS-4 encoding.
|
626 |
|
|
*
|
627 |
|
|
* @return the glyph index, or 0 if the font does not contain
|
628 |
|
|
* a glyph for this codepoint.
|
629 |
|
|
*/
|
630 |
|
|
public int getGlyph(int ucs4)
|
631 |
|
|
{
|
632 |
|
|
/* This linear search is not exactly super fast. However,
|
633 |
|
|
* only really ancient fonts have only a type 0 cmap,
|
634 |
|
|
* so it should not hurt in very many cases. If it shows
|
635 |
|
|
* to be a performance problem, one could do a binary search
|
636 |
|
|
* on a 256-entry table sorted by Unicode codepoint. The
|
637 |
|
|
* matching index of that table could then be used to look
|
638 |
|
|
* up the glyph ID at that position.
|
639 |
|
|
*/
|
640 |
|
|
for (int i = 0; i < 256; i++)
|
641 |
|
|
if (glyphToUCS2[i] == ucs4)
|
642 |
|
|
return i;
|
643 |
|
|
return 0;
|
644 |
|
|
}
|
645 |
|
|
|
646 |
|
|
|
647 |
|
|
/**
|
648 |
|
|
* Returns a String whose <code>charAt(i)</code> is the Unicode
|
649 |
|
|
* character that corresponds to the codepoint <code>i +
|
650 |
|
|
* 127</code> in the encoding specified by the platform, script
|
651 |
|
|
* and language tag of a Type 0 CMAP.
|
652 |
|
|
*
|
653 |
|
|
* @param language the language tag in the cmap subtable. For the
|
654 |
|
|
* Macintosh platform, this is 0 to indicate language-neutral
|
655 |
|
|
* encoding, or the MacOS language code <i>plus one.</i> The
|
656 |
|
|
* Apple documentation does not mention that one needs to be
|
657 |
|
|
* added, but the Adobe OpenType specification does.
|
658 |
|
|
*
|
659 |
|
|
* @return a String for mapping the top 129 characters to
|
660 |
|
|
* UCS-2. If <code>platform</code> is not <code>1</code>
|
661 |
|
|
* (indicating Macintosh), or if the combination of
|
662 |
|
|
* <code>script</code> and <code>language</code> is not
|
663 |
|
|
* recognized, <code>null</code> will be returned.
|
664 |
|
|
*/
|
665 |
|
|
private static String getUpper129(int platform, int script, int language)
|
666 |
|
|
{
|
667 |
|
|
if (platform != PLATFORM_MACINTOSH)
|
668 |
|
|
return null;
|
669 |
|
|
|
670 |
|
|
switch (script)
|
671 |
|
|
{
|
672 |
|
|
case 0: /* smRoman */
|
673 |
|
|
if (language == /* langIcelandic+1 */ 16)
|
674 |
|
|
return UPPER_ICELANDIC;
|
675 |
|
|
else if (language == /* langTurkish+1 */ 18)
|
676 |
|
|
return UPPER_TURKISH;
|
677 |
|
|
else if (language == /* langCroatian+1 */ 19)
|
678 |
|
|
return UPPER_CROATIAN;
|
679 |
|
|
else if (language == /* langRomanian+1 */ 38)
|
680 |
|
|
return UPPER_ROMANIAN;
|
681 |
|
|
else if (language == /* language-neutral */ 0)
|
682 |
|
|
return UPPER_ROMAN;
|
683 |
|
|
else
|
684 |
|
|
return null;
|
685 |
|
|
|
686 |
|
|
case 4: /* smArabic */
|
687 |
|
|
if (language == /* langFarsi+1 */ 32)
|
688 |
|
|
return UPPER_FARSI;
|
689 |
|
|
else
|
690 |
|
|
return UPPER_ARABIC;
|
691 |
|
|
|
692 |
|
|
case 5: /* smHebrew */
|
693 |
|
|
return UPPER_HEBREW;
|
694 |
|
|
|
695 |
|
|
case 6: /* smGreek */
|
696 |
|
|
return UPPER_GREEK;
|
697 |
|
|
|
698 |
|
|
case 7: /* smCyrillic */
|
699 |
|
|
return UPPER_CYRILLIC;
|
700 |
|
|
|
701 |
|
|
case 29: /* smSlavic == smEastEurRoman */
|
702 |
|
|
return UPPER_EAST_EUROPEAN_ROMAN;
|
703 |
|
|
}
|
704 |
|
|
|
705 |
|
|
return null;
|
706 |
|
|
}
|
707 |
|
|
}
|
708 |
|
|
|
709 |
|
|
|
710 |
|
|
/**
|
711 |
|
|
* A mapping from Unicode code points to glyph IDs through CMAP Type
|
712 |
|
|
* 4 tables. These tables are able to map two-byte encoded text
|
713 |
|
|
* to glyph IDs, such as Unicode Basic Multilingual Plane which
|
714 |
|
|
* contains U+0000 .. U+FFFE without surrogates.
|
715 |
|
|
*
|
716 |
|
|
* @author Sascha Brawer (brawer@dandelis.ch)
|
717 |
|
|
*/
|
718 |
|
|
private static final class Type4
|
719 |
|
|
extends CharGlyphMap
|
720 |
|
|
{
|
721 |
|
|
/**
|
722 |
|
|
* Determines whether this implementation supports a combination
|
723 |
|
|
* of platform, language and encoding is supported for a type 4
|
724 |
|
|
* <code>cmap</code> table.
|
725 |
|
|
*
|
726 |
|
|
* <p>Currently, we support the following combinations:
|
727 |
|
|
*
|
728 |
|
|
* <ul><li>the Unicode platform in encodings 0, 1, 2, 3 and
|
729 |
|
|
* 4;</li>
|
730 |
|
|
*
|
731 |
|
|
* <li>the Microsoft platform in encodings 1 (Basic Multilingual
|
732 |
|
|
* Plane) and 10 (full Unicode).</li></ul>
|
733 |
|
|
*
|
734 |
|
|
* <p>Most recent Macintosh fonts provide a type 4
|
735 |
|
|
* <code>cmap</code> for Unicode. Microsoft recommends providing a
|
736 |
|
|
* type 4 <code>cmap</code> for encoding 1 of the Microsoft
|
737 |
|
|
* platform. The implementation of GNU Classpath supports both
|
738 |
|
|
* variants.
|
739 |
|
|
*
|
740 |
|
|
* <p>Not supported are ShiftJIS, Big5, Wansung, Johab, and other
|
741 |
|
|
* non-Unicode encodings. Text can easily be converted to Unicode
|
742 |
|
|
* using the java.nio.charset package.
|
743 |
|
|
*/
|
744 |
|
|
static boolean isSupported(int platform, int language, int encoding)
|
745 |
|
|
{
|
746 |
|
|
switch (platform)
|
747 |
|
|
{
|
748 |
|
|
case PLATFORM_UNICODE:
|
749 |
|
|
return (encoding >= 0) && (encoding <= 4);
|
750 |
|
|
|
751 |
|
|
case PLATFORM_MICROSOFT:
|
752 |
|
|
return (encoding == /* Basic Multilingual Plane */ 1)
|
753 |
|
|
|| (encoding == /* Full Unicode */ 10);
|
754 |
|
|
}
|
755 |
|
|
|
756 |
|
|
return false;
|
757 |
|
|
}
|
758 |
|
|
|
759 |
|
|
|
760 |
|
|
/**
|
761 |
|
|
* Processes a CMAP Type 4 table whose platform, encoding and
|
762 |
|
|
* language are already known. We understand the Unicode platform
|
763 |
|
|
* with encodings 0, 1, 2, 3 and 4, and the Microsoft platform
|
764 |
|
|
* with encodings 1 (Unicode BMP) and 10 (UCS-4).
|
765 |
|
|
*
|
766 |
|
|
* @param buf the buffer to read the table from, positioned at
|
767 |
|
|
* its beginning.
|
768 |
|
|
*
|
769 |
|
|
* @return a Type4 table, or <code>null</code> if the combination
|
770 |
|
|
* of platform and encoding is not understood.
|
771 |
|
|
*/
|
772 |
|
|
static Type4 readTable(ByteBuffer buf,
|
773 |
|
|
int platform, int encoding)
|
774 |
|
|
{
|
775 |
|
|
int tableStart = buf.position();
|
776 |
|
|
char format = buf.getChar();
|
777 |
|
|
int length = buf.getChar();
|
778 |
|
|
int language = buf.getChar();
|
779 |
|
|
|
780 |
|
|
if ((format != 4) || !isSupported(platform, language, encoding))
|
781 |
|
|
throw new IllegalArgumentException();
|
782 |
|
|
|
783 |
|
|
buf.limit(tableStart + length);
|
784 |
|
|
|
785 |
|
|
int segCountX2 = buf.getChar();
|
786 |
|
|
int segCount = segCountX2 / 2;
|
787 |
|
|
int searchRange = buf.getChar();
|
788 |
|
|
int entrySelector = buf.getChar();
|
789 |
|
|
int rangeShift = buf.getChar();
|
790 |
|
|
|
791 |
|
|
CharBuffer endCode, startCode, idRangeOffset_glyphID;
|
792 |
|
|
ShortBuffer idDelta;
|
793 |
|
|
|
794 |
|
|
int pos = buf.position();
|
795 |
|
|
endCode = buf.asCharBuffer();
|
796 |
|
|
pos += segCountX2 + /* reservedPad */ 2;
|
797 |
|
|
|
798 |
|
|
buf.position(pos);
|
799 |
|
|
startCode = buf.asCharBuffer();
|
800 |
|
|
pos += segCountX2;
|
801 |
|
|
|
802 |
|
|
buf.position(pos);
|
803 |
|
|
idDelta = buf.asShortBuffer();
|
804 |
|
|
pos += segCountX2;
|
805 |
|
|
|
806 |
|
|
buf.position(pos);
|
807 |
|
|
idRangeOffset_glyphID = buf.asCharBuffer();
|
808 |
|
|
|
809 |
|
|
endCode.limit(segCount);
|
810 |
|
|
startCode.limit(segCount);
|
811 |
|
|
idDelta.limit(segCount);
|
812 |
|
|
idRangeOffset_glyphID.limit((buf.limit() - pos) / 2);
|
813 |
|
|
|
814 |
|
|
return new Type4(segCount,
|
815 |
|
|
endCode, startCode, idDelta,
|
816 |
|
|
idRangeOffset_glyphID);
|
817 |
|
|
}
|
818 |
|
|
|
819 |
|
|
|
820 |
|
|
private CharBuffer lastChar;
|
821 |
|
|
private CharBuffer firstChar;
|
822 |
|
|
private ShortBuffer idDelta;
|
823 |
|
|
private CharBuffer rangeID;
|
824 |
|
|
private int numSegments;
|
825 |
|
|
|
826 |
|
|
private Type4(int numSegments,
|
827 |
|
|
CharBuffer lastChar, CharBuffer firstChar,
|
828 |
|
|
ShortBuffer idDelta, CharBuffer rangeID)
|
829 |
|
|
{
|
830 |
|
|
this.numSegments = numSegments;
|
831 |
|
|
this.lastChar = lastChar;
|
832 |
|
|
this.firstChar = firstChar;
|
833 |
|
|
this.idDelta = idDelta;
|
834 |
|
|
this.rangeID = rangeID;
|
835 |
|
|
}
|
836 |
|
|
|
837 |
|
|
|
838 |
|
|
/**
|
839 |
|
|
* Determines the glyph index for a given Unicode codepoint.
|
840 |
|
|
*
|
841 |
|
|
* @param ucs4 the Unicode codepoint in UCS-4 encoding.
|
842 |
|
|
*
|
843 |
|
|
* @return the glyph index, or 0 if the font does not contain
|
844 |
|
|
* a glyph for this codepoint.
|
845 |
|
|
*/
|
846 |
|
|
public int getGlyph(int ucs4)
|
847 |
|
|
{
|
848 |
|
|
char c, segStart;
|
849 |
|
|
int segment, idRangeOffset;
|
850 |
|
|
|
851 |
|
|
if (ucs4 > 0xffff)
|
852 |
|
|
return 0;
|
853 |
|
|
|
854 |
|
|
c = (char) ucs4;
|
855 |
|
|
segment = find(c);
|
856 |
|
|
segStart = firstChar.get(segment);
|
857 |
|
|
if ((c < segStart) || (c > lastChar.get(segment)))
|
858 |
|
|
return 0;
|
859 |
|
|
|
860 |
|
|
/*
|
861 |
|
|
* System.out.println("seg " + segment
|
862 |
|
|
* + ", range=" + (int) rangeID[segment]
|
863 |
|
|
* + ", delta=" + delta[segment]);
|
864 |
|
|
*/
|
865 |
|
|
|
866 |
|
|
idRangeOffset = rangeID.get(segment);
|
867 |
|
|
if (idRangeOffset == 0)
|
868 |
|
|
return (int) (char) (((int) c) + idDelta.get(segment));
|
869 |
|
|
int result = rangeID.get((idRangeOffset >> 1)
|
870 |
|
|
+ (c - segStart) + segment);
|
871 |
|
|
if (result == 0)
|
872 |
|
|
return 0;
|
873 |
|
|
return (int) (char) (result + idDelta.get(segment));
|
874 |
|
|
}
|
875 |
|
|
|
876 |
|
|
|
877 |
|
|
private int find(char c)
|
878 |
|
|
{
|
879 |
|
|
int min, max, mid;
|
880 |
|
|
|
881 |
|
|
min = 0;
|
882 |
|
|
max = numSegments - 1;
|
883 |
|
|
mid = max >> 1;
|
884 |
|
|
|
885 |
|
|
while (min < max)
|
886 |
|
|
{
|
887 |
|
|
// System.out.println("(" + min + "," + max + ") " + mid);
|
888 |
|
|
char val = lastChar.get(mid);
|
889 |
|
|
if (val == c)
|
890 |
|
|
break;
|
891 |
|
|
else if (val < c)
|
892 |
|
|
min = mid + 1;
|
893 |
|
|
else if (val > c)
|
894 |
|
|
max = mid;
|
895 |
|
|
mid = (min + max) >> 1;
|
896 |
|
|
}
|
897 |
|
|
|
898 |
|
|
return mid;
|
899 |
|
|
}
|
900 |
|
|
}
|
901 |
|
|
|
902 |
|
|
|
903 |
|
|
/**
|
904 |
|
|
* A mapping from Unicode code points to glyph IDs through CMAP Type
|
905 |
|
|
* 12 tables. These tables are able to map four-byte encoded text
|
906 |
|
|
* to glyph IDs, such as Unicode UCS-4.
|
907 |
|
|
*
|
908 |
|
|
* @author Sascha Brawer (brawer@dandelis.ch)
|
909 |
|
|
*/
|
910 |
|
|
private static final class Type12
|
911 |
|
|
extends CharGlyphMap
|
912 |
|
|
{
|
913 |
|
|
int numGroups;
|
914 |
|
|
IntBuffer data;
|
915 |
|
|
|
916 |
|
|
|
917 |
|
|
/**
|
918 |
|
|
* Determines whether this implementation supports a combination
|
919 |
|
|
* of platform and encoding for a type 12 <code>cmap</code> table.
|
920 |
|
|
*
|
921 |
|
|
* <p>Currently, we support the following combinations:
|
922 |
|
|
*
|
923 |
|
|
* <ul><li>the Unicode platform in encodings 0, 1, 2, 3 and
|
924 |
|
|
* 4;</li>
|
925 |
|
|
*
|
926 |
|
|
* <li>the Microsoft platform in encodings 1 (Basic Multilingual
|
927 |
|
|
* Plane) and 10 (full Unicode).</li></ul>
|
928 |
|
|
*/
|
929 |
|
|
static boolean isSupported(int platform, int encoding)
|
930 |
|
|
{
|
931 |
|
|
switch (platform)
|
932 |
|
|
{
|
933 |
|
|
case PLATFORM_UNICODE:
|
934 |
|
|
return (encoding >= 0) && (encoding <= 4);
|
935 |
|
|
|
936 |
|
|
case PLATFORM_MICROSOFT:
|
937 |
|
|
return (encoding == /* Basic Multilingual Plane */ 1)
|
938 |
|
|
|| (encoding == /* Full Unicode */ 10);
|
939 |
|
|
}
|
940 |
|
|
|
941 |
|
|
return false;
|
942 |
|
|
}
|
943 |
|
|
|
944 |
|
|
|
945 |
|
|
/**
|
946 |
|
|
* Constructs a <code>cmap</code> type 12 table whose platform and
|
947 |
|
|
* encoding are already known. We understand the Unicode platform
|
948 |
|
|
* with encodings 0, 1, 2, 3 and 4, and the Microsoft platform
|
949 |
|
|
* with encodings 1 (Unicode BMP) and 10 (UCS-4).
|
950 |
|
|
*
|
951 |
|
|
* @param buf the buffer to read the table from, positioned at
|
952 |
|
|
* its beginning.
|
953 |
|
|
*/
|
954 |
|
|
Type12(ByteBuffer buf, int platform, int encoding)
|
955 |
|
|
{
|
956 |
|
|
int tableStart = buf.position();
|
957 |
|
|
int format = buf.getChar();
|
958 |
|
|
if ((format != 12) || !isSupported(platform, encoding))
|
959 |
|
|
throw new IllegalStateException();
|
960 |
|
|
|
961 |
|
|
buf.getChar(); // skip reserved field
|
962 |
|
|
buf.limit(tableStart + buf.getInt());
|
963 |
|
|
int language = buf.getInt();
|
964 |
|
|
numGroups = buf.getInt();
|
965 |
|
|
data = buf.asIntBuffer();
|
966 |
|
|
}
|
967 |
|
|
|
968 |
|
|
|
969 |
|
|
/**
|
970 |
|
|
* Determines the glyph index for a given Unicode codepoint. Users
|
971 |
|
|
* should be aware that the character-to-glyph mapping not not
|
972 |
|
|
* everything that is needed for full Unicode support. For example,
|
973 |
|
|
* the <code>cmap</code> table is not able to synthesize accented
|
974 |
|
|
* glyphs from the canonical decomposition sequence, even if the
|
975 |
|
|
* font would contain a glyph for the composed form.
|
976 |
|
|
*
|
977 |
|
|
* @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates
|
978 |
|
|
* (U+D800 to U+DFFF) cannot be passed, they must be mapped to
|
979 |
|
|
* UCS-4 first.
|
980 |
|
|
*
|
981 |
|
|
* @return the glyph index, or 0 if the font does not contain
|
982 |
|
|
* a glyph for this codepoint.
|
983 |
|
|
*/
|
984 |
|
|
public int getGlyph(int ucs4)
|
985 |
|
|
{
|
986 |
|
|
int min, max, mid, startCharCode, endCharCode;
|
987 |
|
|
|
988 |
|
|
min = 0;
|
989 |
|
|
max = numGroups - 1;
|
990 |
|
|
mid = max >> 1;
|
991 |
|
|
do
|
992 |
|
|
{
|
993 |
|
|
startCharCode = data.get(3 * mid);
|
994 |
|
|
endCharCode = data.get(3 * mid + 1);
|
995 |
|
|
|
996 |
|
|
|
997 |
|
|
/*
|
998 |
|
|
System.out.println("group " + mid + " (U+"
|
999 |
|
|
+ Integer.toHexString(startCharCode)
|
1000 |
|
|
+ " .. U+" + Integer.toHexString(endCharCode)
|
1001 |
|
|
+ "): glyph " + (int) data.get(mid*3+2));
|
1002 |
|
|
*/
|
1003 |
|
|
|
1004 |
|
|
if ((startCharCode <= ucs4) && (ucs4 <= endCharCode))
|
1005 |
|
|
return ucs4
|
1006 |
|
|
- startCharCode
|
1007 |
|
|
+ /* startGlyphID */ data.get(mid * 3 + 2);
|
1008 |
|
|
|
1009 |
|
|
if (endCharCode < ucs4)
|
1010 |
|
|
min = mid + 1;
|
1011 |
|
|
else
|
1012 |
|
|
max = mid;
|
1013 |
|
|
mid = (min + max) >> 1;
|
1014 |
|
|
}
|
1015 |
|
|
while (min < max);
|
1016 |
|
|
|
1017 |
|
|
startCharCode = data.get(3 * mid);
|
1018 |
|
|
endCharCode = data.get(3 * mid + 1);
|
1019 |
|
|
if ((startCharCode <= ucs4) && (ucs4 <= endCharCode))
|
1020 |
|
|
return ucs4
|
1021 |
|
|
- startCharCode
|
1022 |
|
|
+ /* startGlyphID */ data.get(mid * 3 + 2);
|
1023 |
|
|
|
1024 |
|
|
return 0;
|
1025 |
|
|
}
|
1026 |
|
|
}
|
1027 |
|
|
}
|