OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [tools/] [gnu/] [classpath/] [tools/] [doclets/] [xmldoclet/] [HtmlRepairer.java] - Blame information for rev 781

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 779 jeremybenn
/* gnu.classpath.tools.doclets.xmldoclet.HtmlRepairer.java
2
   Copyright (C) 2003 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19
02111-1307 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
package gnu.classpath.tools.doclets.xmldoclet;
39
 
40
import java.io.*;
41
import java.util.*;
42
import com.sun.javadoc.DocErrorReporter;
43
import com.sun.javadoc.ClassDoc;
44
import com.sun.javadoc.MemberDoc;
45
 
46
/**
47
 *  Provides methods for tidying up HTML source.
48
 *
49
 *  @author Julian Scheid
50
 */
51
public final class HtmlRepairer {
52
 
53
   private static class TagInfo {
54
 
55
      private Set parentTags = new HashSet();
56
 
57
      public TagInfo(String parentTag) {
58
         this.parentTags.add(parentTag);
59
      }
60
 
61
      public TagInfo(String[] parentTagArr) {
62
         for (int i=0; i<parentTagArr.length; ++i) {
63
            this.parentTags.add(parentTagArr[i]);
64
         }
65
      }
66
 
67
      public boolean isLegalParentTag(String tag) {
68
         return this.parentTags.contains(tag);
69
      }
70
   }
71
 
72
   /** Receives repair warnings; nothing is reported when this is null. */
   private DocErrorReporter warningReporter;

   /** When true, no warnings are reported at all. */
   private boolean noWarn;

   /** When true, the "tag looks like email address" warning is skipped. */
   private boolean noEmailWarn;

   /** Class named in warning messages; null selects the "overview page" wording. */
   private ClassDoc contextClass;

   /** Member appended to the class name in warning messages; may be null. */
   private MemberDoc contextMember;

   /** Collects the repaired markup. */
   private StringBuffer output = new StringBuffer();

   /** Lower-cased names (Strings) of the tags that are currently open. */
   private Stack tagStack = new Stack();

   /** True until a tag has been processed (or, with throwAwayLeadingPara, non-blank text was seen). */
   private boolean isLeadingTag = true;

   /** When true, a &lt;p&gt; start tag that leads the text is dropped. */
   private boolean throwAwayLeadingPara = false;

   /** Maps a tag name (String) to the TagInfo listing its accepted parents. */
   private static Map tagInfoMap;

   /** Names of the tags whose direct text content is discarded. */
   private static Set noTextParentTags;

   static {
      tagInfoMap = new HashMap();
      tagInfoMap.put("li", new TagInfo(new String[] { "ul", "ol", "nl", "menu", "dir" }));
      tagInfoMap.put("td", new TagInfo(new String[] { "tr" }));
      tagInfoMap.put("th", new TagInfo(new String[] { "tr" }));
      tagInfoMap.put("tr", new TagInfo(new String[] { "table" }));
      tagInfoMap.put("dt", new TagInfo(new String[] { "dl" }));
      tagInfoMap.put("dd", new TagInfo(new String[] { "dl" }));
      tagInfoMap.put("param", new TagInfo(new String[] { "applet" }));

      // Elements that must not directly contain character data.
      // "dl" belongs here (it only holds dt/dd children); "dt" and
      // "dd" do NOT, because their content model includes text -
      // listing them made all definition-list text get discarded.
      String[] noTextParentTagArr = {
         "area", "base", "body", "br", "dl", "head", "hr", "html",
         "img", "input", "link", "map", "meta", "ol", "optgroup", "param",
         "select", "table", "tbody", "tfoot", "thead", "tr", "ul",
      };

      noTextParentTags = new HashSet(Arrays.asList(noTextParentTagArr));
   }
107
 
108
   /**
    *  Creates a repairer that reports problems to the given reporter.
    *
    *  @param warningReporter  receiver for warnings; when null nothing is reported
    *  @param noWarn  if true, all warnings are suppressed
    *  @param noEmailWarn  if true, the warning about tags that look
    *                      like email addresses is suppressed
    *  @param contextClass  class named in warning messages; when null
    *                       messages refer to the overview page
    *  @param contextMember  member named in warning messages, may be null
    *  @param throwAwayLeadingPara  if true, a &lt;p&gt; start tag
    *                               leading the text is dropped
    */
   public HtmlRepairer(DocErrorReporter warningReporter,
                       boolean noWarn, boolean noEmailWarn,
                       ClassDoc contextClass, MemberDoc contextMember,
                       boolean throwAwayLeadingPara) {

      // where warnings go and which ones are wanted
      this.warningReporter = warningReporter;
      this.noEmailWarn = noEmailWarn;
      this.noWarn = noWarn;

      // what the warnings refer to
      this.contextMember = contextMember;
      this.contextClass = contextClass;

      // repair options
      this.throwAwayLeadingPara = throwAwayLeadingPara;
   }
119
 
120
   private static String replaceStr(String haystack, String needle, String replacement) {
121
      int ndx=haystack.indexOf(needle);
122
      if (ndx<0)
123
         return haystack;
124
      else
125
         return haystack.substring(0, ndx)+replacement
126
            + replaceStr(haystack.substring(ndx+needle.length()), needle, replacement);
127
   }
128
 
129
   private void haveText(String text) {
130
 
131
      if (isLeadingTag && throwAwayLeadingPara) {
132
         if (0 != text.trim().length()) {
133
            isLeadingTag = false;
134
         }
135
      }
136
 
137
      if (tagStack.isEmpty() || !noTextParentTags.contains(tagStack.peek())) {
138
 
139
         text = replaceStr(text, "&lt1", "&lt;1");
140
         text = replaceStr(text, "&&", "&amp;&amp;");
141
         text = replaceStr(text, "& ", "&amp; ");
142
         text = replaceStr(text, "&\t", "&amp;\t");
143
         text = replaceStr(text, "&\r", "&amp;\r");
144
         text = replaceStr(text, "&\n", "&amp;\n");
145
         for (char c='0'; c<='9'; ++c)
146
            text = replaceStr(text, "&"+c, "&amp;"+c);
147
         text = replaceStr(text, "\u00a7", "&sect;");
148
         output.append(text);
149
      }
150
      else {
151
         printWarning("Discarded text in <" + tagStack.peek() + "> element");
152
      }
153
   }
154
 
155
   private void haveStartOrEndTag(String tag) {
156
 
157
      boolean _isLeadingTag = isLeadingTag;
158
      isLeadingTag = false;
159
 
160
      tag = tag.trim();
161
 
162
      boolean isEndTag = tag.startsWith("/");
163
      boolean isAtomTag = tag.endsWith("/");
164
 
165
      if (isEndTag && isAtomTag) {
166
         // got something like '</a/>' which is invalid.
167
         // suppose a close tag was intended.
168
         tag = tag.substring(0, tag.length()-1);
169
      }
170
 
171
      if (tag.length() < 1) {
172
         printWarning("Deleting broken tag");
173
         return;
174
      }
175
 
176
      String tagName = tag.substring(isEndTag?1:0, isAtomTag?tag.length()-1:tag.length());
177
      String tagAttributes = "";
178
 
179
      for (int i=0; i<tagName.length(); ++i) {
180
         if (" \t\r\n".indexOf(tagName.charAt(i))>=0) {
181
            tagAttributes = tagName.substring(i).trim();
182
            tagName = tagName.substring(0, i);
183
            break;
184
         }
185
      }
186
 
187
      if (!isEndTag && tagName.indexOf('@')>0) {
188
         if (!noEmailWarn) {
189
            printWarning("Tag looks like email address: <"+tagName+">");
190
         }
191
         output.append("&lt;"+tag+"&gt;");
192
         return;
193
      }
194
 
195
      tagName = tagName.toLowerCase();
196
 
197
      if (_isLeadingTag && "p".equals(tagName) && !isEndTag && throwAwayLeadingPara) {
198
         return;
199
      }
200
 
201
      if ("p".equals(tagName) || "br".equals(tagName) || "hr".equals(tagName)) {
202
         // throw away </p> and </br>
203
         if (isEndTag) {
204
            return;
205
         }
206
         // make sure every <p> is a <p/> and every <br> is a <br/>
207
         else if (!isAtomTag) {
208
            tag += "/";
209
            isAtomTag = true;
210
         }
211
      }
212
 
213
      if (isEndTag) {
214
 
215
         // check whether this close tag is on the stack
216
         // if yes, close all tags up to this tag
217
         if (tagStack.contains(tagName)) {
218
            String popped;
219
            do {
220
               popped = (String)tagStack.pop();
221
               if (!popped.equals(tagName))
222
                  printWarning("Inserting '</"+popped+">");
223
               output.append("</"+popped+">");
224
            }
225
            while (!popped.equals(tagName));
226
         }
227
         // if not, just throw it away
228
         else {
229
            printWarning("Deleting <"+tag+">");
230
         }
231
      }
232
      else {
233
 
234
         final int STATE_INITIAL = 1;
235
         final int STATE_EXPECT_ATTRIBUTENAME = 2;
236
         final int STATE_UNQUOTED_ATTRIBUTEVALUE = 3;
237
         final int STATE_SINGLEQUOTE_ATTRIBUTEVALUE = 4;
238
         final int STATE_DOUBLEQUOTE_ATTRIBUTEVALUE = 5;
239
         final int STATE_EXPECT_ATTRIBUTEVALUE = 6;
240
         final int STATE_EXPECT_EQUALSIGN = 7;
241
 
242
         int state = STATE_INITIAL;
243
 
244
         String newAttributes = "";
245
         String attributeName = null;
246
         StringBuffer buf = new StringBuffer();
247
 
248
         char[] attrsAsChars = tagAttributes.toCharArray();
249
         for (int i=0, ilim=attrsAsChars.length+1; i<ilim; ++i) {
250
            int c;
251
            if (i<attrsAsChars.length)
252
               c = (int)attrsAsChars[i];
253
            else
254
               c = -1;
255
 
256
            switch (state) {
257
 
258
            case STATE_INITIAL:
259
               if (" \t\r\n".indexOf(c)>=0){
260
                  continue;
261
               }
262
               else if (-1==c) {
263
                  continue;
264
               }
265
               else {
266
                  state = STATE_EXPECT_ATTRIBUTENAME;
267
                  buf.append((char)c);
268
               }
269
               break;
270
 
271
            case STATE_EXPECT_ATTRIBUTENAME:
272
               if ('='==c) {
273
                  attributeName = buf.toString();
274
                  buf.setLength(0);
275
                  state = STATE_EXPECT_ATTRIBUTEVALUE;
276
               }
277
               else if (-1==c) {
278
                  attributeName = buf.toString();
279
                  buf.setLength(0);
280
                  printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
281
               }
282
               else if (" \t\r\n".indexOf(c)>=0) {
283
                  state = STATE_EXPECT_EQUALSIGN;
284
               }
285
               else {
286
                  buf.append((char)c);
287
               }
288
               break;
289
 
290
            case STATE_EXPECT_EQUALSIGN:
291
               if (" \t\r\n".indexOf(c)>=0){
292
                  continue;
293
               }
294
               else if ('='==c) {
295
                  state = STATE_EXPECT_ATTRIBUTEVALUE;
296
                  attributeName = buf.toString();
297
                  buf.setLength(0);
298
               }
299
               else {
300
                  attributeName = buf.toString();
301
                  buf.setLength(0);
302
                  printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
303
                  newAttributes += " "+attributeName+"=\""+attributeName+"\"";
304
                  buf.append((char)c);
305
                  state = STATE_EXPECT_ATTRIBUTENAME;
306
               }
307
               break;
308
 
309
            case STATE_EXPECT_ATTRIBUTEVALUE:
310
               if (" \t\r\n".indexOf(c)>=0){
311
                  continue;
312
               }
313
               else if ('\"'==c) {
314
                  state = STATE_DOUBLEQUOTE_ATTRIBUTEVALUE;
315
               }
316
               else if ('\''==c) {
317
                  state = STATE_SINGLEQUOTE_ATTRIBUTEVALUE;
318
               }
319
               else {
320
                  state = STATE_UNQUOTED_ATTRIBUTEVALUE;
321
                  buf.append((char)c);
322
               }
323
               break;
324
 
325
            case STATE_UNQUOTED_ATTRIBUTEVALUE:
326
               if (-1==c || " \t\r\n".indexOf(c)>=0){
327
                  state = STATE_INITIAL;
328
                  newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
329
                  buf.setLength(0);
330
               }
331
               else {
332
                  buf.append((char)c);
333
               }
334
               break;
335
 
336
            case STATE_SINGLEQUOTE_ATTRIBUTEVALUE:
337
               if ('\''==c) {
338
                  state = STATE_INITIAL;
339
                  newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
340
                  buf.setLength(0);
341
               }
342
               else {
343
                  buf.append((char)c);
344
               }
345
               break;
346
 
347
            case STATE_DOUBLEQUOTE_ATTRIBUTEVALUE:
348
               if ('\"'==c) {
349
                  state = STATE_INITIAL;
350
                  newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
351
                  buf.setLength(0);
352
               }
353
               else {
354
                  buf.append((char)c);
355
               }
356
               break;
357
            }
358
         }
359
 
360
 
361
         if (!isAtomTag) {
362
 
363
            // check whether this open tag is equal to the topmost
364
            // entry on the stack; if yes, emit a close tag first
365
 
366
            // corrects stuff like '<tr><td>...<td>...');
367
            if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) {
368
               printWarning("Inserting </"+tagName+">");
369
               output.append("</"+tagName+">");
370
               tagStack.pop();
371
            }
372
            else {
373
               processKnownChildTags(tagName, tagStack, output);
374
            }
375
 
376
            // otherwise, we assume there are no close tags required
377
            // before this open tag.
378
            tagStack.push(tagName);
379
 
380
            output.append("<"+tagName+newAttributes+">");
381
         }
382
         else {
383
            output.append("<"+tagName+newAttributes+"/>");
384
         }
385
      }
386
   }
387
 
388
   private boolean processKnownChildTags(String tagName, Stack tagStack, StringBuffer output) {
389
 
390
      TagInfo tagInfo = (TagInfo)tagInfoMap.get(tagName);
391
      if (null != tagInfo) {
392
 
393
         String parentTag = null;
394
         for (Enumeration en = tagStack.elements(); en.hasMoreElements(); ) {
395
            String tag = (String)en.nextElement();
396
            if (tagInfo.isLegalParentTag(tag)) {
397
               parentTag = tag;
398
               break;
399
            }
400
         }
401
         if (parentTag != null) {
402
            while (((String)tagStack.peek()) != parentTag) {
403
               String poppedTagName = (String)tagStack.pop();
404
               output.append("</"+poppedTagName+">");
405
               printWarning("Inserting </"+poppedTagName+">");
406
            }
407
            return true;
408
         }
409
      }
410
      return false;
411
   }
412
 
413
   private void flush() {
414
 
415
      // close all pending tags
416
      while (!tagStack.isEmpty()) {
417
         String tagName = (String)tagStack.pop();
418
         printWarning("Inserting </"+tagName+">");
419
         output.append("</"+tagName+">");
420
      }
421
   }
422
 
423
   /**
424
    *  Takes HTML fragment and returns a well-formed XHTML
425
    *  equivalent.
426
    *
427
    *  In the returned String, all tags are properly closed and
428
    *  nested.
429
    *
430
    *  Currently, the returned String is not guaranteed to be
431
    *  well-formed. In particular there are no checks on the tag
432
    *  names, attribute names and entity names.
433
    */
434
   public String getWellformedHTML(String text) {
435
 
436
      final int STATE_INITIAL         = 1;
437
      final int STATE_TAG_START       = 2;
438
      final int STATE_TAG             = 3;
439
      final int STATE_TAG_DOUBLEQUOTE = 4;
440
      final int STATE_TAG_SINGLEQUOTE = 5;
441
      final int STATE_AMP             = 6;
442
 
443
      int state = STATE_INITIAL;
444
      output.setLength(0);
445
 
446
 
447
      StringBuffer buf = new StringBuffer();
448
      char[] textAsChars = text.toCharArray();
449
 
450
   outer_loop:
451
      for (int i=0, ilim=textAsChars.length+1; i<ilim; ++i) {
452
         int c;
453
 
454
         if (i<textAsChars.length) {
455
            c = textAsChars[i];
456
         }
457
         else {
458
            c = -1;
459
         }
460
 
461
         switch (state) {
462
 
463
         case STATE_INITIAL:
464
            if ('<'==c) {
465
               state = STATE_TAG_START;
466
               if (buf.length()>0) {
467
                  haveText(buf.toString());
468
                  buf.setLength(0);
469
               }
470
            }
471
            else if ('>'==c) {
472
               // assume this is a greater-than sign
473
               buf.append("&gt;");
474
            }
475
            else if ('&'==c) {
476
               state = STATE_AMP;
477
            }
478
            else if (-1==c) {
479
               if (buf.length()>0) {
480
                  haveText(buf.toString());
481
                  buf.setLength(0);
482
               }
483
               continue;
484
            }
485
            else {
486
               buf.append((char)c);
487
            }
488
            break;
489
 
490
         case STATE_AMP:
491
            if ('<'==c) {
492
               buf.append("&amp;");
493
               state = STATE_TAG_START;
494
               if (buf.length()>0) {
495
                  haveText(buf.toString());
496
                  buf.setLength(0);
497
               }
498
            }
499
            else if ('>'==c) {
500
               // assume this is a greater-than sign
501
               buf.append("&amp;");
502
               buf.append("&gt;");
503
               state = STATE_INITIAL;
504
            }
505
            else if ('&'==c) {
506
               buf.append("&amp;");
507
               buf.append("&amp;");
508
               state = STATE_INITIAL;
509
            }
510
            else if (-1==c) {
511
               buf.append("&amp;");
512
               haveText(buf.toString());
513
               buf.setLength(0);
514
               state = STATE_INITIAL;
515
               continue;
516
            }
517
            else {
518
               // peek forward and see whether this is a valid entity.
519
               if ('#'==c) {
520
                  buf.append("&");
521
                  buf.append((char)c);
522
                  state = STATE_INITIAL;
523
                  continue outer_loop;
524
               }
525
               else if (Character.isLetter((char)c)) {
526
                  for (int i2=i+1; i2<ilim-1; i2++) {
527
                     if (';' == textAsChars[i2]) {
528
                        buf.append("&");
529
                        buf.append((char)c);
530
                        state = STATE_INITIAL;
531
                        continue outer_loop;
532
                     }
533
                     else if (!Character.isLetter((char)c)
534
                              && !Character.isDigit((char)c)
535
                              && ".-_:".indexOf((char)c) < 0
536
                              //&& !isCombiningChar(c)  // FIXME
537
                              //&& !isExtender(c)       // FIXME
538
                              ) {
539
                        break;
540
                     }
541
                  }
542
                  // not a valid entity declaration; assume &amp;
543
               }
544
               buf.append("&amp;");
545
               buf.append((char)c);
546
               state = STATE_INITIAL;
547
            }
548
 
549
            /*
550
            else if ('#'==c || Character.isLetter((char)c)) {
551
               buf.append("&");
552
               buf.append((char)c);
553
               state = STATE_INITIAL;
554
            }
555
            else {
556
               buf.append("&amp;");
557
               buf.append((char)c);
558
               state = STATE_INITIAL;
559
            }
560
            */
561
            break;
562
 
563
         case STATE_TAG_START:
564
            if (" \t\r\n".indexOf(c)>=0) {
565
               //continue;
566
 
567
               // new: assume this is a less-sign
568
               haveText("&lt;"+c);
569
               state = STATE_INITIAL;
570
            }
571
            else if ('/'==c) {
572
               buf.append((char)c);
573
               state = STATE_TAG;
574
            }
575
            else if ('<'==c) {
576
               // assume this is a less-sign
577
               haveText("&lt;&lt;");
578
               state = STATE_INITIAL;
579
            }
580
            else if ('>'==c) {
581
               // assume this is a less-sign
582
               haveText("&lt;&gt;");
583
               state = STATE_INITIAL;
584
            }
585
            //else if ('-'==c || '+'==c || '='==c || '\''==c || "0123456789".indexOf(c)>=0) {
586
            else if (!Character.isLetter((char)c)) {
587
               // assume this is a less-sign
588
               haveText("&lt;"+(char)c);
589
               state = STATE_INITIAL;
590
            }
591
            else {
592
               buf.append((char)c);
593
               state = STATE_TAG;
594
            }
595
            break;
596
 
597
         case STATE_TAG:
598
            if ('\"'==c) {
599
               buf.append((char)c);
600
               state = STATE_TAG_DOUBLEQUOTE;
601
            }
602
            else if ('\''==c) {
603
               buf.append((char)c);
604
               state = STATE_TAG_SINGLEQUOTE;
605
            }
606
            else if ('>'==c) {
607
               state = STATE_INITIAL;
608
               haveStartOrEndTag(buf.toString());
609
               buf.setLength(0);
610
            }
611
            else if ('<'==c) {
612
               // notify user, missing greater-than sign
613
               haveStartOrEndTag(buf.toString());
614
               buf.setLength(0);
615
            }
616
            else if (-1==c) {
617
               printWarning("Unclosed tag at end-of-comment: <"+buf);
618
               haveStartOrEndTag(buf.toString());
619
               buf.setLength(0);
620
            }
621
            else {
622
               buf.append((char)c);
623
            }
624
            break;
625
 
626
         case STATE_TAG_DOUBLEQUOTE:
627
            if ('\"'==c) {
628
               buf.append((char)c);
629
               state = STATE_TAG;
630
            }
631
            else if (-1==c) {
632
               printWarning("Unclosed attribute value at end-of-comment.");
633
               haveStartOrEndTag(buf.toString()+"\"");
634
            }
635
            else {
636
               buf.append((char)c);
637
            }
638
            break;
639
 
640
         case STATE_TAG_SINGLEQUOTE:
641
            if ('\''==c) {
642
               buf.append((char)c);
643
               state = STATE_TAG;
644
            }
645
            else if (-1==c) {
646
               printWarning("Unclosed attribute value at end-of-comment.");
647
               haveStartOrEndTag(buf.toString()+"'");
648
            }
649
            else {
650
               buf.append((char)c);
651
            }
652
            break;
653
         }
654
      }
655
 
656
      return output.toString();
657
   }
658
 
659
   private String getContext() {
660
      if (null != contextClass) {
661
         StringBuffer rc = new StringBuffer();
662
         rc.append(contextClass.qualifiedTypeName());
663
         if (null != contextMember) {
664
            rc.append("."+contextMember.toString());
665
         }
666
         return rc.toString();
667
      }
668
      else {
669
         return null;
670
      }
671
   }
672
 
673
   private void printWarning(String msg) {
674
      if (null != warningReporter && !noWarn) {
675
         String context = getContext();
676
         if (null != context) {
677
            warningReporter.printWarning("In "+getContext()+": "+msg);
678
         }
679
         else {
680
            warningReporter.printWarning("In overview page: "+msg);
681
         }
682
      }
683
   }
684
 
685
   public String terminateText() {
686
      output.setLength(0);
687
      flush();
688
      return output.toString();
689
   }
690
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.