OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [tools/] [gnu/] [classpath/] [tools/] [doclets/] [xmldoclet/] [HtmlRepairer.java] - Rev 779

Compare with Previous | Blame | View Log

/* gnu.classpath.tools.doclets.xmldoclet.HtmlRepairer.java
   Copyright (C) 2003 Free Software Foundation, Inc.
 
This file is part of GNU Classpath.
 
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
 
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
 
Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
 
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
 
package gnu.classpath.tools.doclets.xmldoclet;
 
import java.io.*;
import java.util.*;
import com.sun.javadoc.DocErrorReporter;
import com.sun.javadoc.ClassDoc;
import com.sun.javadoc.MemberDoc;
 
/**
 *  Provides methods for tidying up HTML source.
 *
 *  @author Julian Scheid
 */
public final class HtmlRepairer {
 
   private static class TagInfo {
 
      private Set parentTags = new HashSet();
 
      public TagInfo(String parentTag) {
         this.parentTags.add(parentTag);
      }
 
      public TagInfo(String[] parentTagArr) {
         for (int i=0; i<parentTagArr.length; ++i) {
            this.parentTags.add(parentTagArr[i]);
         }
      }
 
      public boolean isLegalParentTag(String tag) {
         return this.parentTags.contains(tag);
      }
   }
 
   private DocErrorReporter warningReporter;
   private boolean noWarn;
   private boolean noEmailWarn;
   private ClassDoc contextClass;
   private MemberDoc contextMember;
   private StringBuffer output = new StringBuffer();
   private Stack tagStack = new Stack();
   private boolean isLeadingTag = true;
   private boolean throwAwayLeadingPara = false;
 
   private static Map tagInfoMap;
 
   private static Set noTextParentTags;
 
   static {
      tagInfoMap = new HashMap();
      tagInfoMap.put("li", new TagInfo(new String[] { "ul", "ol", "nl", "menu", "dir" }));
      tagInfoMap.put("td", new TagInfo(new String[] { "tr" }));
      tagInfoMap.put("th", new TagInfo(new String[] { "tr" }));
      tagInfoMap.put("tr", new TagInfo(new String[] { "table" }));
      tagInfoMap.put("dt", new TagInfo(new String[] { "dl" }));
      tagInfoMap.put("dd", new TagInfo(new String[] { "dl" }));
      tagInfoMap.put("param", new TagInfo(new String[] { "applet" }));
 
      String[] noTextParentTagArr = {
         "area", "base", "body", "br", "dd", "dt", "head", "hr", "html",
         "img", "input", "link", "map", "meta", "ol", "optgroup", "param",
         "select", "table", "tbody", "tfoot", "thead", "tr", "ul",
      };
 
      noTextParentTags = new HashSet();
      for (int i=0; i<noTextParentTagArr.length; ++i) {
         noTextParentTags.add(noTextParentTagArr[i]);
      }
   }
 
   public HtmlRepairer(DocErrorReporter warningReporter,
                       boolean noWarn, boolean noEmailWarn,
                       ClassDoc contextClass, MemberDoc contextMember,
                       boolean throwAwayLeadingPara) {
      this.warningReporter = warningReporter;
      this.noWarn = noWarn;
      this.noEmailWarn = noEmailWarn;
      this.contextClass = contextClass;
      this.contextMember = contextMember;
      this.throwAwayLeadingPara = throwAwayLeadingPara;
   }
 
   private static String replaceStr(String haystack, String needle, String replacement) {
      int ndx=haystack.indexOf(needle);
      if (ndx<0)
         return haystack;
      else
         return haystack.substring(0, ndx)+replacement
            + replaceStr(haystack.substring(ndx+needle.length()), needle, replacement);
   }
 
   private void haveText(String text) {
 
      if (isLeadingTag && throwAwayLeadingPara) {
         if (0 != text.trim().length()) {
            isLeadingTag = false;
         }
      }
 
      if (tagStack.isEmpty() || !noTextParentTags.contains(tagStack.peek())) {
 
         text = replaceStr(text, "&lt1", "&lt;1");
         text = replaceStr(text, "&&", "&amp;&amp;");
         text = replaceStr(text, "& ", "&amp; ");
         text = replaceStr(text, "&\t", "&amp;\t");
         text = replaceStr(text, "&\r", "&amp;\r");
         text = replaceStr(text, "&\n", "&amp;\n");
         for (char c='0'; c<='9'; ++c)
            text = replaceStr(text, "&"+c, "&amp;"+c);
         text = replaceStr(text, "\u00a7", "&sect;");
         output.append(text);
      }
      else {
         printWarning("Discarded text in <" + tagStack.peek() + "> element");
      }
   }
 
   private void haveStartOrEndTag(String tag) {
 
      boolean _isLeadingTag = isLeadingTag;
      isLeadingTag = false;
 
      tag = tag.trim();
 
      boolean isEndTag = tag.startsWith("/");
      boolean isAtomTag = tag.endsWith("/");
 
      if (isEndTag && isAtomTag) {
         // got something like '</a/>' which is invalid.
         // suppose a close tag was intended.
         tag = tag.substring(0, tag.length()-1);
      }
 
      if (tag.length() < 1) {
         printWarning("Deleting broken tag");
         return;
      }
 
      String tagName = tag.substring(isEndTag?1:0, isAtomTag?tag.length()-1:tag.length());
      String tagAttributes = "";
 
      for (int i=0; i<tagName.length(); ++i) {
         if (" \t\r\n".indexOf(tagName.charAt(i))>=0) {
            tagAttributes = tagName.substring(i).trim();
            tagName = tagName.substring(0, i);
            break;
         }
      }
 
      if (!isEndTag && tagName.indexOf('@')>0) {
         if (!noEmailWarn) {
            printWarning("Tag looks like email address: <"+tagName+">");
         }
         output.append("&lt;"+tag+"&gt;");
         return;
      }
 
      tagName = tagName.toLowerCase();
 
      if (_isLeadingTag && "p".equals(tagName) && !isEndTag && throwAwayLeadingPara) {
         return;
      }
 
      if ("p".equals(tagName) || "br".equals(tagName) || "hr".equals(tagName)) {
         // throw away </p> and </br>
         if (isEndTag) {
            return;
         }
         // make sure every <p> is a <p/> and every <br> is a <br/>
         else if (!isAtomTag) {
            tag += "/";
            isAtomTag = true;
         }
      }
 
      if (isEndTag) {
 
         // check whether this close tag is on the stack
         // if yes, close all tags up to this tag
         if (tagStack.contains(tagName)) {
            String popped;
            do {
               popped = (String)tagStack.pop();
               if (!popped.equals(tagName))
                  printWarning("Inserting '</"+popped+">");
               output.append("</"+popped+">");
            }
            while (!popped.equals(tagName));
         }
         // if not, just throw it away
         else {
            printWarning("Deleting <"+tag+">");
         }
      }
      else {
 
         final int STATE_INITIAL = 1;
         final int STATE_EXPECT_ATTRIBUTENAME = 2;
         final int STATE_UNQUOTED_ATTRIBUTEVALUE = 3;
         final int STATE_SINGLEQUOTE_ATTRIBUTEVALUE = 4;
         final int STATE_DOUBLEQUOTE_ATTRIBUTEVALUE = 5;
         final int STATE_EXPECT_ATTRIBUTEVALUE = 6;
         final int STATE_EXPECT_EQUALSIGN = 7;
 
         int state = STATE_INITIAL;
 
         String newAttributes = "";
         String attributeName = null;
         StringBuffer buf = new StringBuffer();
 
         char[] attrsAsChars = tagAttributes.toCharArray();
         for (int i=0, ilim=attrsAsChars.length+1; i<ilim; ++i) {
            int c;
            if (i<attrsAsChars.length)
               c = (int)attrsAsChars[i];
            else
               c = -1;
 
            switch (state) {
 
            case STATE_INITIAL:
               if (" \t\r\n".indexOf(c)>=0){
                  continue;
               }
               else if (-1==c) {
                  continue;
               }
               else {
                  state = STATE_EXPECT_ATTRIBUTENAME;
                  buf.append((char)c);
               }
               break;
 
            case STATE_EXPECT_ATTRIBUTENAME:
               if ('='==c) {
                  attributeName = buf.toString();
                  buf.setLength(0);
                  state = STATE_EXPECT_ATTRIBUTEVALUE;
               }
               else if (-1==c) {
                  attributeName = buf.toString();
                  buf.setLength(0);
                  printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
               }
               else if (" \t\r\n".indexOf(c)>=0) {
                  state = STATE_EXPECT_EQUALSIGN;
               }
               else {
                  buf.append((char)c);
               }
               break;
 
            case STATE_EXPECT_EQUALSIGN:
               if (" \t\r\n".indexOf(c)>=0){
                  continue;
               }
               else if ('='==c) {
                  state = STATE_EXPECT_ATTRIBUTEVALUE;
                  attributeName = buf.toString();
                  buf.setLength(0);
               }
               else {
                  attributeName = buf.toString();
                  buf.setLength(0);
                  printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
                  newAttributes += " "+attributeName+"=\""+attributeName+"\"";
                  buf.append((char)c);
                  state = STATE_EXPECT_ATTRIBUTENAME;
               }
               break;
 
            case STATE_EXPECT_ATTRIBUTEVALUE:
               if (" \t\r\n".indexOf(c)>=0){
                  continue;
               }
               else if ('\"'==c) {
                  state = STATE_DOUBLEQUOTE_ATTRIBUTEVALUE;
               }
               else if ('\''==c) {
                  state = STATE_SINGLEQUOTE_ATTRIBUTEVALUE;
               }
               else {
                  state = STATE_UNQUOTED_ATTRIBUTEVALUE;
                  buf.append((char)c);
               }
               break;
 
            case STATE_UNQUOTED_ATTRIBUTEVALUE:
               if (-1==c || " \t\r\n".indexOf(c)>=0){
                  state = STATE_INITIAL;
                  newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
                  buf.setLength(0);
               }
               else {
                  buf.append((char)c);
               }
               break;
 
            case STATE_SINGLEQUOTE_ATTRIBUTEVALUE:
               if ('\''==c) {
                  state = STATE_INITIAL;
                  newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
                  buf.setLength(0);
               }
               else {
                  buf.append((char)c);
               }
               break;
 
            case STATE_DOUBLEQUOTE_ATTRIBUTEVALUE:
               if ('\"'==c) {
                  state = STATE_INITIAL;
                  newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
                  buf.setLength(0);
               }
               else {
                  buf.append((char)c);
               }
               break;
            }
         }
 
 
         if (!isAtomTag) {
 
            // check whether this open tag is equal to the topmost
            // entry on the stack; if yes, emit a close tag first
 
            // corrects stuff like '<tr><td>...<td>...');
            if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) {
               printWarning("Inserting </"+tagName+">");
               output.append("</"+tagName+">");
               tagStack.pop();
            }
            else {
               processKnownChildTags(tagName, tagStack, output);
            }
 
            // otherwise, we assume there are no close tags required
            // before this open tag.
            tagStack.push(tagName);
 
            output.append("<"+tagName+newAttributes+">");
         }
         else {
            output.append("<"+tagName+newAttributes+"/>");
         }
      }
   }
 
   private boolean processKnownChildTags(String tagName, Stack tagStack, StringBuffer output) {
 
      TagInfo tagInfo = (TagInfo)tagInfoMap.get(tagName);
      if (null != tagInfo) {
 
         String parentTag = null;
         for (Enumeration en = tagStack.elements(); en.hasMoreElements(); ) {
            String tag = (String)en.nextElement();
            if (tagInfo.isLegalParentTag(tag)) {
               parentTag = tag;
               break;
            }
         }
         if (parentTag != null) {
            while (((String)tagStack.peek()) != parentTag) {
               String poppedTagName = (String)tagStack.pop();
               output.append("</"+poppedTagName+">");
               printWarning("Inserting </"+poppedTagName+">");
            }
            return true;
         }
      }
      return false;
   }
 
   private void flush() {
 
      // close all pending tags
      while (!tagStack.isEmpty()) {
         String tagName = (String)tagStack.pop();
         printWarning("Inserting </"+tagName+">");
         output.append("</"+tagName+">");
      }
   }
 
   /**
    *  Takes HTML fragment and returns a well-formed XHTML
    *  equivalent.
    *
    *  In the returned String, all tags are properly closed and
    *  nested.
    *
    *  Currently, the returned String is not guaranteed to be
    *  well-formed. In particular there are no checks on the tag
    *  names, attribute names and entity names.
    */
   public String getWellformedHTML(String text) {
 
      final int STATE_INITIAL         = 1;
      final int STATE_TAG_START       = 2;
      final int STATE_TAG             = 3;
      final int STATE_TAG_DOUBLEQUOTE = 4;
      final int STATE_TAG_SINGLEQUOTE = 5;
      final int STATE_AMP             = 6;
 
      int state = STATE_INITIAL;
      output.setLength(0);
 
 
      StringBuffer buf = new StringBuffer();
      char[] textAsChars = text.toCharArray();
 
   outer_loop:
      for (int i=0, ilim=textAsChars.length+1; i<ilim; ++i) {
         int c;
 
         if (i<textAsChars.length) {
            c = textAsChars[i];
         }
         else {
            c = -1;
         }
 
         switch (state) {
 
         case STATE_INITIAL:
            if ('<'==c) {
               state = STATE_TAG_START;
               if (buf.length()>0) {
                  haveText(buf.toString());
                  buf.setLength(0);
               }
            }
            else if ('>'==c) {
               // assume this is a greater-than sign
               buf.append("&gt;");
            }
            else if ('&'==c) {
               state = STATE_AMP;
            }
            else if (-1==c) {
               if (buf.length()>0) {
                  haveText(buf.toString());
                  buf.setLength(0);
               }
               continue;
            }
            else {
               buf.append((char)c);
            }
            break;
 
         case STATE_AMP:
            if ('<'==c) {
               buf.append("&amp;");
               state = STATE_TAG_START;
               if (buf.length()>0) {
                  haveText(buf.toString());
                  buf.setLength(0);
               }
            }
            else if ('>'==c) {
               // assume this is a greater-than sign
               buf.append("&amp;");
               buf.append("&gt;");
               state = STATE_INITIAL;
            }
            else if ('&'==c) {
               buf.append("&amp;");
               buf.append("&amp;");
               state = STATE_INITIAL;
            }
            else if (-1==c) {
               buf.append("&amp;");
               haveText(buf.toString());
               buf.setLength(0);
               state = STATE_INITIAL;
               continue;
            }
            else {
               // peek forward and see whether this is a valid entity.
               if ('#'==c) {
                  buf.append("&");
                  buf.append((char)c);
                  state = STATE_INITIAL;
                  continue outer_loop;
               }
               else if (Character.isLetter((char)c)) {
                  for (int i2=i+1; i2<ilim-1; i2++) {
                     if (';' == textAsChars[i2]) {
                        buf.append("&");
                        buf.append((char)c);
                        state = STATE_INITIAL;
                        continue outer_loop;
                     }
                     else if (!Character.isLetter((char)c)
                              && !Character.isDigit((char)c)
                              && ".-_:".indexOf((char)c) < 0
                              //&& !isCombiningChar(c)  // FIXME
                              //&& !isExtender(c)       // FIXME
                              ) {
                        break;
                     }
                  }
                  // not a valid entity declaration; assume &amp;
               }
               buf.append("&amp;");
               buf.append((char)c);
               state = STATE_INITIAL;
            }
 
            /*
            else if ('#'==c || Character.isLetter((char)c)) {
               buf.append("&");
               buf.append((char)c);
               state = STATE_INITIAL;
            }
            else {
               buf.append("&amp;");
               buf.append((char)c);
               state = STATE_INITIAL;
            }
            */
            break;
 
         case STATE_TAG_START:
            if (" \t\r\n".indexOf(c)>=0) {
               //continue;
 
               // new: assume this is a less-sign
               haveText("&lt;"+c);
               state = STATE_INITIAL;
            }
            else if ('/'==c) {
               buf.append((char)c);
               state = STATE_TAG;
            }
            else if ('<'==c) {
               // assume this is a less-sign
               haveText("&lt;&lt;");
               state = STATE_INITIAL;
            }
            else if ('>'==c) {
               // assume this is a less-sign
               haveText("&lt;&gt;");
               state = STATE_INITIAL;
            }
            //else if ('-'==c || '+'==c || '='==c || '\''==c || "0123456789".indexOf(c)>=0) {
            else if (!Character.isLetter((char)c)) {
               // assume this is a less-sign
               haveText("&lt;"+(char)c);
               state = STATE_INITIAL;
            }
            else {
               buf.append((char)c);
               state = STATE_TAG;
            }
            break;
 
         case STATE_TAG:
            if ('\"'==c) {
               buf.append((char)c);
               state = STATE_TAG_DOUBLEQUOTE;
            }
            else if ('\''==c) {
               buf.append((char)c);
               state = STATE_TAG_SINGLEQUOTE;
            }
            else if ('>'==c) {
               state = STATE_INITIAL;
               haveStartOrEndTag(buf.toString());
               buf.setLength(0);
            }
            else if ('<'==c) {
               // notify user, missing greater-than sign
               haveStartOrEndTag(buf.toString());
               buf.setLength(0);
            }
            else if (-1==c) {
               printWarning("Unclosed tag at end-of-comment: <"+buf);
               haveStartOrEndTag(buf.toString());
               buf.setLength(0);
            }
            else {
               buf.append((char)c);
            }
            break;
 
         case STATE_TAG_DOUBLEQUOTE:
            if ('\"'==c) {
               buf.append((char)c);
               state = STATE_TAG;
            }
            else if (-1==c) {
               printWarning("Unclosed attribute value at end-of-comment.");
               haveStartOrEndTag(buf.toString()+"\"");
            }
            else {
               buf.append((char)c);
            }
            break;
 
         case STATE_TAG_SINGLEQUOTE:
            if ('\''==c) {
               buf.append((char)c);
               state = STATE_TAG;
            }
            else if (-1==c) {
               printWarning("Unclosed attribute value at end-of-comment.");
               haveStartOrEndTag(buf.toString()+"'");
            }
            else {
               buf.append((char)c);
            }
            break;
         }
      }
 
      return output.toString();
   }
 
   private String getContext() {
      if (null != contextClass) {
         StringBuffer rc = new StringBuffer();
         rc.append(contextClass.qualifiedTypeName());
         if (null != contextMember) {
            rc.append("."+contextMember.toString());
         }
         return rc.toString();
      }
      else {
         return null;
      }
   }
 
   private void printWarning(String msg) {
      if (null != warningReporter && !noWarn) {
         String context = getContext();
         if (null != context) {
            warningReporter.printWarning("In "+getContext()+": "+msg);
         }
         else {
            warningReporter.printWarning("In overview page: "+msg);
         }
      }
   }
 
   public String terminateText() {
      output.setLength(0);
      flush();
      return output.toString();
   }
}
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.