URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [tools/] [gnu/] [classpath/] [tools/] [doclets/] [xmldoclet/] [HtmlRepairer.java] - Rev 779
Compare with Previous | Blame | View Log
/* gnu.classpath.tools.doclets.xmldoclet.HtmlRepairer.java Copyright (C) 2003 Free Software Foundation, Inc. This file is part of GNU Classpath. GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Classpath; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. Linking this library statically or dynamically with other modules is making a combined work based on this library. Thus, the terms and conditions of the GNU General Public License cover the whole combination. As a special exception, the copyright holders of this library give you permission to link this library with independent modules to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify this library, you may extend this exception to your version of the library, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. */ package gnu.classpath.tools.doclets.xmldoclet; import java.io.*; import java.util.*; import com.sun.javadoc.DocErrorReporter; import com.sun.javadoc.ClassDoc; import com.sun.javadoc.MemberDoc; /** * Provides methods for tidying up HTML source. * * @author Julian Scheid */ public final class HtmlRepairer { private static class TagInfo { private Set parentTags = new HashSet(); public TagInfo(String parentTag) { this.parentTags.add(parentTag); } public TagInfo(String[] parentTagArr) { for (int i=0; i<parentTagArr.length; ++i) { this.parentTags.add(parentTagArr[i]); } } public boolean isLegalParentTag(String tag) { return this.parentTags.contains(tag); } } private DocErrorReporter warningReporter; private boolean noWarn; private boolean noEmailWarn; private ClassDoc contextClass; private MemberDoc contextMember; private StringBuffer output = new StringBuffer(); private Stack tagStack = new Stack(); private boolean isLeadingTag = true; private boolean throwAwayLeadingPara = false; private static Map tagInfoMap; private static Set noTextParentTags; static { tagInfoMap = new HashMap(); tagInfoMap.put("li", new TagInfo(new String[] { "ul", "ol", "nl", "menu", "dir" })); tagInfoMap.put("td", new TagInfo(new String[] { "tr" })); tagInfoMap.put("th", new TagInfo(new String[] { "tr" })); tagInfoMap.put("tr", new TagInfo(new String[] { "table" })); tagInfoMap.put("dt", new TagInfo(new String[] { "dl" })); tagInfoMap.put("dd", new TagInfo(new String[] { "dl" })); tagInfoMap.put("param", new TagInfo(new String[] { "applet" })); String[] noTextParentTagArr = { "area", "base", "body", "br", "dd", "dt", "head", "hr", "html", "img", "input", "link", "map", "meta", "ol", "optgroup", "param", "select", "table", "tbody", "tfoot", "thead", "tr", "ul", }; noTextParentTags = new HashSet(); for (int i=0; i<noTextParentTagArr.length; ++i) { noTextParentTags.add(noTextParentTagArr[i]); } } public HtmlRepairer(DocErrorReporter warningReporter, boolean noWarn, boolean noEmailWarn, ClassDoc contextClass, MemberDoc contextMember, boolean throwAwayLeadingPara) { this.warningReporter = warningReporter; this.noWarn = noWarn; this.noEmailWarn = noEmailWarn; this.contextClass = contextClass; this.contextMember = contextMember; this.throwAwayLeadingPara = throwAwayLeadingPara; } private static String replaceStr(String haystack, String needle, String replacement) { int ndx=haystack.indexOf(needle); if (ndx<0) return haystack; else return haystack.substring(0, ndx)+replacement + replaceStr(haystack.substring(ndx+needle.length()), needle, replacement); } private void haveText(String text) { if (isLeadingTag && throwAwayLeadingPara) { if (0 != text.trim().length()) { isLeadingTag = false; } } if (tagStack.isEmpty() || !noTextParentTags.contains(tagStack.peek())) { text = replaceStr(text, "<1", "<1"); text = replaceStr(text, "&&", "&&"); text = replaceStr(text, "& ", "& "); text = replaceStr(text, "&\t", "&\t"); text = replaceStr(text, "&\r", "&\r"); text = replaceStr(text, "&\n", "&\n"); for (char c='0'; c<='9'; ++c) text = replaceStr(text, "&"+c, "&"+c); text = replaceStr(text, "\u00a7", "§"); output.append(text); } else { printWarning("Discarded text in <" + tagStack.peek() + "> element"); } } private void haveStartOrEndTag(String tag) { boolean _isLeadingTag = isLeadingTag; isLeadingTag = false; tag = tag.trim(); boolean isEndTag = tag.startsWith("/"); boolean isAtomTag = tag.endsWith("/"); if (isEndTag && isAtomTag) { // got something like '</a/>' which is invalid. // suppose a close tag was intended. tag = tag.substring(0, tag.length()-1); } if (tag.length() < 1) { printWarning("Deleting broken tag"); return; } String tagName = tag.substring(isEndTag?1:0, isAtomTag?tag.length()-1:tag.length()); String tagAttributes = ""; for (int i=0; i<tagName.length(); ++i) { if (" \t\r\n".indexOf(tagName.charAt(i))>=0) { tagAttributes = tagName.substring(i).trim(); tagName = tagName.substring(0, i); break; } } if (!isEndTag && tagName.indexOf('@')>0) { if (!noEmailWarn) { printWarning("Tag looks like email address: <"+tagName+">"); } output.append("<"+tag+">"); return; } tagName = tagName.toLowerCase(); if (_isLeadingTag && "p".equals(tagName) && !isEndTag && throwAwayLeadingPara) { return; } if ("p".equals(tagName) || "br".equals(tagName) || "hr".equals(tagName)) { // throw away </p> and </br> if (isEndTag) { return; } // make sure every <p> is a <p/> and every <br> is a <br/> else if (!isAtomTag) { tag += "/"; isAtomTag = true; } } if (isEndTag) { // check whether this close tag is on the stack // if yes, close all tags up to this tag if (tagStack.contains(tagName)) { String popped; do { popped = (String)tagStack.pop(); if (!popped.equals(tagName)) printWarning("Inserting '</"+popped+">"); output.append("</"+popped+">"); } while (!popped.equals(tagName)); } // if not, just throw it away else { printWarning("Deleting <"+tag+">"); } } else { final int STATE_INITIAL = 1; final int STATE_EXPECT_ATTRIBUTENAME = 2; final int STATE_UNQUOTED_ATTRIBUTEVALUE = 3; final int STATE_SINGLEQUOTE_ATTRIBUTEVALUE = 4; final int STATE_DOUBLEQUOTE_ATTRIBUTEVALUE = 5; final int STATE_EXPECT_ATTRIBUTEVALUE = 6; final int STATE_EXPECT_EQUALSIGN = 7; int state = STATE_INITIAL; String newAttributes = ""; String attributeName = null; StringBuffer buf = new StringBuffer(); char[] attrsAsChars = tagAttributes.toCharArray(); for (int i=0, ilim=attrsAsChars.length+1; i<ilim; ++i) { int c; if (i<attrsAsChars.length) c = (int)attrsAsChars[i]; else c = -1; switch (state) { case STATE_INITIAL: if (" \t\r\n".indexOf(c)>=0){ continue; } else if (-1==c) { continue; } else { state = STATE_EXPECT_ATTRIBUTENAME; buf.append((char)c); } break; case STATE_EXPECT_ATTRIBUTENAME: if ('='==c) { attributeName = buf.toString(); buf.setLength(0); state = STATE_EXPECT_ATTRIBUTEVALUE; } else if (-1==c) { attributeName = buf.toString(); buf.setLength(0); printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\""); } else if (" \t\r\n".indexOf(c)>=0) { state = STATE_EXPECT_EQUALSIGN; } else { buf.append((char)c); } break; case STATE_EXPECT_EQUALSIGN: if (" \t\r\n".indexOf(c)>=0){ continue; } else if ('='==c) { state = STATE_EXPECT_ATTRIBUTEVALUE; attributeName = buf.toString(); buf.setLength(0); } else { attributeName = buf.toString(); buf.setLength(0); printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\""); newAttributes += " "+attributeName+"=\""+attributeName+"\""; buf.append((char)c); state = STATE_EXPECT_ATTRIBUTENAME; } break; case STATE_EXPECT_ATTRIBUTEVALUE: if (" \t\r\n".indexOf(c)>=0){ continue; } else if ('\"'==c) { state = STATE_DOUBLEQUOTE_ATTRIBUTEVALUE; } else if ('\''==c) { state = STATE_SINGLEQUOTE_ATTRIBUTEVALUE; } else { state = STATE_UNQUOTED_ATTRIBUTEVALUE; buf.append((char)c); } break; case STATE_UNQUOTED_ATTRIBUTEVALUE: if (-1==c || " \t\r\n".indexOf(c)>=0){ state = STATE_INITIAL; newAttributes += " "+attributeName + "=\"" + buf.toString() + "\""; buf.setLength(0); } else { buf.append((char)c); } break; case STATE_SINGLEQUOTE_ATTRIBUTEVALUE: if ('\''==c) { state = STATE_INITIAL; newAttributes += " "+attributeName + "=\"" + buf.toString() + "\""; buf.setLength(0); } else { buf.append((char)c); } break; case STATE_DOUBLEQUOTE_ATTRIBUTEVALUE: if ('\"'==c) { state = STATE_INITIAL; newAttributes += " "+attributeName + "=\"" + buf.toString() + "\""; buf.setLength(0); } else { buf.append((char)c); } break; } } if (!isAtomTag) { // check whether this open tag is equal to the topmost // entry on the stack; if yes, emit a close tag first // corrects stuff like '<tr><td>...<td>...'); if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) { printWarning("Inserting </"+tagName+">"); output.append("</"+tagName+">"); tagStack.pop(); } else { processKnownChildTags(tagName, tagStack, output); } // otherwise, we assume there are no close tags required // before this open tag. tagStack.push(tagName); output.append("<"+tagName+newAttributes+">"); } else { output.append("<"+tagName+newAttributes+"/>"); } } } private boolean processKnownChildTags(String tagName, Stack tagStack, StringBuffer output) { TagInfo tagInfo = (TagInfo)tagInfoMap.get(tagName); if (null != tagInfo) { String parentTag = null; for (Enumeration en = tagStack.elements(); en.hasMoreElements(); ) { String tag = (String)en.nextElement(); if (tagInfo.isLegalParentTag(tag)) { parentTag = tag; break; } } if (parentTag != null) { while (((String)tagStack.peek()) != parentTag) { String poppedTagName = (String)tagStack.pop(); output.append("</"+poppedTagName+">"); printWarning("Inserting </"+poppedTagName+">"); } return true; } } return false; } private void flush() { // close all pending tags while (!tagStack.isEmpty()) { String tagName = (String)tagStack.pop(); printWarning("Inserting </"+tagName+">"); output.append("</"+tagName+">"); } } /** * Takes HTML fragment and returns a well-formed XHTML * equivalent. * * In the returned String, all tags are properly closed and * nested. * * Currently, the returned String is not guaranteed to be * well-formed. In particular there are no checks on the tag * names, attribute names and entity names. */ public String getWellformedHTML(String text) { final int STATE_INITIAL = 1; final int STATE_TAG_START = 2; final int STATE_TAG = 3; final int STATE_TAG_DOUBLEQUOTE = 4; final int STATE_TAG_SINGLEQUOTE = 5; final int STATE_AMP = 6; int state = STATE_INITIAL; output.setLength(0); StringBuffer buf = new StringBuffer(); char[] textAsChars = text.toCharArray(); outer_loop: for (int i=0, ilim=textAsChars.length+1; i<ilim; ++i) { int c; if (i<textAsChars.length) { c = textAsChars[i]; } else { c = -1; } switch (state) { case STATE_INITIAL: if ('<'==c) { state = STATE_TAG_START; if (buf.length()>0) { haveText(buf.toString()); buf.setLength(0); } } else if ('>'==c) { // assume this is a greater-than sign buf.append(">"); } else if ('&'==c) { state = STATE_AMP; } else if (-1==c) { if (buf.length()>0) { haveText(buf.toString()); buf.setLength(0); } continue; } else { buf.append((char)c); } break; case STATE_AMP: if ('<'==c) { buf.append("&"); state = STATE_TAG_START; if (buf.length()>0) { haveText(buf.toString()); buf.setLength(0); } } else if ('>'==c) { // assume this is a greater-than sign buf.append("&"); buf.append(">"); state = STATE_INITIAL; } else if ('&'==c) { buf.append("&"); buf.append("&"); state = STATE_INITIAL; } else if (-1==c) { buf.append("&"); haveText(buf.toString()); buf.setLength(0); state = STATE_INITIAL; continue; } else { // peek forward and see whether this is a valid entity. if ('#'==c) { buf.append("&"); buf.append((char)c); state = STATE_INITIAL; continue outer_loop; } else if (Character.isLetter((char)c)) { for (int i2=i+1; i2<ilim-1; i2++) { if (';' == textAsChars[i2]) { buf.append("&"); buf.append((char)c); state = STATE_INITIAL; continue outer_loop; } else if (!Character.isLetter((char)c) && !Character.isDigit((char)c) && ".-_:".indexOf((char)c) < 0 //&& !isCombiningChar(c) // FIXME //&& !isExtender(c) // FIXME ) { break; } } // not a valid entity declaration; assume & } buf.append("&"); buf.append((char)c); state = STATE_INITIAL; } /* else if ('#'==c || Character.isLetter((char)c)) { buf.append("&"); buf.append((char)c); state = STATE_INITIAL; } else { buf.append("&"); buf.append((char)c); state = STATE_INITIAL; } */ break; case STATE_TAG_START: if (" \t\r\n".indexOf(c)>=0) { //continue; // new: assume this is a less-sign haveText("<"+c); state = STATE_INITIAL; } else if ('/'==c) { buf.append((char)c); state = STATE_TAG; } else if ('<'==c) { // assume this is a less-sign haveText("<<"); state = STATE_INITIAL; } else if ('>'==c) { // assume this is a less-sign haveText("<>"); state = STATE_INITIAL; } //else if ('-'==c || '+'==c || '='==c || '\''==c || "0123456789".indexOf(c)>=0) { else if (!Character.isLetter((char)c)) { // assume this is a less-sign haveText("<"+(char)c); state = STATE_INITIAL; } else { buf.append((char)c); state = STATE_TAG; } break; case STATE_TAG: if ('\"'==c) { buf.append((char)c); state = STATE_TAG_DOUBLEQUOTE; } else if ('\''==c) { buf.append((char)c); state = STATE_TAG_SINGLEQUOTE; } else if ('>'==c) { state = STATE_INITIAL; haveStartOrEndTag(buf.toString()); buf.setLength(0); } else if ('<'==c) { // notify user, missing greater-than sign haveStartOrEndTag(buf.toString()); buf.setLength(0); } else if (-1==c) { printWarning("Unclosed tag at end-of-comment: <"+buf); haveStartOrEndTag(buf.toString()); buf.setLength(0); } else { buf.append((char)c); } break; case STATE_TAG_DOUBLEQUOTE: if ('\"'==c) { buf.append((char)c); state = STATE_TAG; } else if (-1==c) { printWarning("Unclosed attribute value at end-of-comment."); haveStartOrEndTag(buf.toString()+"\""); } else { buf.append((char)c); } break; case STATE_TAG_SINGLEQUOTE: if ('\''==c) { buf.append((char)c); state = STATE_TAG; } else if (-1==c) { printWarning("Unclosed attribute value at end-of-comment."); haveStartOrEndTag(buf.toString()+"'"); } else { buf.append((char)c); } break; } } return output.toString(); } private String getContext() { if (null != contextClass) { StringBuffer rc = new StringBuffer(); rc.append(contextClass.qualifiedTypeName()); if (null != contextMember) { rc.append("."+contextMember.toString()); } return rc.toString(); } else { return null; } } private void printWarning(String msg) { if (null != warningReporter && !noWarn) { String context = getContext(); if (null != context) { warningReporter.printWarning("In "+getContext()+": "+msg); } else { warningReporter.printWarning("In overview page: "+msg); } } } public String terminateText() { output.setLength(0); flush(); return output.toString(); } }