/* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): */ package netscape.plugin.composer.io; import java.io.*; /** An HTML lexical stream. Takes a Reader and breaks it * up into lexical tokens. * @see Reader * @see Comment * @see JavaScriptEntity * @see Entity * @see Tag * @see Text * @see Token */ public class LexicalStream { private SlidingBuffer in; private FooStringBuffer buffer; private final static String NEWLINE = new String("\n"); private boolean bHaveClosedStream; /** Create a lexical stream from a unicode string. * @param in the input string. */ public LexicalStream(String in) { this(new CharArrayReader(in.toCharArray())); } /** Create a lexical stream from a Reader. The * stream's close() method will be called automaticly * the first time next() returns null. (i.e. when the * iterator finishes delivering tokens.) * @param in the input stream. */ public LexicalStream(Reader in) { this.in = new SlidingBuffer(in); } private int read() throws IOException { return in.read(); } private boolean lookAhead(char c) throws IOException { return in.lookAhead(c); } private boolean lookAhead(String s) throws IOException { return in.lookAhead(s); } private boolean lookAhead(String s, boolean ignoreCase) throws IOException { return in.lookAhead(s, ignoreCase); } private boolean eatNewline() throws IOException { return in.eatNewline(); } private boolean eatWhiteSpace() throws IOException { return in.eatWhiteSpace(); } /** Return the next token in an HTML input stream. \r\n's are * considered their own token (though we get rid of the \r). * Returns null if the input stream has run out of tokens. * @return the next token in the stream, or null if the stream is * out of tokens. */ public Token next() throws IOException { for (;;) { int c = read(); if (c < 0) break; if (c == '&') { if (buffer != null) { in.unread(1); break; } FooStringBuffer buf = new FooStringBuffer(); /* Don't allow javascript entities outside of parameter values. if (in.lookAhead('{')) { parseJavaScriptEntity(buf); return new JavaScriptEntity(buf); } */ parseEntity(buf); return new Entity(buf); } else if (c == '<') { if (buffer != null) { in.unread(1); break; } if (in.lookAhead('/')) { return parseTag(false); } else if (in.lookAhead('!')) { return parseComment(); } return parseTag(true); } if (c == '\r') { if (buffer != null) { in.unread(1); break; } in.lookAhead('\n'); return new Text(NEWLINE); } if (c == '\n') { if (buffer != null) { in.unread(1); break; } return new Text(NEWLINE); } if (buffer == null) { buffer = new FooStringBuffer(); } buffer.append((char)c); } if (buffer != null) { String rv = buffer.toString(); buffer = null; return new Text(rv); } if ( ! bHaveClosedStream ) { in.close(); bHaveClosedStream = true; } return null; } private boolean isWhitespace(char c){ /* JDK 1.1 return Character.isWhitespace(c); */ return Character.isSpace(c); } private Token parseTag(boolean open) throws IOException { // Capture tag name FooStringBuffer name = new FooStringBuffer(); int c; for (;;) { c = read(); if (c < 0) break; if ((c == '>') || isWhitespace((char)c)) break; name.append((char) c); } if (name.length() == 0) { name.append('<'); if (!open) name.append('/'); if (c >= 0) { name.append((char) c); } return new Text(name.toString()); } Tag tag = new Tag(name.toString(), open); if (c == '>') return tag; // Now process tag attributes for (;;) { c = read(); if ((c < 0) || (c == '>')) break; if (isWhitespace((char)c)) continue; in.unread(1); parseTagAttribute(tag); } return tag; } private void parseTagAttribute(Tag tag) throws IOException { // First get attribute name FooStringBuffer name = new FooStringBuffer(); int c; for (;;) { c = read(); if (c < 0) break; if ((c == '>') || (c == '=')) { in.unread(1); break; } if (isWhitespace((char)c)) { break; } name.append((char) c); } if (name.length() == 0) { return; } // Allow for whitespace between the attribute name and value eatWhiteSpace(); c = read(); FooStringBuffer value = null; if (c != '=') { // No attribute value follows the attribute name in.unread(1); } else { // Allow for whitespace between the '=' and the attribute value eatWhiteSpace(); // Possibly an attribute value follows the attribute name c = read(); if (c < 0) { // No attribute value follows the attribute name } else if (c == '>') { // No attribute value follows the attribute name. This // is a syntax error within the tag in.unread(1); } else { // Grab attribute value if ((c == '\'') || (c == '"')) { value = parseQuotedString(c); } else { value = new FooStringBuffer(); value.append((char) c); for (;;) { c = read(); if (c < 0) break; if (c == '>') { in.unread(1); break; } if (isWhitespace((char)c)) break; // XXX allow for concatenated quotes? value.append((char) c); } } } } tag.addAttribute(name.toString(), (value!=null) ? value.toString() : null); } private FooStringBuffer parseQuotedString(int stop) throws IOException { FooStringBuffer out = new FooStringBuffer(); for (;;) { int c = read(); if (c < 0) { break; } if (c == '&') { // Entities can be embedded in html quoted strings; they will be // reparsed later when the attribute value is evaluated if (in.peek() == '{') { read(); parseJavaScriptEntity(out); } else { parseEntity(out); } } else { if (c == stop) { break; } out.append((char) c); } } return out; } /* Process an HTML comment */ private Comment parseComment() throws IOException { FooStringBuffer out = new FooStringBuffer(); boolean fancyTerminator = false; if (in.lookAhead('-')) { if (in.lookAhead('-')) { // This comment started with "" fancyTerminator = true; } else { out.append('-'); } } // Gobble up data that lives in the comment until we find the // comment terminator (which is either ">" or "-->") for (;;) { int c = read(); if (c < 0) { break; } if (fancyTerminator) { if (c == '-') { if (in.lookAhead('-')) { if (in.lookAhead('>')) { break; } else { out.append("--"); } } else { // the minus sign will be put out by the out.append((char) c); below. } } } else if (c == '>') { break; } out.append((char)c); } return fancyTerminator ? new Comment("--" + out + "--") : new Comment(out); } /* Process an HTML entity */ private void parseEntity(FooStringBuffer out) throws IOException { for (;;) { int c = read(); if (c < 0) { break; } if (c == ';') { break; } // Ending an entity with a space is a Netscape-ism we support if (isWhitespace((char)c)) { in.unread(1); break; } out.append((char)c); } } /* Process an HTML script entity */ private void parseJavaScriptEntity(FooStringBuffer out) throws IOException { int count = 1; for (;;) { int c = read(); if (c < 0) break; if ((c == '\'') || (c == '"')) { parseJavaScriptQuotedString(out, c); } else if (c == '{') { out.append((char) c); count++; } else if (c == '}') { if (--count == 0) { in.lookAhead(';'); // eat trailing ";" that we don't care about return; } out.append((char) c); } else if (c == '/') { c = read(); if (c < 0) break; if (c == '*') { parseCComment(out); } else if (c == '/') { parseEOLComment(out); } else { out.append('/'); out.append((char) c); } } else { out.append((char) c); } } } private void parseJavaScriptQuotedString(FooStringBuffer out, int stop) throws IOException { out.append((char) stop); for (;;) { int c = read(); if (c < 0) { break; } out.append((char) c); if (c == '\\') { c = read(); if (c < 0) { break; } out.append((char) c); continue; } if (c == stop) { break; } } } private void parseCComment(FooStringBuffer out) throws IOException { out.append("/*"); for (;;) { int c = read(); if (c < 0) { break; } out.append((char) c); if (c == '*') { c = read(); if (c < 0) { break; } out.append((char) c); if (c == '/') { break; } } } } private void parseEOLComment(FooStringBuffer out) throws IOException { out.append("//"); for (;;) { int c = read(); if (c < 0) { break; } out.append((char) c); if ((c == '\n') || (c == '\r')) { out.append((char) c); break; } } } }