Mozilla/mozilla/modules/edtplug/classes/netscape/plugin/composer/io/LexicalStream.java

/* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

package netscape.plugin.composer.io;

import java.io.*;

/** An HTML lexical stream. Takes a Reader and breaks it
 * up into lexical tokens.
 * @see Reader
 * @see Comment
 * @see JavaScriptEntity
 * @see Entity
 * @see Tag
 * @see Text
 * @see Token
 */

public class LexicalStream {
  /** Source of characters for every parse routine; supports unread(). */
  private SlidingBuffer in;
  /** Plain text gathered by next() but not yet returned as a Text token;
   * null whenever no text is pending.
   */
  private FooStringBuffer buffer;
  /** Content of every line-break token, whatever form the break had in the input. */
  private static final String NEWLINE = "\n";
  /** Set by next() after it has closed the input, so the close happens once. */
  private boolean bHaveClosedStream;

  /** Create a lexical stream over the characters of a unicode string.
   * @param in the input string; must not be null.
   */
  public LexicalStream(String in) {
    // StringReader reads the string in place, so the whole document does
    // not have to be copied into a char[] first.
    this(new StringReader(in));
  }

  /** Create a lexical stream that draws its characters from a Reader.
   * The input is closed automatically the first time next() returns
   * null, i.e. once the last token has been delivered.
   * @param in the input stream.
   */
  public LexicalStream(Reader in) {
    SlidingBuffer source = new SlidingBuffer(in);
    this.in = source;
  }

  /** Reads the next character from the underlying buffer.
   * @return the value returned by SlidingBuffer.read(); the parse routines
   * in this class treat a negative value as end of input.
   */
  private int read() throws IOException {
    return in.read();
  }

  /** Delegates to SlidingBuffer.lookAhead(char). Not called within this
   * class as shown; the parse routines invoke in.lookAhead directly.
   * NOTE(review): those callers rely on a match consuming the character
   * and a mismatch leaving the input untouched -- confirm against
   * SlidingBuffer.
   */
  private boolean lookAhead(char c) throws IOException {
    return in.lookAhead(c);
  }

  /** Delegates to SlidingBuffer.lookAhead(String). Not called within this
   * class as shown.
   * NOTE(review): presumably tests whether the input continues with s --
   * confirm the consume-on-match behaviour against SlidingBuffer.
   */
  private boolean lookAhead(String s) throws IOException {
    return in.lookAhead(s);
  }

  /** Delegates to SlidingBuffer.lookAhead(String, boolean). Not called
   * within this class as shown.
   * NOTE(review): ignoreCase presumably selects a case-insensitive
   * comparison -- confirm against SlidingBuffer.
   */
  private boolean lookAhead(String s, boolean ignoreCase) throws IOException {
    return in.lookAhead(s, ignoreCase);
  }

  /** Delegates to SlidingBuffer.eatNewline(). Not called within this class
   * as shown.
   * NOTE(review): presumably consumes a line terminator and reports
   * whether one was found -- confirm against SlidingBuffer.
   */
  private boolean eatNewline() throws IOException {
    return in.eatNewline();
  }

  /** Delegates to SlidingBuffer.eatWhiteSpace(). parseTagAttribute calls
   * this to skip blanks on either side of the '=' and ignores the result.
   * NOTE(review): the meaning of the returned boolean is not visible
   * here -- confirm against SlidingBuffer.
   */
  private boolean eatWhiteSpace() throws IOException {
    return in.eatWhiteSpace();
  }

  /** Deliver the next token of the HTML input. A line break (\r, \n or
   * \r\n) always forms a token of its own, reported as a Text holding a
   * single \n. Once the input is exhausted the underlying buffer is
   * closed (one time only) and null is returned.
   * @return the next token in the stream, or null if the stream is
   * out of tokens.
   * @throws IOException if reading or closing the input fails.
   */
  public Token next() throws IOException {
    int ch;
    while ((ch = read()) >= 0) {
      boolean startsToken =
          (ch == '&') || (ch == '<') || (ch == '\r') || (ch == '\n');
      if (!startsToken) {
        // Ordinary character: add it to the pending run of text.
        if (buffer == null) {
          buffer = new FooStringBuffer();
        }
        buffer.append((char) ch);
        continue;
      }
      if (buffer != null) {
        // Text is pending: hand that out first and look at this
        // character again on the following call.
        in.unread(1);
        break;
      }
      switch (ch) {
        case '&': {
          // JavaScript entities ("&{...}") are deliberately not
          // recognised outside of attribute values.
          FooStringBuffer entityName = new FooStringBuffer();
          parseEntity(entityName);
          return new Entity(entityName);
        }
        case '<':
          if (in.lookAhead('/')) {
            return parseTag(false);
          }
          if (in.lookAhead('!')) {
            return parseComment();
          }
          return parseTag(true);
        case '\r':
          // Swallow the \n of a \r\n pair when there is one.
          in.lookAhead('\n');
          return new Text(NEWLINE);
        default: // '\n'
          return new Text(NEWLINE);
      }
    }

    if (buffer != null) {
      String text = buffer.toString();
      buffer = null;
      return new Text(text);
    }
    if (!bHaveClosedStream) {
      in.close();
      bHaveClosedStream = true;
    }
    return null;
  }

  /** Tells whether a character counts as white space for the lexer:
   * space, tab, line feed, form feed or carriage return. This is exactly
   * the set accepted by the deprecated Character.isSpace(char), spelled
   * out here so that the deprecated call is no longer needed.
   * Character.isWhitespace(char) is intentionally not used: it accepts
   * further characters (for instance U+000B) and would change how tags
   * and entities are split.
   * @param c the character to classify.
   * @return true if c is one of the five characters listed above.
   */
  private boolean isWhitespace(char c) {
    switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\f':
      case '\r':
        return true;
      default:
        return false;
    }
  }
  /** Parse a tag whose leading "<" (or "</") has already been consumed.
   * @param open true for an opening tag, false when a '/' followed the '<'.
   * @return a Tag carrying the name and any attributes; or, when no tag
   * name follows, a Text holding the characters consumed so far ("<",
   * an optional "/", and the character that ended the scan, if any).
   * @throws IOException if reading the input fails.
   */
  private Token parseTag(boolean open) throws IOException {
    // Collect the tag name: everything up to '>', white space or end of input.
    FooStringBuffer tagName = new FooStringBuffer();
    int ch = read();
    while ((ch >= 0) && (ch != '>') && !isWhitespace((char) ch)) {
      tagName.append((char) ch);
      ch = read();
    }

    if (tagName.length() == 0) {
      // Not a tag after all; give the consumed characters back as text.
      tagName.append('<');
      if (!open) {
        tagName.append('/');
      }
      if (ch >= 0) {
        tagName.append((char) ch);
      }
      return new Text(tagName.toString());
    }

    Tag tag = new Tag(tagName.toString(), open);
    if (ch != '>') {
      // Attributes follow, separated by white space, until '>' or end of input.
      while (((ch = read()) >= 0) && (ch != '>')) {
        if (!isWhitespace((char) ch)) {
          // First character of an attribute: let the attribute parser see it.
          in.unread(1);
          parseTagAttribute(tag);
        }
      }
    }
    return tag;
  }

  /** Parse one attribute (a name with an optional "= value") and add it to
   * the tag. The value may be quoted with ' or ", or be a bare run of
   * characters ending at white space or '>'. A terminating '>' is always
   * left in the input for the caller. Nothing is added when no name can be
   * read.
   * @param tag the tag that receives the attribute; the value passed to
   * addAttribute is null when the attribute has no value.
   * @throws IOException if reading the input fails.
   */
  private void parseTagAttribute(Tag tag) throws IOException {
    // First get attribute name
    FooStringBuffer name = new FooStringBuffer();
    int c;
    for (;;) {
      c = read();
      if (c < 0) break;
      // '=' only separates name from value once there is a name. A stray
      // leading '=' (as in <a =b>) is kept as the first name character:
      // pushing it back would return with nothing consumed, and the
      // attribute loop in parseTag would then see the same '=' forever.
      if ((c == '>') || ((c == '=') && (name.length() > 0))) {
        in.unread(1);
        break;
      }
      if (isWhitespace((char) c)) {
        break;
      }
      name.append((char) c);
    }
    if (name.length() == 0) {
      return;
    }

    // Allow for whitespace between the attribute name and value
    eatWhiteSpace();
    c = read();

    FooStringBuffer value = null;
    if (c != '=') {
      // No attribute value follows the attribute name. Give the character
      // back, but only if one was actually read: every other unread in
      // this class is skipped at end of input, and doing it here could
      // re-deliver the last real character.
      if (c >= 0) {
        in.unread(1);
      }
    } else {
      // Allow for whitespace between the '=' and the attribute value
      eatWhiteSpace();

      // Possibly an attribute value follows the attribute name
      c = read();
      if (c < 0) {
        // End of input: the attribute has no value
      } else if (c == '>') {
        // "name=>" is a syntax error within the tag; treat it as an
        // attribute without a value and leave the '>' for the caller
        in.unread(1);
      } else if ((c == '\'') || (c == '"')) {
        value = parseQuotedString(c);
      } else {
        // Unquoted value: runs up to white space, '>' or end of input
        value = new FooStringBuffer();
        value.append((char) c);
        for (;;) {
          c = read();
          if (c < 0) break;
          if (c == '>') {
            in.unread(1);
            break;
          }
          if (isWhitespace((char) c)) break;
          // XXX allow for concatenated quotes?
          value.append((char) c);
        }
      }
    }
    tag.addAttribute(name.toString(), (value != null) ? value.toString() : null);
  }

  /** Read a quoted attribute value whose opening quote has already been
   * consumed. The closing quote is consumed but not returned.
   *
   * Entities are kept in their source form ("&name;" or "&{script};") so
   * that they can be reparsed when the attribute value is evaluated:
   * <ul>
   * <li>A plain '&' is copied like any other character. It is not handed
   * to parseEntity, because that routine does not stop at the closing
   * quote: a value such as "a?x=1&y=2" would swallow the quote and the
   * markup after it, and the '&' and ';' would be lost.</li>
   * <li>"&{" starts a JavaScript entity, which is scanned by
   * parseJavaScriptEntity so that quote characters inside the script do
   * not end the attribute. Its delimiters are written back as "&{" and
   * "};" (also when the input ended inside the script).</li>
   * </ul>
   * @param stop the quote character that ends the string.
   * @return the characters between the quotes; ends at end of input if
   * the closing quote is missing.
   * @throws IOException if reading the input fails.
   */
  private FooStringBuffer parseQuotedString(int stop) throws IOException {
    FooStringBuffer out = new FooStringBuffer();
    for (;;) {
      int c = read();
      if ((c < 0) || (c == stop)) {
        break;
      }
      if ((c == '&') && (in.peek() == '{')) {
        read(); // consume the '{'
        out.append("&{");
        parseJavaScriptEntity(out);
        out.append("};");
      } else {
        out.append((char) c);
      }
    }
    return out;
  }

  /** Process an HTML comment or other "<!" declaration; the "<!" itself
   * has already been consumed. A comment that starts with "<!--" ends at
   * "-->"; anything else ends at the first ">". An unterminated comment
   * ends at end of input.
   * @return a Comment holding the text between "<!" and the closing ">";
   * for the "<!--" form this includes the leading and trailing "--".
   * @throws IOException if reading the input fails.
   */
  private Comment parseComment() throws IOException {
    FooStringBuffer out = new FooStringBuffer();
    boolean fancyTerminator = false;
    if (in.lookAhead('-')) {
      if (in.lookAhead('-')) {
        // This comment started with "<!--"; therefore we will look for
        // its terminator which is "-->"
        fancyTerminator = true;
      } else {
        // A single '-' was consumed; it is ordinary content.
        out.append('-');
      }
    }

    // Gobble up data that lives in the comment until we find the
    // comment terminator (which is either ">" or "-->")
    scan:
    for (;;) {
      int c = read();
      if (c < 0) {
        break;
      }
      if (!fancyTerminator) {
        if (c == '>') {
          break;
        }
        out.append((char) c);
        continue;
      }
      if ((c == '-') && in.lookAhead('-')) {
        // Two dashes are in hand. In a longer run of dashes only the last
        // two can belong to the terminator, so every further dash moves
        // one dash into the content (this makes "--->" end the comment).
        for (;;) {
          if (in.lookAhead('>')) {
            break scan;
          }
          if (!in.lookAhead('-')) {
            break;
          }
          out.append('-');
        }
        // Not a terminator: emit exactly the two dashes that were
        // consumed, and do not append c a second time.
        out.append("--");
        continue;
      }
      out.append((char) c);
    }
    return fancyTerminator ? new Comment("--" + out + "--") : new Comment(out);
  }

  /** Process an HTML entity whose leading '&' has already been consumed,
   * appending the entity name to out. The name ends at:
   * <ul>
   * <li>';', which is consumed and not stored;</li>
   * <li>white space (a Netscape-ism we support), '<' or '&', which are
   * left in the input. Stopping at '<' and '&' keeps an entity written
   * without ';' from swallowing the tag or entity that follows it, as
   * in "AT&T<br>";</li>
   * <li>end of input.</li>
   * </ul>
   * @param out the buffer that receives the entity name.
   * @throws IOException if reading the input fails.
   */
  private void parseEntity(FooStringBuffer out) throws IOException {
    for (;;) {
      int c = read();
      if ((c < 0) || (c == ';')) {
        break;
      }
      if (isWhitespace((char) c) || (c == '<') || (c == '&')) {
        in.unread(1);
        break;
      }
      out.append((char) c);
    }
  }

  /** Process an HTML script entity whose opening "&{" has already been
   * consumed, appending the script text to out. Braces are counted so
   * that nested blocks do not end the entity; string literals and both
   * kinds of JavaScript comment are copied verbatim so that braces and
   * quotes inside them are not interpreted. The closing '}' and an
   * optional ';' after it are consumed and not stored. Scanning also
   * stops at end of input.
   * @param out the buffer that receives the script text.
   * @throws IOException if reading the input fails.
   */
  private void parseJavaScriptEntity(FooStringBuffer out) throws IOException {
    int depth = 1;
    for (;;) {
      int c = read();
      if (c < 0) break;
      switch (c) {
        case '\'':
        case '"':
          parseJavaScriptQuotedString(out, c);
          break;
        case '{':
          out.append((char) c);
          depth++;
          break;
        case '}':
          if (--depth == 0) {
            in.lookAhead(';'); // eat trailing ";" that we don't care about
            return;
          }
          out.append((char) c);
          break;
        case '/':
          if (in.lookAhead('*')) {
            parseCComment(out);
          } else if (in.lookAhead('/')) {
            parseEOLComment(out);
          } else {
            // A lone '/'. The character after it stays in the input so
            // the next pass of the loop interprets it; appending it
            // blindly would hide a '}', '{' or quote that follows a '/'.
            out.append('/');
          }
          break;
        default:
          out.append((char) c);
          break;
      }
    }
  }

  /** Copy a JavaScript string literal to out, quotes included. The opening
   * quote has already been consumed by the caller. A backslash and the
   * character after it are copied as a pair, so an escaped quote does not
   * end the literal. Copying stops after the closing quote or at end of
   * input.
   * @param out the buffer that receives the literal.
   * @param stop the quote character that opened the literal.
   * @throws IOException if reading the input fails.
   */
  private void parseJavaScriptQuotedString(FooStringBuffer out, int stop) throws IOException {
    out.append((char) stop);
    int ch;
    while ((ch = read()) >= 0) {
      out.append((char) ch);
      if (ch == '\\') {
        // Escape sequence: take the next character unexamined.
        int escaped = read();
        if (escaped < 0) {
          return;
        }
        out.append((char) escaped);
      } else if (ch == stop) {
        return;
      }
    }
  }

  // Copy a C-style (slash-star) JavaScript comment to out, delimiters
  // included. The opening slash-star has already been consumed by the
  // caller. Copying stops after the closing star-slash or at end of input.
  //
  // out: the buffer that receives the comment text.
  private void parseCComment(FooStringBuffer out) throws IOException {
    out.append("/*");
    for (;;) {
      int c = read();
      if (c < 0) {
        break;
      }
      out.append((char) c);
      // Only look at the character after a '*' without consuming it on a
      // mismatch. Consuming it unconditionally would skip over the second
      // star of a "**" run and miss a terminator that directly follows.
      if ((c == '*') && in.lookAhead('/')) {
        out.append('/');
        break;
      }
    }
  }

  /** Copy a JavaScript end-of-line comment to out, including the leading
   * "//". The "//" has already been consumed by the caller. Copying stops
   * after the first \n or \r, which is written to out exactly once, or at
   * end of input. For a \r\n pair only the \r is consumed here.
   * @param out the buffer that receives the comment text.
   * @throws IOException if reading the input fails.
   */
  private void parseEOLComment(FooStringBuffer out) throws IOException {
    out.append("//");
    for (;;) {
      int c = read();
      if (c < 0) {
        break;
      }
      out.append((char) c);
      if ((c == '\n') || (c == '\r')) {
        // The line terminator has already been appended above.
        break;
      }
    }
  }
}