/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */
 
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 * 
 *  This class does two primary jobs:
 *    1) It iterates the tokens provided during the 
 *       tokenization process, identifying where elements
 *       begin and end (doing validation and normalization).
 *    2) It controls and coordinates with an instance of
 *       the IContentSink interface, to coordinate
 *       the production of the content model.
 *
 *  The basic operation of this class assumes that an HTML
 *  document is non-normalized. Therefore, we don't process
 *  the document in a normalized way. Don't bother to look
 *  for methods like: doHead() or doBody().
 *
 *  Instead, in order to be backward compatible, we must
 *  scan the set of tokens and perform this basic set of
 *  operations:
 *    1)  Determine the token type (easy, since the tokens know)
 *    2)  Determine the appropriate section of the HTML document
 *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
 *    3)  Insert content into our document (via the sink) into
 *        the correct section.
 *    4)  In the case of tags that belong in the BODY, we must
 *        ensure that our underlying document state reflects
 *        the appropriate context for our tag. 
 *
 *        For example, if we see a <TR>, we must ensure our 
 *        document contains a table into which the row can
 *        be placed. This may result in "implicit containers" 
 *        created to ensure a well-formed document.
 *         
 */

#ifndef NS_PARSER__
#define NS_PARSER__

#include "nsIParser.h"
#include "nsDeque.h"
#include "nsParserNode.h"
#include "nsParserTypes.h"
#include "nsIURL.h"
#include "CParserContext.h"

/**
 * Brace-initializer for the parser's interface identifier: one 32-bit
 * value, two 16-bit values, then a nested group of eight bytes.
 * NOTE(review): presumably consumed by an IID-defining macro in the
 * implementation file -- the use site is not visible from this header.
 */
#define NS_PARSER_IID      \
  {0x2ce606b0, 0xbee6,  0x11d1,  \
  {0xaa, 0xd9, 0x00,    0x80, 0x5f, 0x8a, 0x3e, 0x14}}


/**
 * Forward declarations for types that this header only names through
 * pointers or references, so their full definitions are not needed here.
 * NOTE(review): IContentSink and nsIHTMLContentSink are not used by any
 * declaration visible in this header (the class below uses nsIContentSink);
 * confirm whether they are still required.
 */
class IContentSink;
class nsIHTMLContentSink;
class nsIDTD;
class nsIDTDDebug;
class CScanner;
class nsIParserFilter;
class fstream;

/**
 * nsParser derives from both nsIParser and nsIStreamListener. It declares
 * a family of Parse() overloads (URL, input stream, filename, fstream and
 * in-memory string), the stream-listener callbacks, and the internal
 * tokenize / build-model steps. CTokenHandler is granted friend access.
 *
 * NOTE(review): many of the original notes below describe results as
 * TRUE/FALSE or YES/NO although the declared return type is PRInt32; the
 * actual convention (boolean vs. error code) is defined in the
 * implementation file and should be confirmed there.
 */
class nsParser : public nsIParser, public nsIStreamListener {
            
  public:
// Gives CTokenHandler access to the protected and private members below.
friend class CTokenHandler;

    // NOTE(review): presumably expands to the nsISupports method
    // declarations -- the macro definition is not visible in this header.
    NS_DECL_ISUPPORTS


    /**
     * Default constructor.
     * @update  gess 5/11/98
     */
    nsParser();


    /**
     * Destructor (virtual, since the class is used polymorphically).
     * @update  gess 5/11/98
     */
    virtual ~nsParser();

    /**
     * Select given content sink into parser for parser output.
     * @update  gess 5/11/98
     * @param   aSink is the new sink to be used by parser
     * @return  old sink, or NULL
     */
    virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
    
    /**
     * Select given parser filter into parser.
     * @param   aFilter is the filter to be used by parser
     * @return  NOTE(review): presumably the previously installed filter,
     *          mirroring SetContentSink() -- confirm in the implementation.
     */
    virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
    
    /**
     * Hand a DTD to the parser.
     * NOTE(review): ownership of aDTD and how the parser selects among
     * registered DTDs is not visible from this header.
     * @param   aDTD is the DTD being registered
     */
    virtual void RegisterDTD(nsIDTD* aDTD);

    /**
     * Accessor for the scanner.
     * NOTE(review): the scanner member is listed below among the fields
     * moved into the parse-context object, so this presumably returns the
     * current context's scanner -- confirm.
     * @update  gess 6/9/98
     * @return  pointer to a CScanner
     */
    virtual CScanner* GetScanner(void);

    /**
     * Cause parser to parse input from given URL.
     * @update  gess 5/11/98
     * @param   aURL is a descriptor for source document
     * @param   aListener is a listener to forward notifications to
     * @param   aDTDDebug is an optional DTD debug object; defaults to 0
     * @return  TRUE if all went well -- FALSE otherwise
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 Parse(nsIURL* aURL,
                          nsIStreamObserver* aListener, 
                          nsIDTDDebug* aDTDDebug = 0);

    /**
     * Cause parser to parse input from given nsIInputStream.
     * @update  gess 5/11/98
     * @param   pIStream is an nsIInputStream
     * @param   aListener is a listener to forward notifications to
     * @param   aDTDDebug is an optional DTD debug object; defaults to 0
     * @return  TRUE if all went well -- FALSE otherwise
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 Parse(nsIInputStream* pIStream,
                          nsIStreamObserver* aListener, 
                          nsIDTDDebug* aDTDDebug = 0);

    /**
     * Cause parser to parse input from given file.
     * Note that this overload takes a filename, whereas the two-argument
     * nsString overload below takes the HTML text itself.
     * @update  gess 5/11/98
     * @param   aFilename is a path for file document
     * @return  TRUE if all went well -- FALSE otherwise
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 Parse(nsString& aFilename);

    /**
     * Cause parser to parse input from given stream.
     * @update  gess 5/11/98
     * @param   aStream is the i/o source
     * @return  TRUE if all went well -- FALSE otherwise
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 Parse(fstream& aStream);

    /**
     * Cause parser to parse HTML held in a string.
     * @update  gess 5/11/98
     * @param   anHTMLString contains a string-full of real HTML
     * @param   appendTokens tells us whether we should insert tokens inline, or append them.
     * @return  TRUE if all went well -- FALSE otherwise
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);

    /**
     * This method gets called (automatically) during incremental parsing.
     * @update  gess 5/11/98
     * @return  TRUE if all went well, otherwise FALSE
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 ResumeParse();

    /**
     * Causes the parser to scan forward, collecting nearby (sequential)
     * attribute tokens into the given node.
     * @update  gess 5/11/98
     * @param   aNode is the node to store attributes in
     * @param   aCount -- NOTE(review): presumably the number of attribute
     *          tokens to collect; confirm in the implementation
     * @return  number of attributes added to node.
     */
    virtual PRInt32 CollectAttributes(nsCParserNode& aNode,PRInt32 aCount);

    /**
     * Causes the next skipped-content token (if any) to
     * be consumed by this node.
     * @update  gess 5/11/98
     * @param   aNode is the node to consume skipped-content
     * @param   aCount holds the number of skipped content elements
     *          encountered (passed by reference, so it is an output)
     * @return  Error condition.
     */
    virtual PRInt32 CollectSkippedContent(nsCParserNode& aNode,PRInt32& aCount);

    /**
     *  Debug routine that writes to the given output stream.
     *  NOTE(review): the original note here was identical to the one on
     *  DebugDumpTokens(); judging by the name this dumps the source rather
     *  than the token list -- confirm in the implementation.
     *  
     *  @update  gess 3/25/98
     *  @param   out is the stream to write to
     */
    void DebugDumpSource(ostream& out);


      //*********************************************
      // These methods are callback methods used by
      // net lib to let us know about our inputstream.
      //*********************************************
    NS_IMETHOD GetBindInfo(nsIURL* aURL);
    NS_IMETHOD OnProgress(nsIURL* aURL, PRInt32 Progress, PRInt32 ProgressMax, const nsString& aMmsg);
    NS_IMETHOD OnStartBinding(nsIURL* aURL, const char *aContentType);
    NS_IMETHOD OnDataAvailable(nsIURL* aURL, nsIInputStream *pIStream, PRInt32 length);
    NS_IMETHOD OnStopBinding(nsIURL* aURL, PRInt32 status, const nsString& aMsg);

protected:

    /**
     * NOTE(review): presumably the opening half of the model-building
     * "sandwich" (compare WillTokenize/DidTokenize below) -- the original
     * notes were left blank; confirm in the implementation.
     * @update  gess 5/18/98
     * @param   aFilename -- a string; exact use not visible here
     * @return  PRInt32; meaning not visible here
     */
    PRInt32 WillBuildModel(nsString& aFilename);

    /**
     * NOTE(review): presumably the closing half of the model-building
     * "sandwich" -- the original notes were left blank; confirm.
     * @update  gess 5/18/98
     * @param   anErrorCode -- an error code, presumably the outcome of
     *          the preceding build step
     * @return  PRInt32; meaning not visible here
     */
    PRInt32 DidBuildModel(PRInt32 anErrorCode);

    /**
     * This method gets called when the tokens have been consumed, and it's time
     * to build the model via the content sink.
     * @update  gess 5/11/98
     * @return  YES if model building went well -- NO otherwise.
     *          (declared as PRInt32; see class note)
     */
    virtual PRInt32 BuildModel(void);
  
private:

    /*******************************************
      These are the tokenization methods...
     *******************************************/

    /**
     *  Cause the tokenizer to consume the next token, and 
     *  return an error result.
     *  
     *  @update  gess 3/25/98
     *  @param   aToken -- reference to a token pointer; presumably set to
     *           the new token, or null (the original note documented an
     *           "anError" parameter that does not exist) -- confirm
     *  @return  PRInt32 error result
     */
    virtual PRInt32 ConsumeToken(CToken*& aToken);

    /**
     *  Part of the code sandwich, this gets called right before
     *  the tokenization process begins. The main reason for
     *  this call is to allow the delegate to do initialization.
     *  
     *  @update  gess 3/25/98
     *  @return  TRUE if it's ok to proceed
     */
    PRBool WillTokenize();

   
    /**
     *  This is the primary control routine. It iteratively
     *  consumes tokens until an error occurs or you run out
     *  of data.
     *  
     *  @update  gess 3/25/98
     *  @return  error code 
     */
    PRInt32 Tokenize();

    /**
     *  This is the tail-end of the code sandwich for the
     *  tokenization process. It gets called once tokenization
     *  has completed.
     *  
     *  @update  gess 3/25/98
     *  @return  TRUE if all went well
     */
    PRBool DidTokenize();

    /**
     *  This debug routine is used to cause the tokenizer to
     *  iterate its token list, asking each token to dump its
     *  contents to the given output stream.
     *  
     *  @update  gess 3/25/98
     *  @param   out is the stream to write to
     */
    void DebugDumpTokens(ostream& out);

    
    /**
     * This method is used as a backstop to compute the kind of content
     * that is contained in the scanner stream. This method is important
     * because it allows us to defer the resolution of our DTD (and hence)
     * filters and maybe eventually sinks based on the input type.
     *
     * @update  gess 6/22/98
     * @param   aBuffer -- NOTE(review): presumably the leading content to
     *          inspect; confirm
     * @param   aType -- NOTE(review): presumably receives (or supplies) the
     *          content type; confirm
     * @return  an eAutoDetectResult (the original note said "TRUE if we
     *          figured it out", which does not match the declared type)
     */
    eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType);


protected:
    //*********************************************
    // And now, some data members...
    //*********************************************

  /*****************************************************
    All of these moved into the parse-context object
    (kept here, commented out, for reference only):

    PRInt32             mMajorIteration;
    PRInt32             mMinorIteration;

    nsIURL*             mURL;
    nsString            mSourceType;
    nsString            mTargetType;
    eAutoDetectResult   mAutoDetectStatus;

    nsDequeIterator*    mCurrentPos;
    nsDequeIterator*    mMarkPos;
    nsDeque             mTokenDeque;
    CScanner*           mScanner;
    nsIDTD*             mDTD;

    eParseMode          mParseMode;
    char*               mTransferBuffer;
   *****************************************************/

    // The parse-context object that the fields listed above were moved into.
    // NOTE(review): ownership/lifetime is not visible from this header.
    CParserContext*     mParserContext;

  /*****************************************************
    The commented-out fields above are the ones that
    moved into the parse-context; the members below
    remain on the parser itself.
   *****************************************************/


    // Same type as the aListener argument of the Parse() overloads;
    // presumably stored from there -- confirm in the implementation.
    nsIStreamObserver*  mObserver;
    // Same type as SetContentSink() takes and returns.
    nsIContentSink*     mSink;
    // Same type as SetParserFilter() takes and returns.
    nsIParserFilter*    mParserFilter;


    // Same type as the optional aDTDDebug argument of Parse().
	  nsIDTDDebug*		    mDTDDebug;


};


#endif