diff --git a/mozilla/htmlparser/src/nsDTDUtils.cpp b/mozilla/htmlparser/src/nsDTDUtils.cpp
index 39fc1181a76..eb623d5faf7 100644
--- a/mozilla/htmlparser/src/nsDTDUtils.cpp
+++ b/mozilla/htmlparser/src/nsDTDUtils.cpp
@@ -236,7 +236,7 @@ void CTokenRecycler::RecycleToken(CToken* aToken) {
 
 /**
  *
- * @update gess8/4/98
+ * @update vidur 11/12/98
  * @param
  * @return
  */
@@ -262,6 +262,7 @@ CToken* CTokenRecycler::CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag,
     case eToken_style: result=new CStyleToken(); break;
     case eToken_skippedcontent: result=new CSkippedContentToken(aString); break;
     case eToken_instruction:result=new CInstructionToken(); break;
+    case eToken_cdatasection:result=new CCDATASectionToken(); break;
     default:
       break;
   }
diff --git a/mozilla/htmlparser/src/nsHTMLTokens.cpp b/mozilla/htmlparser/src/nsHTMLTokens.cpp
index 2e9812f30a9..5aebaf01759 100644
--- a/mozilla/htmlparser/src/nsHTMLTokens.cpp
+++ b/mozilla/htmlparser/src/nsHTMLTokens.cpp
@@ -456,6 +456,118 @@ nsresult CTextToken::Consume(PRUnichar aChar, CScanner& aScanner) {
   return result;
 }
 
+/*
+ * default constructor
+ *
+ * @update vidur 11/12/98
+ * @param
+ * @return
+ */
+CCDATASectionToken::CCDATASectionToken() : CHTMLToken(eHTMLTag_unknown) {
+}
+
+
+/*
+ * string based constructor
+ *
+ * @update vidur 11/12/98
+ * @param aName -- string to init token name with
+ * @return
+ */
+CCDATASectionToken::CCDATASectionToken(const nsString& aName) : CHTMLToken(aName) {
+  mTypeID=eHTMLTag_unknown;
+}
+
+/*
+ * Returns the fixed class name of this token type.
+ *
+ * @update vidur 11/12/98
+ * @param
+ * @return the literal "cdatasection"
+ */
+const char* CCDATASectionToken::GetClassName(void) {
+  return "cdatasection";
+}
+
+/*
+ * Returns the token type of this class.
+ *
+ * @update vidur 11/12/98
+ * @param
+ * @return eToken_cdatasection
+ */
+PRInt32 CCDATASectionToken::GetTokenType(void) {
+  return eToken_cdatasection;
+}
+
+/*
+ * Consume as much marked text from scanner as possible, up to and
+ * including the closing "]]>" (which is not stored). \r and \r\n are
+ * normalized to \n (and \r\r to \n\n) in the text appended to mTextValue.
+ *
+ * @update vidur 11/12/98
+ * @param aChar -- last char consumed from stream
+ * @param aScanner -- controller of underlying input source
+ * @return error result
+ */
+nsresult CCDATASectionToken::Consume(PRUnichar aChar, CScanner& aScanner) {
+  static nsAutoString terminals("]\r");
+  nsresult result=NS_OK;
+  PRBool done=PR_FALSE;
+
+  while((NS_OK==result) && (!done)) {
+    result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE);
+    if(NS_OK==result) result=aScanner.Peek(aChar);
+    if(NS_OK!=result) break; //never branch on a char we failed to read
+
+    if(kCR==aChar) {
+      result=aScanner.GetChar(aChar); //strip off the \r
+      if(NS_OK==result) result=aScanner.Peek(aChar); //then see what's next.
+      if((NS_OK==result) && (kCR==aChar)) {
+        result=aScanner.GetChar(aChar); //strip off the second \r
+        mTextValue.Append("\n\n");
+      }
+      else {
+        if((NS_OK==result) && (kNewLine==aChar)) {
+          //which means we saw \r\n, which becomes \n
+          result=aScanner.GetChar(aChar); //strip off the \n
+        }
+        mTextValue.Append("\n");
+      }
+    }
+    else if (kRightSquareBracket==aChar) {
+      result=aScanner.GetChar(aChar); //strip off the ]
+      if(NS_OK==result) result=aScanner.Peek(aChar); //then see what's next.
+      if((NS_OK==result) && (kRightSquareBracket==aChar)) {
+        result=aScanner.GetChar(aChar); //strip off the second ]
+        if(NS_OK==result) result=aScanner.Peek(aChar); //then see what's next.
+        //In a run such as "]]]>" only the last two brackets close the
+        //section; every earlier bracket is ordinary text.
+        while((NS_OK==result) && (kRightSquareBracket==aChar)) {
+          mTextValue.Append("]");
+          result=aScanner.GetChar(aChar); //slide past one more ]
+          if(NS_OK==result) result=aScanner.Peek(aChar);
+        }
+        if((NS_OK==result) && (kGreaterThan==aChar)) {
+          result=aScanner.GetChar(aChar); //strip off the >
+          done=PR_TRUE;
+        }
+        else {
+          // This isn't the end of the CDATA section, so keep BOTH
+          // brackets that were stripped and go on
+          mTextValue.Append("]]");
+        }
+      }
+      else {
+        // This isn't the end of the CDATA section so go on
+        mTextValue.Append("]");
+      }
+    }
+    else done=PR_TRUE;
+  }
+  return result;
+}
+
 /*
  * default constructor
  *
diff --git a/mozilla/htmlparser/src/nsHTMLTokens.h b/mozilla/htmlparser/src/nsHTMLTokens.h
index 3f3a4aff0d1..34c7105b0c1 100644
--- a/mozilla/htmlparser/src/nsHTMLTokens.h
+++ b/mozilla/htmlparser/src/nsHTMLTokens.h
@@ -54,6 +54,7 @@ enum eHTMLTokenTypes {
   eToken_start=1, eToken_end, eToken_comment, eToken_entity,
   eToken_whitespace, eToken_newline, eToken_text, eToken_attribute,
   eToken_script, eToken_style, eToken_skippedcontent, eToken_instruction,
+  eToken_cdatasection,
   eToken_last //make sure this stays the last token...
 };
 
@@ -79,7 +80,6 @@ public:
 protected:
 };
 
-
 /**
  * This declares start tokens, which always take the form .
  * This class also knows how to consume related attributes.
@@ -207,6 +207,23 @@ class CTextToken: public CHTMLToken {
 };
 
 
+/**
+ * CDATASection tokens contain raw unescaped text content delimited by
+ * an opening ![CDATA[ (preceded by '<') and a closing ]]>.
+ * XXX Not really a HTML construct - maybe we need a separation
+ *
+ * @update vidur 11/12/98
+ */
+class CCDATASectionToken : public CHTMLToken {
+public:
+  CCDATASectionToken();
+  CCDATASectionToken(const nsString& aString);
+  virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
+  virtual const char* GetClassName(void);
+  virtual PRInt32 GetTokenType(void);
+};
+
+
 /**
  * Attribute tokens are used to contain attribute key/value
  * pairs whereever they may occur.
Typically, they should
diff --git a/mozilla/htmlparser/src/nsParserTypes.h b/mozilla/htmlparser/src/nsParserTypes.h
index bbfe9c941a2..090aa7268ac 100644
--- a/mozilla/htmlparser/src/nsParserTypes.h
+++ b/mozilla/htmlparser/src/nsParserTypes.h
@@ -73,7 +73,8 @@ const PRUint32 kRightParen = ')';
 const PRUint32 kLeftBrace = '{';
 const PRUint32 kRightBrace = '}';
 const PRUint32 kQuestionMark = '?';
-
+const PRUint32 kLeftSquareBracket = '[';
+const PRUint32 kRightSquareBracket = ']';
 
 #endif
 
diff --git a/mozilla/htmlparser/src/nsWellFormedDTD.cpp b/mozilla/htmlparser/src/nsWellFormedDTD.cpp
index 6508a2d77d9..0c21e9d0fdc 100644
--- a/mozilla/htmlparser/src/nsWellFormedDTD.cpp
+++ b/mozilla/htmlparser/src/nsWellFormedDTD.cpp
@@ -473,6 +473,84 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeComment(PRUnichar aChar,CScanner& aScanner,
   return result;
 }
 
+/*
+ * Consume characters as long as they match the string passed in.
+ * If they don't match, put back everything that was read.
+ * XXX The scanner should be able to do this.
+ *
+ * @update vidur 11/12/98
+ * @param aScanner -- input source to read from
+ * @param aMatchString -- text expected next in the stream
+ * @param aMatch -- set to PR_TRUE only if all of aMatchString was read
+ * @return NS_OK, or the error from the scanner read that failed
+ */
+static nsresult
+ConsumeConditional(CScanner& aScanner,
+                   const nsString& aMatchString,
+                   PRBool& aMatch)
+{
+  nsresult result=NS_OK;
+  PRUnichar matchChar;
+
+  aMatch = PR_FALSE; //never leave the out-param unset, even on error
+
+  PRInt32 i, count = aMatchString.Length();
+  for (i=0; i < count; i++) {
+    result = aScanner.GetChar(matchChar);
+    if ((NS_OK != result) || (aMatchString.CharAt(i) != matchChar)) {
+      break;
+    }
+  }
+
+  if (NS_OK == result) {
+    if (i != count) {
+      // The char that broke the match is whatever the stream held, not
+      // aMatchString[i]; return that one first, then the matched prefix.
+      aScanner.PutBack(matchChar);
+      for (i--; i >= 0; i--) {
+        aScanner.PutBack(aMatchString.CharAt(i));
+      }
+    }
+    else {
+      aMatch = PR_TRUE;
+    }
+  }
+  // NOTE(review): when GetChar fails, the chars already matched are not
+  // put back; confirm how callers recover from a scanner error.
+
+  return result;
+}
+
+/**
+ * This method is called when we see a "AddLeaf(theNode); break;
diff --git a/mozilla/htmlparser/src/nsWellFormedDTD.h b/mozilla/htmlparser/src/nsWellFormedDTD.h
index 2e7d2e84920..9e8504f2921 100644
--- a/mozilla/htmlparser/src/nsWellFormedDTD.h
+++ b/mozilla/htmlparser/src/nsWellFormedDTD.h
@@ -225,6 +225,7 @@ protected:
   NS_IMETHODIMP ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
+  NS_IMETHODIMP ConsumeEscapedContent(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken);
diff --git a/mozilla/parser/htmlparser/src/nsDTDUtils.cpp b/mozilla/parser/htmlparser/src/nsDTDUtils.cpp
index 39fc1181a76..eb623d5faf7 100644
--- a/mozilla/parser/htmlparser/src/nsDTDUtils.cpp
+++ b/mozilla/parser/htmlparser/src/nsDTDUtils.cpp
@@ -236,7 +236,7 @@ void CTokenRecycler::RecycleToken(CToken* aToken) {
 
 /**
  *
- * @update gess8/4/98
+ * @update vidur 11/12/98
  * @param
  * @return
  */
@@ -262,6 +262,7 @@ CToken* 
CTokenRecycler::CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag,
     case eToken_style: result=new CStyleToken(); break;
     case eToken_skippedcontent: result=new CSkippedContentToken(aString); break;
     case eToken_instruction:result=new CInstructionToken(); break;
+    case eToken_cdatasection:result=new CCDATASectionToken(); break;
     default:
       break;
   }
diff --git a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
index 2e9812f30a9..5aebaf01759 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -456,6 +456,118 @@ nsresult CTextToken::Consume(PRUnichar aChar, CScanner& aScanner) {
   return result;
 }
 
+/*
+ * default constructor
+ *
+ * @update vidur 11/12/98
+ * @param
+ * @return
+ */
+CCDATASectionToken::CCDATASectionToken() : CHTMLToken(eHTMLTag_unknown) {
+}
+
+
+/*
+ * string based constructor
+ *
+ * @update vidur 11/12/98
+ * @param aName -- string to init token name with
+ * @return
+ */
+CCDATASectionToken::CCDATASectionToken(const nsString& aName) : CHTMLToken(aName) {
+  mTypeID=eHTMLTag_unknown;
+}
+
+/*
+ * Returns the fixed class name of this token type.
+ *
+ * @update vidur 11/12/98
+ * @param
+ * @return the literal "cdatasection"
+ */
+const char* CCDATASectionToken::GetClassName(void) {
+  return "cdatasection";
+}
+
+/*
+ * Returns the token type of this class.
+ *
+ * @update vidur 11/12/98
+ * @param
+ * @return eToken_cdatasection
+ */
+PRInt32 CCDATASectionToken::GetTokenType(void) {
+  return eToken_cdatasection;
+}
+
+/*
+ * Consume as much marked text from scanner as possible, up to and
+ * including the closing "]]>" (which is not stored). \r and \r\n are
+ * normalized to \n (and \r\r to \n\n) in the text appended to mTextValue.
+ *
+ * @update vidur 11/12/98
+ * @param aChar -- last char consumed from stream
+ * @param aScanner -- controller of underlying input source
+ * @return error result
+ */
+nsresult CCDATASectionToken::Consume(PRUnichar aChar, CScanner& aScanner) {
+  static nsAutoString terminals("]\r");
+  nsresult result=NS_OK;
+  PRBool done=PR_FALSE;
+
+  while((NS_OK==result) && (!done)) {
+    result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE);
+    if(NS_OK==result) result=aScanner.Peek(aChar);
+    if(NS_OK!=result) break; //never branch on a char we failed to read
+
+    if(kCR==aChar) {
+      result=aScanner.GetChar(aChar); //strip off the \r
+      if(NS_OK==result) result=aScanner.Peek(aChar); //then see what's next.
+      if((NS_OK==result) && (kCR==aChar)) {
+        result=aScanner.GetChar(aChar); //strip off the second \r
+        mTextValue.Append("\n\n");
+      }
+      else {
+        if((NS_OK==result) && (kNewLine==aChar)) {
+          //which means we saw \r\n, which becomes \n
+          result=aScanner.GetChar(aChar); //strip off the \n
+        }
+        mTextValue.Append("\n");
+      }
+    }
+    else if (kRightSquareBracket==aChar) {
+      result=aScanner.GetChar(aChar); //strip off the ]
+      if(NS_OK==result) result=aScanner.Peek(aChar); //then see what's next.
+      if((NS_OK==result) && (kRightSquareBracket==aChar)) {
+        result=aScanner.GetChar(aChar); //strip off the second ]
+        if(NS_OK==result) result=aScanner.Peek(aChar); //then see what's next.
+        //In a run such as "]]]>" only the last two brackets close the
+        //section; every earlier bracket is ordinary text.
+        while((NS_OK==result) && (kRightSquareBracket==aChar)) {
+          mTextValue.Append("]");
+          result=aScanner.GetChar(aChar); //slide past one more ]
+          if(NS_OK==result) result=aScanner.Peek(aChar);
+        }
+        if((NS_OK==result) && (kGreaterThan==aChar)) {
+          result=aScanner.GetChar(aChar); //strip off the >
+          done=PR_TRUE;
+        }
+        else {
+          // This isn't the end of the CDATA section, so keep BOTH
+          // brackets that were stripped and go on
+          mTextValue.Append("]]");
+        }
+      }
+      else {
+        // This isn't the end of the CDATA section so go on
+        mTextValue.Append("]");
+      }
+    }
+    else done=PR_TRUE;
+  }
+  return result;
+}
+
 /*
  * default constructor
  *
diff --git a/mozilla/parser/htmlparser/src/nsHTMLTokens.h b/mozilla/parser/htmlparser/src/nsHTMLTokens.h
index 3f3a4aff0d1..34c7105b0c1 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLTokens.h
+++ b/mozilla/parser/htmlparser/src/nsHTMLTokens.h
@@ -54,6 +54,7 @@ enum eHTMLTokenTypes {
   eToken_start=1, eToken_end, eToken_comment, eToken_entity,
   eToken_whitespace, eToken_newline, eToken_text, eToken_attribute,
   eToken_script, eToken_style, eToken_skippedcontent, eToken_instruction,
+  eToken_cdatasection,
   eToken_last //make sure this stays the last token...
 };
 
@@ -79,7 +80,6 @@ public:
 protected:
 };
 
-
 /**
  * This declares start tokens, which always take the form .
  * This class also knows how to consume related attributes.
@@ -207,6 +207,23 @@ class CTextToken: public CHTMLToken {
 };
 
 
+/**
+ * CDATASection tokens contain raw unescaped text content delimited by
+ * an opening ![CDATA[ (preceded by '<') and a closing ]]>.
+ * XXX Not really a HTML construct - maybe we need a separation
+ *
+ * @update vidur 11/12/98
+ */
+class CCDATASectionToken : public CHTMLToken {
+public:
+  CCDATASectionToken();
+  CCDATASectionToken(const nsString& aString);
+  virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
+  virtual const char* GetClassName(void);
+  virtual PRInt32 GetTokenType(void);
+};
+
+
 /**
  * Attribute tokens are used to contain attribute key/value
  * pairs whereever they may occur.
Typically, they should
diff --git a/mozilla/parser/htmlparser/src/nsParserTypes.h b/mozilla/parser/htmlparser/src/nsParserTypes.h
index bbfe9c941a2..090aa7268ac 100644
--- a/mozilla/parser/htmlparser/src/nsParserTypes.h
+++ b/mozilla/parser/htmlparser/src/nsParserTypes.h
@@ -73,7 +73,8 @@ const PRUint32 kRightParen = ')';
 const PRUint32 kLeftBrace = '{';
 const PRUint32 kRightBrace = '}';
 const PRUint32 kQuestionMark = '?';
-
+const PRUint32 kLeftSquareBracket = '[';
+const PRUint32 kRightSquareBracket = ']';
 
 #endif
 
diff --git a/mozilla/parser/htmlparser/src/nsWellFormedDTD.cpp b/mozilla/parser/htmlparser/src/nsWellFormedDTD.cpp
index 6508a2d77d9..0c21e9d0fdc 100644
--- a/mozilla/parser/htmlparser/src/nsWellFormedDTD.cpp
+++ b/mozilla/parser/htmlparser/src/nsWellFormedDTD.cpp
@@ -473,6 +473,84 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeComment(PRUnichar aChar,CScanner& aScanner,
   return result;
 }
 
+/*
+ * Consume characters as long as they match the string passed in.
+ * If they don't match, put back everything that was read.
+ * XXX The scanner should be able to do this.
+ *
+ * @update vidur 11/12/98
+ * @param aScanner -- input source to read from
+ * @param aMatchString -- text expected next in the stream
+ * @param aMatch -- set to PR_TRUE only if all of aMatchString was read
+ * @return NS_OK, or the error from the scanner read that failed
+ */
+static nsresult
+ConsumeConditional(CScanner& aScanner,
+                   const nsString& aMatchString,
+                   PRBool& aMatch)
+{
+  nsresult result=NS_OK;
+  PRUnichar matchChar;
+
+  aMatch = PR_FALSE; //never leave the out-param unset, even on error
+
+  PRInt32 i, count = aMatchString.Length();
+  for (i=0; i < count; i++) {
+    result = aScanner.GetChar(matchChar);
+    if ((NS_OK != result) || (aMatchString.CharAt(i) != matchChar)) {
+      break;
+    }
+  }
+
+  if (NS_OK == result) {
+    if (i != count) {
+      // The char that broke the match is whatever the stream held, not
+      // aMatchString[i]; return that one first, then the matched prefix.
+      aScanner.PutBack(matchChar);
+      for (i--; i >= 0; i--) {
+        aScanner.PutBack(aMatchString.CharAt(i));
+      }
+    }
+    else {
+      aMatch = PR_TRUE;
+    }
+  }
+  // NOTE(review): when GetChar fails, the chars already matched are not
+  // put back; confirm how callers recover from a scanner error.
+
+  return result;
+}
+
+/**
+ * This method is called when we see a "AddLeaf(theNode); break;
diff --git a/mozilla/parser/htmlparser/src/nsWellFormedDTD.h b/mozilla/parser/htmlparser/src/nsWellFormedDTD.h
index 2e7d2e84920..9e8504f2921 100644
--- a/mozilla/parser/htmlparser/src/nsWellFormedDTD.h
+++ b/mozilla/parser/htmlparser/src/nsWellFormedDTD.h
@@ -225,6 +225,7 @@ protected:
   NS_IMETHODIMP ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
+  NS_IMETHODIMP ConsumeEscapedContent(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
   NS_IMETHODIMP ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken);