From beda106d3bea790fa757187bfebdac24b888cc05 Mon Sep 17 00:00:00 2001 From: "peterv%propagandism.org" Date: Sun, 2 May 2004 11:16:26 +0000 Subject: [PATCH] Merging patch by bz (from mozilla/htmlparser). Change our concept of a "tagname" to include all sorts of random chars like IE does (that is, allow any char that's not in a short list of terminal chars). Bug 236002, r=choess, sr=peterv git-svn-id: svn://10.0.0.236/trunk@155822 18797224-902f-48f8-a5cc-f745e15eee43 --- .../parser/htmlparser/src/nsHTMLTokens.cpp | 10 +- mozilla/parser/htmlparser/src/nsScanner.cpp | 101 ++++-------------- mozilla/parser/htmlparser/src/nsScanner.h | 21 ++-- 3 files changed, 38 insertions(+), 94 deletions(-) diff --git a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp index 89f8907b660..45a9b443ec5 100644 --- a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp +++ b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp @@ -182,7 +182,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag nsresult result=NS_OK; if (aFlag & NS_IPARSER_FLAG_HTML) { nsAutoString theSubstr; - result=aScanner.GetIdentifier(theSubstr,PR_TRUE); + result=aScanner.ReadTagIdentifier(theSubstr); mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr); // Save the original tag string if this is user-defined or if we // are viewing source @@ -195,7 +195,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag //was written but since we didn't respect the '_', we only saw . Then //we searched for end title, which never comes (they give </title_>). - result=aScanner.ReadIdentifier(mTextValue,PR_TRUE); + result=aScanner.ReadTagIdentifier(mTextValue); mTypeID = nsHTMLTags::LookupTag(mTextValue); } @@ -284,7 +284,7 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) nsresult result = NS_OK; if (aFlag & NS_IPARSER_FLAG_HTML) { nsAutoString theSubstr; - result=aScanner.GetIdentifier(theSubstr,PR_TRUE); + result=aScanner.ReadTagIdentifier(theSubstr); NS_ENSURE_SUCCESS(result, result); mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr); @@ -296,7 +296,7 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) } } else { - result = aScanner.ReadIdentifier(mTextValue,PR_TRUE); + result = aScanner.ReadTagIdentifier(mTextValue); NS_ENSURE_SUCCESS(result, result); mTypeID = nsHTMLTags::LookupTag(mTextValue); @@ -1939,7 +1939,7 @@ CEntityToken::ConsumeEntity(PRUnichar aChar, theChar == '_' || theChar == ':') { aScanner.GetChar(aChar); // Consume & - result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE. + result=aScanner.ReadEntityIdentifier(aString); } else { return NS_HTMLTOKENS_NOT_AN_ENTITY; diff --git a/mozilla/parser/htmlparser/src/nsScanner.cpp b/mozilla/parser/htmlparser/src/nsScanner.cpp index f851e919f49..bf63e2fb331 100644 --- a/mozilla/parser/htmlparser/src/nsScanner.cpp +++ b/mozilla/parser/htmlparser/src/nsScanner.cpp @@ -733,14 +733,12 @@ nsresult nsScanner::SkipPast(nsString& aValidSet){ } /** - * Consume characters until you did not find the terminal char + * Consume characters until you run into space, a '<', a '>', or a '/'. * - * @update gess 3/25/98 * @param aString - receives new data from stream - * @param aIgnore - If set ignores ':','-','_','.' * @return error code */ -nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) { +nsresult nsScanner::ReadTagIdentifier(nsString& aString) { if (!mSlidingBuffer) { return kEOF; @@ -758,26 +756,29 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) { theChar=*current; if(theChar) { - found=PR_FALSE; + found = PR_TRUE; switch(theChar) { - case ':': - case '_': - case '-': - case '.': - found=allowPunct; + case '\n': + case '\r': + case ' ' : + case '\b': + case '\t': + case '\v': + case '\f': + case '<': + case '>': + case '/': + found = PR_FALSE; break; default: - found = ('a'<=theChar && theChar<='z') || - ('A'<=theChar && theChar<='Z') || - ('0'<=theChar && theChar<='9'); break; } if(!found) { // If we the current character isn't a valid character for - // the identifier, we're done. Copy the results into + // the identifier, we're done. Append the results to // the string passed in. - CopyUnicodeTo(mCurrentPosition, current, aString); + AppendUnicodeTo(mCurrentPosition, current, aString); break; } } @@ -795,14 +796,13 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) { } /** - * Consume characters until you did not find the terminal char + * Consume characters until you run into a char that's not valid in an + * entity name * - * @update gess 3/25/98 * @param aString - receives new data from stream - * @param allowPunct - If set ignores ':','-','_','.' * @return error code */ -nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) { +nsresult nsScanner::ReadEntityIdentifier(nsString& aString) { if (!mSlidingBuffer) { return kEOF; @@ -823,11 +823,11 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) { if(theChar) { found=PR_FALSE; switch(theChar) { - case ':': case '_': case '-': case '.': - found=allowPunct; + // Don't allow ':' in entity names. See bug 23791 + found = PR_TRUE; break; default: found = ('a'<=theChar && theChar<='z') || @@ -855,65 +855,6 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) { return result; } -nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart, - nsScannerIterator& aEnd, - PRBool allowPunct) { - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - nsScannerIterator origin, current, end; - PRBool found=PR_FALSE; - - origin = mCurrentPosition; - current = mCurrentPosition; - end = mEndPosition; - - while(current != end) { - - theChar=*current; - if(theChar) { - found=PR_FALSE; - switch(theChar) { - case ':': - case '_': - case '-': - found=allowPunct; - break; - default: - if(('a'<=theChar) && (theChar<='z')) - found=PR_TRUE; - else if(('A'<=theChar) && (theChar<='Z')) - found=PR_TRUE; - else if(('0'<=theChar) && (theChar<='9')) - found=PR_TRUE; - break; - } - - if(!found) { - aStart = mCurrentPosition; - aEnd = current; - break; - } - } - ++current; - } - - SetPosition(current); - if (current == end) { - aStart = origin; - aEnd = current; - return Eof(); - } - - //DoErrTest(aString); - - return result; -} - /** * Consume digits * diff --git a/mozilla/parser/htmlparser/src/nsScanner.h b/mozilla/parser/htmlparser/src/nsScanner.h index 61579ccf543..d7fcde1cd12 100644 --- a/mozilla/parser/htmlparser/src/nsScanner.h +++ b/mozilla/parser/htmlparser/src/nsScanner.h @@ -187,18 +187,21 @@ class nsScanner { nsresult Eof(void); /** - * Consume characters until you find the terminal char + * Consume characters until you run into space, a '<', a '>', or a '/'. * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param addTerminal tells us whether to append terminal to aString + * @param aString - receives new data from stream * @return error code */ - nsresult GetIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE); - nsresult ReadIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE); - nsresult ReadIdentifier(nsScannerIterator& aStart, - nsScannerIterator& aEnd, - PRBool allowPunct=PR_FALSE); + nsresult ReadTagIdentifier(nsString& aString); + + /** + * Consume characters until you run into a char that's not valid in an + * entity name + * + * @param aString - receives new data from stream + * @return error code + */ + nsresult ReadEntityIdentifier(nsString& aString); nsresult ReadNumber(nsString& aString,PRInt32 aBase); nsresult ReadWhitespace(nsString& aString, PRInt32& aNewlinesSkipped);