diff --git a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
index 89f8907b660..45a9b443ec5 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -182,7 +182,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag
nsresult result=NS_OK;
if (aFlag & NS_IPARSER_FLAG_HTML) {
nsAutoString theSubstr;
- result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
+ result=aScanner.ReadTagIdentifier(theSubstr);
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
// Save the original tag string if this is user-defined or if we
// are viewing source
@@ -195,7 +195,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag
//was written but since we didn't respect the '_', we only saw . Then
//we searched for end title, which never comes (they give ).
- result=aScanner.ReadIdentifier(mTextValue,PR_TRUE);
+ result=aScanner.ReadTagIdentifier(mTextValue);
mTypeID = nsHTMLTags::LookupTag(mTextValue);
}
@@ -284,7 +284,7 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
nsresult result = NS_OK;
if (aFlag & NS_IPARSER_FLAG_HTML) {
nsAutoString theSubstr;
- result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
+ result=aScanner.ReadTagIdentifier(theSubstr);
NS_ENSURE_SUCCESS(result, result);
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
@@ -296,7 +296,7 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
}
}
else {
- result = aScanner.ReadIdentifier(mTextValue,PR_TRUE);
+ result = aScanner.ReadTagIdentifier(mTextValue);
NS_ENSURE_SUCCESS(result, result);
mTypeID = nsHTMLTags::LookupTag(mTextValue);
@@ -1939,7 +1939,7 @@ CEntityToken::ConsumeEntity(PRUnichar aChar,
theChar == '_' ||
theChar == ':') {
aScanner.GetChar(aChar); // Consume &
- result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
+ result=aScanner.ReadEntityIdentifier(aString);
}
else {
return NS_HTMLTOKENS_NOT_AN_ENTITY;
diff --git a/mozilla/parser/htmlparser/src/nsScanner.cpp b/mozilla/parser/htmlparser/src/nsScanner.cpp
index f851e919f49..bf63e2fb331 100644
--- a/mozilla/parser/htmlparser/src/nsScanner.cpp
+++ b/mozilla/parser/htmlparser/src/nsScanner.cpp
@@ -733,14 +733,12 @@ nsresult nsScanner::SkipPast(nsString& aValidSet){
}
/**
- * Consume characters until you did not find the terminal char
+ * Consume characters until you run into whitespace, a '<', a '>', or a '/'.
*
- * @update gess 3/25/98
* @param aString - receives new data from stream
- * @param aIgnore - If set ignores ':','-','_','.'
* @return error code
*/
-nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
+nsresult nsScanner::ReadTagIdentifier(nsString& aString) {
if (!mSlidingBuffer) {
return kEOF;
@@ -758,26 +756,29 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
theChar=*current;
if(theChar) {
- found=PR_FALSE;
+ found = PR_TRUE;
switch(theChar) {
- case ':':
- case '_':
- case '-':
- case '.':
- found=allowPunct;
+ case '\n':
+ case '\r':
+ case ' ' :
+ case '\b':
+ case '\t':
+ case '\v':
+ case '\f':
+ case '<':
+ case '>':
+ case '/':
+ found = PR_FALSE;
break;
default:
- found = ('a'<=theChar && theChar<='z') ||
- ('A'<=theChar && theChar<='Z') ||
- ('0'<=theChar && theChar<='9');
break;
}
if(!found) {
// If we the current character isn't a valid character for
- // the identifier, we're done. Copy the results into
+ // the identifier, we're done. Append the results to
// the string passed in.
- CopyUnicodeTo(mCurrentPosition, current, aString);
+ AppendUnicodeTo(mCurrentPosition, current, aString);
break;
}
}
@@ -795,14 +796,13 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
}
/**
- * Consume characters until you did not find the terminal char
+ * Consume characters until you run into a char that's not valid in an
+ * entity name
*
- * @update gess 3/25/98
* @param aString - receives new data from stream
- * @param allowPunct - If set ignores ':','-','_','.'
* @return error code
*/
-nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
+nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
if (!mSlidingBuffer) {
return kEOF;
@@ -823,11 +823,11 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
if(theChar) {
found=PR_FALSE;
switch(theChar) {
- case ':':
case '_':
case '-':
case '.':
- found=allowPunct;
+ // Don't allow ':' in entity names. See bug 23791
+ found = PR_TRUE;
break;
default:
found = ('a'<=theChar && theChar<='z') ||
@@ -855,65 +855,6 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
return result;
}
-nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart,
- nsScannerIterator& aEnd,
- PRBool allowPunct) {
-
- if (!mSlidingBuffer) {
- return kEOF;
- }
-
- PRUnichar theChar=0;
- nsresult result=Peek(theChar);
- nsScannerIterator origin, current, end;
- PRBool found=PR_FALSE;
-
- origin = mCurrentPosition;
- current = mCurrentPosition;
- end = mEndPosition;
-
- while(current != end) {
-
- theChar=*current;
- if(theChar) {
- found=PR_FALSE;
- switch(theChar) {
- case ':':
- case '_':
- case '-':
- found=allowPunct;
- break;
- default:
- if(('a'<=theChar) && (theChar<='z'))
- found=PR_TRUE;
- else if(('A'<=theChar) && (theChar<='Z'))
- found=PR_TRUE;
- else if(('0'<=theChar) && (theChar<='9'))
- found=PR_TRUE;
- break;
- }
-
- if(!found) {
- aStart = mCurrentPosition;
- aEnd = current;
- break;
- }
- }
- ++current;
- }
-
- SetPosition(current);
- if (current == end) {
- aStart = origin;
- aEnd = current;
- return Eof();
- }
-
- //DoErrTest(aString);
-
- return result;
-}
-
/**
* Consume digits
*
diff --git a/mozilla/parser/htmlparser/src/nsScanner.h b/mozilla/parser/htmlparser/src/nsScanner.h
index 61579ccf543..d7fcde1cd12 100644
--- a/mozilla/parser/htmlparser/src/nsScanner.h
+++ b/mozilla/parser/htmlparser/src/nsScanner.h
@@ -187,18 +187,21 @@ class nsScanner {
nsresult Eof(void);
/**
- * Consume characters until you find the terminal char
+ * Consume characters until you run into whitespace, a '<', a '>', or a '/'.
*
- * @update gess 3/25/98
- * @param aString receives new data from stream
- * @param addTerminal tells us whether to append terminal to aString
+ * @param aString - receives new data from stream
* @return error code
*/
- nsresult GetIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE);
- nsresult ReadIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE);
- nsresult ReadIdentifier(nsScannerIterator& aStart,
- nsScannerIterator& aEnd,
- PRBool allowPunct=PR_FALSE);
+ nsresult ReadTagIdentifier(nsString& aString);
+
+ /**
+ * Consume characters until you run into a char that's not valid in an
+ * entity name
+ *
+ * @param aString - receives new data from stream
+ * @return error code
+ */
+ nsresult ReadEntityIdentifier(nsString& aString);
nsresult ReadNumber(nsString& aString,PRInt32 aBase);
nsresult ReadWhitespace(nsString& aString,
PRInt32& aNewlinesSkipped);