From beda106d3bea790fa757187bfebdac24b888cc05 Mon Sep 17 00:00:00 2001
From: "peterv%propagandism.org"
 <peterv%propagandism.org@18797224-902f-48f8-a5cc-f745e15eee43>
Date: Sun, 2 May 2004 11:16:26 +0000
Subject: [PATCH] Merging patch by bz (from mozilla/htmlparser). Change our
 concept of a "tagname" to include all sorts of random chars like IE does
 (that is, allow any char that's not in a short list of terminal chars). Bug
 236002, r=choess, sr=peterv

git-svn-id: svn://10.0.0.236/trunk@155822 18797224-902f-48f8-a5cc-f745e15eee43
---
 .../parser/htmlparser/src/nsHTMLTokens.cpp    |  10 +-
 mozilla/parser/htmlparser/src/nsScanner.cpp   | 101 ++++--------------
 mozilla/parser/htmlparser/src/nsScanner.h     |  21 ++--
 3 files changed, 38 insertions(+), 94 deletions(-)
diff --git a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
index 89f8907b660..45a9b443ec5 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -182,7 +182,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag
   nsresult result=NS_OK;
   if (aFlag & NS_IPARSER_FLAG_HTML) {
     nsAutoString theSubstr;
-    result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
+    result=aScanner.ReadTagIdentifier(theSubstr);
     mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
     // Save the original tag string if this is user-defined or if we
     // are viewing source
@@ -195,7 +195,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag
     //was written <title_> but since we didn't respect the '_', we only saw <title>. Then 
     //we searched for end title, which never comes (they give </title_>). 
 
-    result=aScanner.ReadIdentifier(mTextValue,PR_TRUE);  
+    result=aScanner.ReadTagIdentifier(mTextValue);  
     mTypeID = nsHTMLTags::LookupTag(mTextValue);
   }
 
@@ -284,7 +284,7 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
   nsresult result = NS_OK;
   if (aFlag & NS_IPARSER_FLAG_HTML) {
     nsAutoString theSubstr;
-    result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
+    result=aScanner.ReadTagIdentifier(theSubstr);
     NS_ENSURE_SUCCESS(result, result);
     
     mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
@@ -296,7 +296,7 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
     }
   }
   else {
-    result = aScanner.ReadIdentifier(mTextValue,PR_TRUE);
+    result = aScanner.ReadTagIdentifier(mTextValue);
     NS_ENSURE_SUCCESS(result, result);
 
     mTypeID = nsHTMLTags::LookupTag(mTextValue);
@@ -1939,7 +1939,7 @@ CEntityToken::ConsumeEntity(PRUnichar aChar,
         theChar == '_' ||
         theChar == ':') {
         aScanner.GetChar(aChar); // Consume &
-        result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
+        result=aScanner.ReadEntityIdentifier(aString);
       }
       else {
         return NS_HTMLTOKENS_NOT_AN_ENTITY;
diff --git a/mozilla/parser/htmlparser/src/nsScanner.cpp b/mozilla/parser/htmlparser/src/nsScanner.cpp
index f851e919f49..bf63e2fb331 100644
--- a/mozilla/parser/htmlparser/src/nsScanner.cpp
+++ b/mozilla/parser/htmlparser/src/nsScanner.cpp
@@ -733,14 +733,12 @@ nsresult nsScanner::SkipPast(nsString& aValidSet){
 }
 
 /**
- *  Consume characters until you did not find the terminal char
+ *  Consume characters until you run into whitespace, '\b', '<', '>', or '/'.
  *  
- *  @update  gess 3/25/98
  *  @param   aString - receives new data from stream
- *  @param   aIgnore - If set ignores ':','-','_','.'
  *  @return  error code
  */
-nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
+nsresult nsScanner::ReadTagIdentifier(nsString& aString) {
 
   if (!mSlidingBuffer) {
     return kEOF;
@@ -758,26 +756,29 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
  
     theChar=*current;
     if(theChar) {
-      found=PR_FALSE;
+      found = PR_TRUE;
       switch(theChar) {
-        case ':':
-        case '_':
-        case '-':
-        case '.':
-          found=allowPunct;
+        case '\n':
+        case '\r':
+        case ' ' :
+        case '\b':
+        case '\t':
+        case '\v':
+        case '\f':
+        case '<':
+        case '>':
+        case '/':
+          found = PR_FALSE;
           break;
         default:
-          found = ('a'<=theChar && theChar<='z') ||
-                  ('A'<=theChar && theChar<='Z') ||
-                  ('0'<=theChar && theChar<='9');
           break;
       }
 
       if(!found) {
         // If we the current character isn't a valid character for
-        // the identifier, we're done. Copy the results into
+        // the identifier, we're done. Append the results to
         // the string passed in.
-        CopyUnicodeTo(mCurrentPosition, current, aString);
+        AppendUnicodeTo(mCurrentPosition, current, aString);
         break;
       }
     }
@@ -795,14 +796,13 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
 }
 
 /**
- *  Consume characters until you did not find the terminal char
+ *  Consume characters until you run into a char that's not valid in an
+ *  entity name
  *  
- *  @update  gess 3/25/98
  *  @param   aString - receives new data from stream
- *  @param   allowPunct - If set ignores ':','-','_','.'
  *  @return  error code
  */
-nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
+nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
 
   if (!mSlidingBuffer) {
     return kEOF;
@@ -823,11 +823,11 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
     if(theChar) {
       found=PR_FALSE;
       switch(theChar) {
-        case ':':
         case '_':
         case '-':
         case '.':
-          found=allowPunct;
+          // Don't allow ':' in entity names.  See bug 23791
+          found = PR_TRUE;
           break;
         default:
           found = ('a'<=theChar && theChar<='z') ||
@@ -855,65 +855,6 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
   return result;
 }
 
-nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart,
-                                   nsScannerIterator& aEnd,
-                                   PRBool allowPunct) {
-
-  if (!mSlidingBuffer) {
-    return kEOF;
-  }
-
-  PRUnichar         theChar=0;
-  nsresult          result=Peek(theChar);
-  nsScannerIterator origin, current, end;
-  PRBool            found=PR_FALSE;  
-
-  origin = mCurrentPosition;
-  current = mCurrentPosition;
-  end = mEndPosition;
-
-  while(current != end) {
- 
-    theChar=*current;
-    if(theChar) {
-      found=PR_FALSE;
-      switch(theChar) {
-        case ':':
-        case '_':
-        case '-':
-          found=allowPunct;
-          break;
-        default:
-          if(('a'<=theChar) && (theChar<='z'))
-            found=PR_TRUE;
-          else if(('A'<=theChar) && (theChar<='Z'))
-            found=PR_TRUE;
-          else if(('0'<=theChar) && (theChar<='9'))
-            found=PR_TRUE;
-          break;
-      }
-
-      if(!found) {
-        aStart = mCurrentPosition;
-        aEnd = current;
-        break;
-      }
-    }
-    ++current;
-  }
-  
-  SetPosition(current);
-  if (current == end) {
-    aStart = origin;
-    aEnd = current;
-    return Eof();
-  }
-
-  //DoErrTest(aString);
-
-  return result;
-}
-
 /**
  *  Consume digits 
  *  
diff --git a/mozilla/parser/htmlparser/src/nsScanner.h b/mozilla/parser/htmlparser/src/nsScanner.h
index 61579ccf543..d7fcde1cd12 100644
--- a/mozilla/parser/htmlparser/src/nsScanner.h
+++ b/mozilla/parser/htmlparser/src/nsScanner.h
@@ -187,18 +187,21 @@ class nsScanner {
       nsresult Eof(void);
 
       /**
-       *  Consume characters until you find the terminal char
+       *  Consume characters until you run into whitespace, '\b', '<', '>', or '/'.
        *  
-       *  @update  gess 3/25/98
-       *  @param   aString receives new data from stream
-       *  @param   addTerminal tells us whether to append terminal to aString
+       *  @param   aString - receives new data from stream
        *  @return  error code
        */
-      nsresult GetIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE);
-      nsresult ReadIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE);
-      nsresult ReadIdentifier(nsScannerIterator& aStart,
-                              nsScannerIterator& aEnd,
-                              PRBool allowPunct=PR_FALSE);
+      nsresult ReadTagIdentifier(nsString& aString);
+
+      /**
+       *  Consume characters until you run into a char that's not valid in an
+       *  entity name
+       *  
+       *  @param   aString - receives new data from stream
+       *  @return  error code
+       */
+      nsresult ReadEntityIdentifier(nsString& aString);
       nsresult ReadNumber(nsString& aString,PRInt32 aBase);
       nsresult ReadWhitespace(nsString& aString, 
                               PRInt32& aNewlinesSkipped);