Bug 91437 - Speeding up the tokenizer slightly by using a smarter scan algorithm and by not trying to skip whitespace when there is none. r=harishd sr=jst

git-svn-id: svn://10.0.0.236/trunk@101192 18797224-902f-48f8-a5cc-f745e15eee43
2001-08-16 05:24:17 +00:00
parent 2d23b9911a
commit d4bca51618
6 changed files with 320 additions and 354 deletions
--- a/mozilla/htmlparser/src/nsScanner.cpp
+++ b/mozilla/htmlparser/src/nsScanner.cpp
@@ -48,6 +48,25 @@ nsScannerString::ReplaceCharacter(nsReadingIterator<PRUnichar>& aPosition,
  *pos = aChar;
 }

+nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) :
+  mChars(aTerminateChars), mFilter(PRUnichar(~0)) // Start with all bits set
+{
+  // Build a mask that cheaply rejects characters which cannot be terminal
+  // chars: mFilter keeps a 1 only in bit positions that are 0 in every
+  // terminal char, so a character sharing any bit with mFilter is not in the
+  // set. This works well because terminal chars often have only the low 4-6
+  // bits set, while ASCII letters (and other letters) have higher bits set.
+  
+  // Clear from the mask every bit used by a char of the zero-terminated set
+  const PRUnichar *current = aTerminateChars;
+  PRUnichar terminalChar = *current;
+  while (terminalChar) {
+    mFilter &= ~terminalChar;
+    ++current;
+    terminalChar = *current;
+  }
+}
+
 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);

 const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
@@ -533,17 +552,18 @@ nsresult nsScanner::SkipWhitespace(void) {
    return kEOF;
  }

-  PRUnichar         theChar=0;
-  nsresult          result=Peek(theChar);
-  nsReadingIterator<PRUnichar> current, end;
-  PRBool            found=PR_FALSE;  
+  nsReadingIterator<PRUnichar> current;
+  PRBool            found;
+  PRBool            skipped = PR_FALSE;

  mNewlinesSkipped = 0;
  current = mCurrentPosition;
-  end = mEndPosition;

-  while(current != end) {
-    theChar=*current;
+  PRUnichar         theChar=0;
+  nsresult          result=Peek(theChar);
+  NS_ENSURE_SUCCESS(result, result);
+
+  while (current != mEndPosition) {
    switch(theChar) {
      case '\n': mNewlinesSkipped++;
      case ' ' :
@@ -559,19 +579,22 @@ nsresult nsScanner::SkipWhitespace(void) {
    if(!found) {
      break;
    }
-    else {
-      ++current;
-    }
+    ++current;
+    theChar = *current;
+    skipped = PR_TRUE;
  }

-  SetPosition(current);
-  if (current == end) {
+  if (!skipped)
+    return NS_OK;
+  
+  if (current == mEndPosition) {
+    SetPosition(current);
    return Eof();
  }

-  //DoErrTest(aString);
+  SetPosition(current);

-  return result;
+  return NS_OK;

 }

@@ -1156,156 +1179,113 @@ nsresult nsScanner::ReadWhile(nsString& aString,
 *  @return  error code
 */
 nsresult nsScanner::ReadUntil(nsAWritableString& aString,
-                              const nsAFlatString& aTerminalSet,
+                              const nsReadEndCondition& aEndCondition,
                              PRBool addTerminal)
 {  
  if (!mSlidingBuffer) {
    return kEOF;
  }

-  PRUnichar         theChar=0;
-  nsresult          result=Peek(theChar);
-  nsReadingIterator<PRUnichar> origin, current, end;
-  const PRUnichar* setstart = aTerminalSet.get();
+  nsReadingIterator<PRUnichar> origin, current;
+  const PRUnichar* setstart = aEndCondition.mChars;
  const PRUnichar* setcurrent;

  origin = mCurrentPosition;
  current = origin;
-  end = mEndPosition;
-
-  while(current != end) {
-    setcurrent = setstart;
-    theChar=*current;
-    if(theChar) {
-      while (*setcurrent) {
-        if (*setcurrent == theChar) {
-          if(addTerminal)
-            ++current;
-          AppendUnicodeTo(origin, current, aString);
-          goto found;
-        }
-        ++setcurrent;
-      }
-    }
-    ++current;
-  }
-found:
-
-  SetPosition(current);
-  if (current == end) {
-    AppendUnicodeTo(origin, current, aString);
-    return Eof();
-  }
-
-  //DoErrTest(aString);
-
-  return result;
-
-}
-
-/**
- *  Consume characters until you encounter one contained in given
- *  input set.
- *  
- *  @update  gess 3/25/98
- *  @param   aString will contain the result of this method
- *  @param   aTerminalSet is an ordered string that contains
- *           the set of INVALID characters
- *  @return  error code
- */
-nsresult nsScanner::ReadUntil(nsAWritableString& aString,
-                              const nsAFlatCString& aTerminalSet,
-                              PRBool addTerminal)
-{
-  if (!mSlidingBuffer) {
-    return kEOF;
-  }

  PRUnichar         theChar=0;
  nsresult          result=Peek(theChar);
-  nsReadingIterator<PRUnichar> origin, current, end;
-  const char* setstart = aTerminalSet.get();
-  const char* setcurrent;
+  NS_ENSURE_SUCCESS(result, result);
+  
+  while (current != mEndPosition) {
+    // Cheap pre-check: a character with a bit set in mFilter cannot be a
+    // terminal char. Check if all its bits are within the terminal chars' bits
+    if(!(theChar & aEndCondition.mFilter)) {
+      // They were. Do a thorough check.

-  origin = mCurrentPosition;
-  current = origin;
-  end = mEndPosition;
-
-  while(current != end) {
-    setcurrent = setstart;
-    theChar=*current;
-    if(theChar) {
+      setcurrent = setstart;
      while (*setcurrent) {
        if (*setcurrent == theChar) {
-          if(addTerminal)
-            ++current;
-          AppendUnicodeTo(origin, current, aString);
          goto found;
        }
        ++setcurrent;
      }
    }
+    
    ++current;
+    theChar = *current;
  }
-found:

+  // If we are here, we didn't find any terminator in the string and
+  // current == mEndPosition
  SetPosition(current);
-  if (current == end) {
-    AppendUnicodeTo(origin, current, aString);
-    return Eof();
-  }
+  AppendUnicodeTo(origin, current, aString);
+  return Eof();
+
+found:
+  if(addTerminal)
+    ++current;
+  AppendUnicodeTo(origin, current, aString);
+  SetPosition(current);
+
  //DoErrTest(aString);

-  return result;
+  return NS_OK;
 }

-
 nsresult nsScanner::ReadUntil(nsReadingIterator<PRUnichar>& aStart, 
                              nsReadingIterator<PRUnichar>& aEnd,
-                              const nsAFlatString& aTerminalSet,
+                              const nsReadEndCondition &aEndCondition,
                              PRBool addTerminal)
 {
  if (!mSlidingBuffer) {
    return kEOF;
  }

-  PRUnichar         theChar=0;
-  nsresult          result=Peek(theChar);
-  nsReadingIterator<PRUnichar> origin, current, end;
-  const PRUnichar* setstart = aTerminalSet.get();
+  nsReadingIterator<PRUnichar> origin, current;
+  const PRUnichar* setstart = aEndCondition.mChars;
  const PRUnichar* setcurrent;

  origin = mCurrentPosition;
  current = origin;
-  end = mEndPosition;

-  while(current != end) {
-    setcurrent = setstart;
-    theChar=*current;
-    if(theChar) {
+  PRUnichar         theChar=0;
+  nsresult          result=Peek(theChar);
+  NS_ENSURE_SUCCESS(result, result);
+  
+  while (current != mEndPosition) {
+    // Cheap pre-check: a character with a bit set in mFilter cannot be a
+    // terminal char. Check if all its bits are within the terminal chars' bits
+    if(!(theChar & aEndCondition.mFilter)) {
+      // They were. Do a thorough check.
+      setcurrent = setstart;
      while (*setcurrent) {
        if (*setcurrent == theChar) {
-          if(addTerminal)
-            ++current;
-          aStart = origin;
-          aEnd = current;
          goto found;
        }
-        ++setcurrent;
+      ++setcurrent;
      }
    }
+    
    ++current;
+    theChar = *current;
  }
-found:

+  // If we are here, we didn't find any terminator in the string and
+  // current == mEndPosition
  SetPosition(current);
-  if (current == end) {
-    aStart = origin;
-    aEnd = current;
-    return Eof();
-  }
+  aStart = origin;
+  aEnd = current;
+  return Eof();

-  return result;
+ found:
+  if(addTerminal)
+    ++current;
+  aStart = origin;
+  aEnd = current;
+  SetPosition(current);
+
+  return NS_OK; 
 }

 /**
@@ -1323,36 +1303,32 @@ nsresult nsScanner::ReadUntil(nsAWritableString& aString,
    return kEOF;
  }

-  PRUnichar theChar=0;
-  nsresult  result=Peek(theChar);
-  nsReadingIterator<PRUnichar> origin, current, end;
+  nsReadingIterator<PRUnichar> origin, current;

  origin = mCurrentPosition;
  current = origin;
-  end = mEndPosition;

-  while(current != end) {
-    
-    theChar=*current;
-    if(theChar) {
-      if(aTerminalChar==theChar) {
-        if(addTerminal)
-          ++current;
-        AppendUnicodeTo(origin, current, aString);
-        break;
-      }
+  PRUnichar theChar;
+  nsresult  result=Peek(theChar);
+  NS_ENSURE_SUCCESS(result, result);
+  
+  while (current != mEndPosition) {
+    if (aTerminalChar == theChar) {
+      if(addTerminal)
+        ++current;
+      AppendUnicodeTo(origin, current, aString);
+      SetPosition(current);
+      return NS_OK;
    }
    ++current;
+    theChar = *current;
  }

+  // If we are here, we didn't find any terminator in the string and
+  // current == mEndPosition
+  AppendUnicodeTo(origin, current, aString);
  SetPosition(current);
-  if (current == end) {
-    AppendUnicodeTo(origin, current, aString);
-    return Eof();
-  }
-
-  //DoErrTest(aString);
-  return result;
+  return Eof();

 }