Merging patch by bz (from mozilla/htmlparser).

Change our concept of a "tagname" to include all sorts of random chars like IE does (that is, allow any char that's not in a short list of terminal chars).
Bug 236002, r=choess, sr=peterv


git-svn-id: svn://10.0.0.236/trunk@155822 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
peterv%propagandism.org
2004-05-02 11:16:26 +00:00
parent 512e303238
commit beda106d3b
3 changed files with 38 additions and 94 deletions

View File

@@ -733,14 +733,12 @@ nsresult nsScanner::SkipPast(nsString& aValidSet){
}
/**
* Consume characters until you did not find the terminal char
* Consume characters until you run into space, a '<', a '>', or a '/'.
*
* @update gess 3/25/98
* @param aString - receives new data from stream
* @param aIgnore - If set ignores ':','-','_','.'
* @return error code
*/
nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
nsresult nsScanner::ReadTagIdentifier(nsString& aString) {
if (!mSlidingBuffer) {
return kEOF;
@@ -758,26 +756,29 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
theChar=*current;
if(theChar) {
found=PR_FALSE;
found = PR_TRUE;
switch(theChar) {
case ':':
case '_':
case '-':
case '.':
found=allowPunct;
case '\n':
case '\r':
case ' ' :
case '\b':
case '\t':
case '\v':
case '\f':
case '<':
case '>':
case '/':
found = PR_FALSE;
break;
default:
found = ('a'<=theChar && theChar<='z') ||
('A'<=theChar && theChar<='Z') ||
('0'<=theChar && theChar<='9');
break;
}
if(!found) {
// If the current character isn't a valid character for
// the identifier, we're done. Copy the results into
// the identifier, we're done. Append the results to
// the string passed in.
CopyUnicodeTo(mCurrentPosition, current, aString);
AppendUnicodeTo(mCurrentPosition, current, aString);
break;
}
}
@@ -795,14 +796,13 @@ nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
}
/**
* Consume characters until you did not find the terminal char
* Consume characters until you run into a char that's not valid in an
* entity name
*
* @update gess 3/25/98
* @param aString - receives new data from stream
* @param allowPunct - If set ignores ':','-','_','.'
* @return error code
*/
nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
if (!mSlidingBuffer) {
return kEOF;
@@ -823,11 +823,11 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
if(theChar) {
found=PR_FALSE;
switch(theChar) {
case ':':
case '_':
case '-':
case '.':
found=allowPunct;
// Don't allow ':' in entity names. See bug 23791
found = PR_TRUE;
break;
default:
found = ('a'<=theChar && theChar<='z') ||
@@ -855,65 +855,6 @@ nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
return result;
}
/**
 * Scan forward from the current position over a run of identifier
 * characters and report the scanned range through a pair of iterators.
 *
 * ASCII letters and digits are always accepted. ':', '_' and '-' are
 * accepted only when allowPunct is set. Zero-valued characters are
 * stepped over without being tested.
 *
 * @param aStart     - receives the position the scan started from
 * @param aEnd       - receives the position where the scan stopped
 * @param allowPunct - if set, ':', '_' and '-' count as identifier chars
 * @return kEOF when there is no sliding buffer, the value of Eof() when
 *         the scan ran all the way to the end of the buffered data,
 *         otherwise the result of the initial Peek()
 */
nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart,
                                   nsScannerIterator& aEnd,
                                   PRBool allowPunct) {
  if (!mSlidingBuffer) {
    return kEOF;
  }

  PRUnichar ch = 0;
  nsresult peekStatus = Peek(ch);

  nsScannerIterator scanStart = mCurrentPosition;
  nsScannerIterator cursor = mCurrentPosition;
  nsScannerIterator limit = mEndPosition;

  for (; cursor != limit; ++cursor) {
    ch = *cursor;
    if (!ch) {
      // Zero-valued characters never terminate the identifier.
      continue;
    }

    PRBool accepted;
    if (ch == ':' || ch == '_' || ch == '-') {
      accepted = allowPunct;
    } else {
      accepted = ('a' <= ch && ch <= 'z') ||
                 ('A' <= ch && ch <= 'Z') ||
                 ('0' <= ch && ch <= '9');
    }

    if (!accepted) {
      // First non-identifier character: report the range and stop here
      // without stepping past it.
      aStart = mCurrentPosition;
      aEnd = cursor;
      break;
    }
  }

  SetPosition(cursor);

  if (cursor == limit) {
    // Ran out of buffered data before finding a terminating character.
    aStart = scanStart;
    aEnd = cursor;
    return Eof();
  }

  //DoErrTest(aString);
  return peekStatus;
}
/**
* Consume digits
*