From 4fa2effbda2bdd46fcad9a841a19092fd4e3f69e Mon Sep 17 00:00:00 2001 From: "harishd%netscape.com" Date: Thu, 29 Jun 2000 23:05:46 +0000 Subject: [PATCH] Bug 40477 ( nsbeta2+ ) - Don't rely on ';' to terminate entities. r=pollmann. git-svn-id: svn://10.0.0.236/trunk@73558 18797224-902f-48f8-a5cc-f745e15eee43 --- .../html/document/src/nsHTMLContentSink.cpp | 81 ++++++++++++++++--- .../html/document/src/nsHTMLContentSink.cpp | 81 ++++++++++++++++--- 2 files changed, 136 insertions(+), 26 deletions(-) diff --git a/mozilla/content/html/document/src/nsHTMLContentSink.cpp b/mozilla/content/html/document/src/nsHTMLContentSink.cpp index 248a913c38b..ad0f05818c3 100644 --- a/mozilla/content/html/document/src/nsHTMLContentSink.cpp +++ b/mozilla/content/html/document/src/nsHTMLContentSink.cpp @@ -504,6 +504,47 @@ HTMLContentSink::SinkTraceNode(PRUint32 aBit, } #endif +/** + * Helper to find identifiers that can terminate an entity + * + * harishd 06/23/00 + * + * @param aSource - Search for entity terminator in this string + * @param aChar - Holds the terminating character + * @param aStartOffset - Begins search, in aSource, from this offset. 
+ */ + +PRInt32 +GetEntityTerminator(nsString& aSource,PRUnichar& aChar,PRInt32 aStartOffset=0) { + + PRUnichar theChar=aChar=0; + PRInt32 theOffset=aStartOffset; + PRInt32 theLength=aSource.Length(); + PRBool found=PR_FALSE; + + while(theOffsetGetDTDMode(mode); while(-1!=theAmpPos) { @@ -534,19 +580,19 @@ void HTMLContentSink::ReduceEntities(nsString& aString) { theOutString.Append(&theBuf[theStartPos],theAmpPos-theStartPos); } - PRInt32 theSemiPos=aString.FindChar(';',PR_FALSE,theAmpPos+1); + theTermPos=GetEntityTerminator(aString,theTermChar,theAmpPos+1); - if(-1!=theSemiPos) { + if(-1!=theTermPos) { //having found a semi, copy chars between amppos and semipos; - aString.Mid(theNCRStr,theAmpPos+1,theSemiPos-theAmpPos-1); + aString.Mid(theNCRStr,theAmpPos+1,theTermPos-theAmpPos-1); } else { aString.Mid(theNCRStr,theAmpPos+1,theLen-theAmpPos-1); PRInt32 theNewAmpPos=aString.FindChar('&',PR_FALSE,theAmpPos+1); - theSemiPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1; + theTermPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1; } - theStartPos=theSemiPos+1; + theStartPos=theTermPos+1; PRUnichar theChar=(theLen>theAmpPos+1) ? aString.CharAt(theAmpPos+1) : '\0'; PRUnichar theEntity=0; @@ -564,24 +610,33 @@ void HTMLContentSink::ReduceEntities(nsString& aString) { default: if(nsCRT::IsAsciiAlpha(theChar)) { dtd->ConvertEntityToUnicode(theNCRStr, &theNCRValue); + if (eDTDMode_strict!=mode) { + // XXX - Hack - Nav. does not support entity values > 255 + // on the other hand IE supports entity values > 255 with a + // semicolon. I think it's reasonable to emulate IE than Nav. + if(theNCRValue>255 && theTermChar!=';') break; + } if(-1!=theNCRValue) { theEntity=PRUnichar(theNCRValue); } } - if(!theEntity) { - //what looked like an entity is not really one. 
- //so let's copy the ncrstring back to the output string - aString.Mid(theNCRStr,theAmpPos,theSemiPos-theAmpPos+1); - theOutString.Append(theNCRStr); - } break; } //switch if(theEntity) { theOutString.Append(theEntity); + if(theTermChar!='\0' && theTermChar!='&' && theTermChar!=';') { + theOutString.Append(theTermChar); + } } - theAmpPos = aString.FindChar('&',PR_FALSE,theSemiPos+1); - + else { + //what looked like an entity is not really one. + //so let's copy the ncrstring back to the output string + if(theTermChar!='&') { theTermPos++; } + aString.Mid(theNCRStr,theAmpPos,theTermPos-theAmpPos); + theOutString.Append(theNCRStr); + } + theAmpPos = aString.FindChar('&',PR_FALSE,theTermPos); } //while if(0GetDTDMode(mode); while(-1!=theAmpPos) { @@ -534,19 +580,19 @@ void HTMLContentSink::ReduceEntities(nsString& aString) { theOutString.Append(&theBuf[theStartPos],theAmpPos-theStartPos); } - PRInt32 theSemiPos=aString.FindChar(';',PR_FALSE,theAmpPos+1); + theTermPos=GetEntityTerminator(aString,theTermChar,theAmpPos+1); - if(-1!=theSemiPos) { + if(-1!=theTermPos) { //having found a semi, copy chars between amppos and semipos; - aString.Mid(theNCRStr,theAmpPos+1,theSemiPos-theAmpPos-1); + aString.Mid(theNCRStr,theAmpPos+1,theTermPos-theAmpPos-1); } else { aString.Mid(theNCRStr,theAmpPos+1,theLen-theAmpPos-1); PRInt32 theNewAmpPos=aString.FindChar('&',PR_FALSE,theAmpPos+1); - theSemiPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1; + theTermPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1; } - theStartPos=theSemiPos+1; + theStartPos=theTermPos+1; PRUnichar theChar=(theLen>theAmpPos+1) ? aString.CharAt(theAmpPos+1) : '\0'; PRUnichar theEntity=0; @@ -564,24 +610,33 @@ void HTMLContentSink::ReduceEntities(nsString& aString) { default: if(nsCRT::IsAsciiAlpha(theChar)) { dtd->ConvertEntityToUnicode(theNCRStr, &theNCRValue); + if (eDTDMode_strict!=mode) { + // XXX - Hack - Nav. 
does not support entity values > 255 + // on the other hand IE supports entity values > 255 with a + // semicolon. I think it's reasonable to emulate IE than Nav. + if(theNCRValue>255 && theTermChar!=';') break; + } if(-1!=theNCRValue) { theEntity=PRUnichar(theNCRValue); } } - if(!theEntity) { - //what looked like an entity is not really one. - //so let's copy the ncrstring back to the output string - aString.Mid(theNCRStr,theAmpPos,theSemiPos-theAmpPos+1); - theOutString.Append(theNCRStr); - } break; } //switch if(theEntity) { theOutString.Append(theEntity); + if(theTermChar!='\0' && theTermChar!='&' && theTermChar!=';') { + theOutString.Append(theTermChar); + } } - theAmpPos = aString.FindChar('&',PR_FALSE,theSemiPos+1); - + else { + //what looked like an entity is not really one. + //so let's copy the ncrstring back to the output string + if(theTermChar!='&') { theTermPos++; } + aString.Mid(theNCRStr,theAmpPos,theTermPos-theAmpPos); + theOutString.Append(theNCRStr); + } + theAmpPos = aString.FindChar('&',PR_FALSE,theTermPos); } //while if(0