/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is Netscape Communications
 * Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */
#include "nsCOMPtr.h"
#include "nsTextTransformer.h"
#include "nsIContent.h"
#include "nsIFrame.h"
#include "nsIStyleContext.h"
#include "nsITextContent.h"
#include "nsStyleConsts.h"
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
#include "nsHTMLIIDs.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsICaseConversion.h"
#include "prenv.h"

// Start out using the built-in (non-heap) buffer.
nsAutoTextBuffer::nsAutoTextBuffer()
  : mBuffer(mAutoBuffer),
    mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE)
{
}

// Free the buffer only if it was replaced by a heap allocation.
nsAutoTextBuffer::~nsAutoTextBuffer()
{
  if (mBuffer && (mBuffer != mAutoBuffer)) {
    delete [] mBuffer;
  }
}

/**
 * Grow the buffer by at least aAtLeast characters. The size is doubled;
 * if doubling is not enough, the new size is the old size plus aAtLeast
 * plus 100.
 *
 * @param aAtLeast    minimum number of additional characters needed
 * @param aCopyToHead passed through to GrowTo
 * @return the result of GrowTo
 */
nsresult
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
{
  PRInt32 newSize = mBufferLen * 2;
  if (newSize < mBufferLen + aAtLeast) {
    newSize = mBufferLen + aAtLeast + 100;
  }
  return GrowTo(newSize, aCopyToHead);
}

/**
 * Grow the buffer to aNewSize characters. Does nothing when the buffer
 * is already at least that large.
 *
 * @param aNewSize    requested capacity, in PRUnichar units
 * @param aCopyToHead when true the old contents are placed at the start
 *                    of the new buffer; otherwise they are placed at the
 *                    end of it, so that data that was tail-aligned in the
 *                    old buffer stays tail-aligned in the new one
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if allocation failed (the
 *         existing buffer is left untouched in that case)
 */
nsresult
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
{
  if (aNewSize > mBufferLen) {
    PRUnichar* newBuffer = new PRUnichar[aNewSize];
    if (!newBuffer) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    // The tail position must be measured from the end of the *new*
    // buffer. Using mBufferLen as the offset is only right when the size
    // exactly doubles, and overruns newBuffer when aNewSize is smaller
    // than twice the old length.
    PRInt32 destOffset = aCopyToHead ? 0 : (aNewSize - mBufferLen);
    nsCRT::memcpy(&newBuffer[destOffset], mBuffer,
                  sizeof(PRUnichar) * mBufferLen);

    if (mBuffer != mAutoBuffer) {
      delete [] mBuffer;
    }
    mBuffer = newBuffer;
    mBufferLen = aNewSize;
  }
  return NS_OK;
}

//----------------------------------------------------------------------

static NS_DEFINE_IID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
static NS_DEFINE_IID(kICaseConversionIID, NS_ICASECONVERSION_IID);

// Case conversion service shared by all transformers; acquired in
// Initialize() and released in Shutdown().
static nsICaseConversion* gCaseConv = nsnull;

// Acquire the case conversion service if it has not been acquired yet.
// Returns the service manager's result (NS_OK when already acquired).
nsresult
nsTextTransformer::Initialize()
{
  nsresult res = NS_OK;
  if (!gCaseConv) {
    res = nsServiceManager::GetService(kUnicharUtilCID, kICaseConversionIID,
                                       (nsISupports**)&gCaseConv);
    NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
    NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
  }
  return res;
}

// Release the case conversion service, if held.
void
nsTextTransformer::Shutdown()
{
  if (gCaseConv) {
    nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
    gCaseConv = nsnull;
  }
}

// For now, we have only a single character to strip out. If we get
// any more, change this to use a bitset to lookup into.
#define IS_DISCARDED(_ch) \ ((_ch) == CH_SHY) #define MAX_UNIBYTE 127 MOZ_DECL_CTOR_COUNTER(nsTextTransformer); nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker, nsIWordBreaker* aWordBreaker) : mHasMultibyte(PR_FALSE), mFrag(nsnull), mOffset(0), mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE), mMode(eNormal), mLineBreaker(aLineBreaker), mWordBreaker(aWordBreaker) { MOZ_COUNT_CTOR(nsTextTransformer); #ifdef DEBUG static PRBool firstTime = PR_TRUE; if (firstTime) { firstTime = PR_FALSE; SelfTest(aLineBreaker, aWordBreaker); } #endif } nsTextTransformer::~nsTextTransformer() { MOZ_COUNT_DTOR(nsTextTransformer); } nsresult nsTextTransformer::Init(nsIFrame* aFrame, nsIContent* aContent, PRInt32 aStartingOffset) { // Get the contents text content nsresult rv; nsCOMPtr tc = do_QueryInterface(aContent, &rv); if (tc.get()) { tc->GetText(&mFrag); // Sanitize aStartingOffset if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) { aStartingOffset = 0; } else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(), "bad starting offset")) { aStartingOffset = mFrag->GetLength(); } mOffset = aStartingOffset; // Get the frames text style information const nsStyleText* styleText; aFrame->GetStyleData(eStyleStruct_Text, (const nsStyleStruct*&) styleText); if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) { mMode = ePreformatted; } else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) { mMode = ePreWrap; } mTextTransform = styleText->mTextTransform; } return rv; } //---------------------------------------------------------------------- // wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanNormalWhiteSpace_F() { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; for (; offset < fragLen; offset++) { PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch)) { // If character is not discardable then stop looping, otherwise // let the discarded 
character collapse with the other spaces. if (!IS_DISCARDED(ch)) { break; } } } mTransformBuf.mBuffer[0] = ' '; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBuffer(); PRUnichar* endbp = mTransformBuf.GetBufferEnd(); for (; offset < fragLen; offset++) { PRUnichar ch = frag->CharAt(offset); if (XP_IS_SPACE(ch)) { break; } if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { // Strip discarded characters from the transformed output continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; if (bp == endbp) { PRInt32 oldLength = bp - mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = mTransformBuf.GetBufferEnd(); } *bp++ = ch; } *aWordLen = bp - mTransformBuf.GetBuffer(); return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak, PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; const PRUnichar* cp0 = frag->Get2b(); PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRUnichar firstChar = frag->CharAt(offset++); if (CH_NBSP == firstChar) { firstChar = ' '; } mTransformBuf.mBuffer[0] = firstChar; if (firstChar > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; // Only evaluate complex breaking logic if there are more characters // beyond the first to look at. 
PRInt32 numChars = 1; if (offset < fragLen) { const PRUnichar* cp = cp0 + offset; PRBool breakBetween = PR_FALSE; if (aForLineBreak) { mLineBreaker->BreakInBetween(mTransformBuf.GetBuffer(), 1, cp, (fragLen-offset), &breakBetween); } else { mWordBreaker->BreakInBetween(mTransformBuf.GetBuffer(), 1, cp, (fragLen-offset), &breakBetween); } if (!breakBetween) { // Find next position PRBool tryNextFrag; PRUint32 next; if (aForLineBreak) { mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag); } else { mWordBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag); } numChars = (PRInt32) (next - (PRUint32) offset) + 1; // Grow buffer before copying nsresult rv = mTransformBuf.GrowTo(numChars); if (NS_FAILED(rv)) { numChars = mTransformBuf.GetBufferLength(); } // 1. convert nbsp into space // 2. check mHasMultibyte flag // 3. copy buffer PRUnichar* bp = mTransformBuf.GetBuffer() + 1; const PRUnichar* end = cp + numChars - 1; while (cp < end) { PRUnichar ch = *cp++; if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) { // Strip discarded characters from the transformed output continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; *bp++ = ch; } // Recompute offset and numChars in case we stripped something offset += numChars - 1; numChars = bp - mTransformBuf.GetBuffer(); } } *aWordLen = numChars; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBuffer(); PRUnichar* endbp = mTransformBuf.GetBufferEnd(); for (; offset < fragLen; offset++) { PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) { if (IS_DISCARDED(ch)) { // Keep looping if this is a discarded character continue; } break; } if (bp == endbp) { PRInt32 oldLength = bp - 
mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = mTransformBuf.GetBufferEnd(); } *bp++ = ' '; } *aWordLen = bp - mTransformBuf.GetBuffer(); return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanPreData_F(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBuffer(); PRUnichar* endbp = mTransformBuf.GetBufferEnd(); for (; offset < fragLen; offset++) { PRUnichar ch = frag->CharAt(offset); if ((ch == '\t') || (ch == '\n')) { break; } if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; if (bp == endbp) { PRInt32 oldLength = bp - mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = mTransformBuf.GetBufferEnd(); } *bp++ = ch; } *aWordLen = bp - mTransformBuf.GetBuffer(); return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRUnichar* bp = mTransformBuf.GetBuffer(); PRUnichar* endbp = mTransformBuf.GetBufferEnd(); const unsigned char* cp = (const unsigned char*) frag->Get1b(); const unsigned char* end = cp + frag->GetLength(); cp += mOffset; while (cp < end) { PRUnichar ch = (PRUnichar) *cp++; if ((ch == '\t') || (ch == '\n')) { cp--; break; } if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; if (bp == endbp) { PRInt32 oldLength = bp - mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if 
(NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = mTransformBuf.GetBufferEnd(); } *bp++ = ch; } *aWordLen = bp - mTransformBuf.GetBuffer(); return cp - ((const unsigned char*)frag->Get1b()); } //---------------------------------------- PRUnichar* nsTextTransformer::GetNextWord(PRBool aInWord, PRInt32* aWordLenResult, PRInt32* aContentLenResult, PRBool* aIsWhiteSpaceResult, PRBool aForLineBreak) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRInt32 wordLen = 0; PRBool isWhitespace = PR_FALSE; PRUnichar* result = nsnull; // Fix word breaking problem w/ PREFORMAT and PREWRAP // for word breaking, we should really go to the normal code if((! aForLineBreak) && (eNormal != mMode)) mMode = eNormal; while (offset < fragLen) { PRUnichar firstChar = frag->CharAt(offset); // Eat up any discarded characters before dispatching if (IS_DISCARDED(firstChar)) { offset++; continue; } switch (mMode) { default: case eNormal: if (XP_IS_SPACE(firstChar)) { offset = ScanNormalWhiteSpace_F(); wordLen = 1; isWhitespace = PR_TRUE; } else if (frag->Is2b()) { offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen); } else { offset = ScanNormalAsciiText_F(&wordLen); } break; case ePreformatted: if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[0] = firstChar; offset++; wordLen = 1; isWhitespace = PR_TRUE; } else if (frag->Is2b()) { offset = ScanPreData_F(&wordLen); } else { offset = ScanPreAsciiData_F(&wordLen); } break; case ePreWrap: if (XP_IS_SPACE(firstChar)) { if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[0] = firstChar; offset++; wordLen = 1; } else { offset = ScanPreWrapWhiteSpace_F(&wordLen); } isWhitespace = PR_TRUE; } else if (frag->Is2b()) { offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen); } else { offset = ScanNormalAsciiText_F(&wordLen); } break; } result = 
mTransformBuf.GetBuffer(); if (!isWhitespace) { switch (mTextTransform) { case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: gCaseConv->ToTitle(result, result, wordLen, !aInWord); break; case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: gCaseConv->ToLower(result, result, wordLen); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: gCaseConv->ToUpper(result, result, wordLen); break; } } break; } *aWordLenResult = wordLen; *aContentLenResult = offset - mOffset; *aIsWhiteSpaceResult = isWhitespace; mOffset = offset; return result; } //---------------------------------------------------------------------- // wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanNormalWhiteSpace_B() { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch)) { // If character is not discardable then stop looping, otherwise // let the discarded character collapse with the other spaces. if (!IS_DISCARDED(ch)) { break; } } } mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' '; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBufferEnd(); PRUnichar* startbp = mTransformBuf.GetBuffer(); while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if (XP_IS_SPACE(ch)) { break; } if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; if (bp == startbp) { PRInt32 oldLength = mTransformBuf.mBufferLen; nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBufferEnd() - oldLength; startbp = mTransformBuf.GetBuffer(); } *--bp = ch; } *aWordLen = mTransformBuf.GetBufferEnd() - bp; return offset; } // wordlen==*aWordLen, 
contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak, PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; const PRUnichar* cp0 = frag->Get2b(); PRInt32 offset = mOffset - 1; PRUnichar firstChar = frag->CharAt(offset); if (CH_NBSP == firstChar) { firstChar = ' '; } mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar; if (firstChar > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; PRInt32 numChars = 1; if (offset > 0) { const PRUnichar* cp = cp0 + offset; PRBool breakBetween = PR_FALSE; if (aForLineBreak) { mLineBreaker->BreakInBetween(cp0, offset + 1, mTransformBuf.GetBufferEnd()-1, 1, &breakBetween); } else { mWordBreaker->BreakInBetween(cp0, offset + 1, mTransformBuf.GetBufferEnd()-1, 1, &breakBetween); } if (!breakBetween) { // Find next position PRBool tryPrevFrag; PRUint32 prev; if (aForLineBreak) { mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag); } else { mWordBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag); } numChars = (PRInt32) ((PRUint32) offset - prev) + 1; // Grow buffer before copying nsresult rv = mTransformBuf.GrowTo(numChars); if (NS_FAILED(rv)) { numChars = mTransformBuf.GetBufferLength(); } // 1. convert nbsp into space // 2. check mHasMultibyte flag // 3. 
copy buffer PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1; const PRUnichar* end = cp - numChars + 1; while (cp > end) { PRUnichar ch = *--cp; if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; *--bp = ch; } // Recompute offset and numChars in case we stripped something offset = offset - numChars; numChars = mTransformBuf.GetBufferEnd() - bp; } } *aWordLen = numChars; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBufferEnd(); PRUnichar* startbp = mTransformBuf.GetBuffer(); while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) { // Keep looping if this is a discarded character if (IS_DISCARDED(ch)) { continue; } break; } if (bp == startbp) { PRInt32 oldLength = mTransformBuf.mBufferLen; nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBufferEnd() - oldLength; startbp = mTransformBuf.GetBuffer(); } *--bp = ' '; } *aWordLen = mTransformBuf.GetBufferEnd() - bp; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanPreData_B(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBufferEnd(); PRUnichar* startbp = mTransformBuf.GetBuffer(); while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if ((ch == '\t') || (ch == '\n')) { break; } if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE; if (bp == startbp) { PRInt32 oldLength = mTransformBuf.mBufferLen; nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we 
run out of space (unlikely) then just chop the input offset++; break; } bp = mTransformBuf.GetBufferEnd() - oldLength; startbp = mTransformBuf.GetBuffer(); } *--bp = ch; } *aWordLen = mTransformBuf.GetBufferEnd() - bp; return offset; } //---------------------------------------- PRUnichar* nsTextTransformer::GetPrevWord(PRBool aInWord, PRInt32* aWordLenResult, PRInt32* aContentLenResult, PRBool* aIsWhiteSpaceResult, PRBool aForLineBreak) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRInt32 wordLen = 0; PRBool isWhitespace = PR_FALSE; PRUnichar* result = nsnull; // Fix word breaking problem w/ PREFORMAT and PREWRAP // for word breaking, we should really go to the normal code if((! aForLineBreak) && (eNormal != mMode)) mMode = eNormal; while (--offset >= 0) { PRUnichar firstChar = frag->CharAt(offset); // Eat up any discarded characters before dispatching if (IS_DISCARDED(firstChar)) { continue; } switch (mMode) { default: case eNormal: if (XP_IS_SPACE(firstChar)) { offset = ScanNormalWhiteSpace_B(); wordLen = 1; isWhitespace = PR_TRUE; } else if (frag->Is2b()) { offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen); } else { offset = ScanNormalAsciiText_B(&wordLen); } break; case ePreformatted: if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar; offset--; // make sure we overshoot wordLen = 1; isWhitespace = PR_TRUE; } else { offset = ScanPreData_B(&wordLen); } break; case ePreWrap: if (XP_IS_SPACE(firstChar)) { if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar; offset--; // make sure we overshoot wordLen = 1; } else { offset = ScanPreWrapWhiteSpace_B(&wordLen); } isWhitespace = PR_TRUE; } else if (frag->Is2b()) { offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen); } else { offset = ScanNormalAsciiText_B(&wordLen); } break; } // Backwards scanning routines *always* overshoot by one for the // returned offset value. 
offset = offset + 1; result = mTransformBuf.GetBufferEnd() - wordLen; if (!isWhitespace) { switch (mTextTransform) { case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: gCaseConv->ToTitle(result, result, wordLen, !aInWord); break; case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: gCaseConv->ToLower(result, result, wordLen); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: gCaseConv->ToUpper(result, result, wordLen); break; } } break; } *aWordLenResult = wordLen; *aContentLenResult = mOffset - offset; *aIsWhiteSpaceResult = isWhitespace; mOffset = offset; return result; } //---------------------------------------------------------------------- // Self test logic for this class. This will (hopefully) make sure // that the forward and backward word iterator methods continue to // function as people change things... #ifdef DEBUG struct SelfTestSection { int length; int* data; }; #define NUM_MODES 3 struct SelfTestData { const PRUnichar* text; SelfTestSection modes[NUM_MODES]; }; static PRUint8 preModeValue[NUM_MODES] = { NS_STYLE_WHITESPACE_NORMAL, NS_STYLE_WHITESPACE_PRE, NS_STYLE_WHITESPACE_MOZ_PRE_WRAP }; static PRUnichar test1text[] = { 'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t', 'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0 }; static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 }; static int test1PreResults[] = { 9, 1, 12 }; static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 }; static PRUnichar test2text[] = { 0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t', 0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0 }; static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 }; static int test2PreResults[] = { 9, 1, 13 }; static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 }; static PRUnichar test3text[] = { 0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0 }; static int test3Results[] = { 4, 1, 2, 1, 2, 1, }; static int test3PreResults[] = { 7, 1, 3, }; static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 
1, }; static PRUnichar test4text[] = { 'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t', 'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0 }; static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 }; static int test4PreResults[] = { 10, 1, 12 }; static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 }; static PRUnichar test5text[] = { CH_SHY, 0 }; static int test5Results[] = { 0 }; static int test5PreResults[] = { 0 }; static int test5PreWrapResults[] = { 0 }; #if 0 static PRUnichar test6text[] = { 0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d, 0x756a, 0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!', '\t', 'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0, 0x30da, 0x30fc, 0x30b8, 0x3002, 0 }; static int test6Results[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 4, 1, 5, 1, 2, 1, 2, 2 }; static int test6PreResults[] = { 20, 1, 13 }; static int test6PreWrapResults[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 4, 1, 5, 1, 2, 1, 2, 2 }; #endif static SelfTestData tests[] = { { test1text, { { sizeof(test1Results)/sizeof(int), test1Results, }, { sizeof(test1PreResults)/sizeof(int), test1PreResults, }, { sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } } }, { test2text, { { sizeof(test2Results)/sizeof(int), test2Results, }, { sizeof(test2PreResults)/sizeof(int), test2PreResults, }, { sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } } }, { test3text, { { sizeof(test3Results)/sizeof(int), test3Results, }, { sizeof(test3PreResults)/sizeof(int), test3PreResults, }, { sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } } }, { test4text, { { sizeof(test4Results)/sizeof(int), test4Results, }, { sizeof(test4PreResults)/sizeof(int), test4PreResults, }, { sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } } }, { test5text, { { sizeof(test5Results)/sizeof(int), test5Results, }, { sizeof(test5PreResults)/sizeof(int), test5PreResults, }, { 
sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } } }, #if 0 { test6text, { { sizeof(test6Results)/sizeof(int), test6Results, }, { sizeof(test6PreResults)/sizeof(int), test6PreResults, }, { sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } } }, #endif }; #define NUM_TESTS (sizeof(tests) / sizeof(tests[0])) void nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker, nsIWordBreaker* aWordBreaker) { PRBool gNoisy = PR_FALSE; if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) { gNoisy = PR_TRUE; } PRBool error = PR_FALSE; PRInt32 testNum = 0; SelfTestData* st = tests; SelfTestData* last = st + NUM_TESTS; for (; st < last; st++) { PRUnichar* bp; PRInt32 wordLen, contentLen; PRBool ws; PRBool isAsciiTest = PR_TRUE; const PRUnichar* cp = st->text; while (*cp) { if (*cp > 255) { isAsciiTest = PR_FALSE; break; } cp++; } nsTextFragment frag(st->text); nsTextTransformer tx(aLineBreaker, aWordBreaker); for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) { // Do forwards test if (gNoisy) { nsAutoString uc2(st->text); printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode"); fputs(uc2, stdout); printf("'\n"); } tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE); int* expectedResults = st->modes[preMode].data; int resultsLen = st->modes[preMode].length; while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws))) { if (gNoisy) { nsAutoString tmp(bp, wordLen); printf(" '"); fputs(tmp, stdout); printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n", ws ? "yes" : "no", wordLen, *expectedResults, contentLen, tx.mOffset); } if (*expectedResults != wordLen) { error = PR_TRUE; break; } expectedResults++; } if (expectedResults != st->modes[preMode].data + resultsLen) { if (st->modes[preMode].data[0] != 0) { error = PR_TRUE; } } // Do backwards test if (gNoisy) { nsAutoString uc2(st->text); printf("%s backwards test: '", isAsciiTest ? 
"ascii" : "unicode"); fputs(uc2, stdout); printf("'\n"); } tx.Init2(&frag, frag.GetLength(), NS_STYLE_WHITESPACE_NORMAL, NS_STYLE_TEXT_TRANSFORM_NONE); expectedResults = st->modes[preMode].data + resultsLen; while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) { --expectedResults; if (gNoisy) { nsAutoString tmp(bp, wordLen); printf(" '"); fputs(tmp, stdout); printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n", ws ? "yes" : "no", wordLen, contentLen, tx.mOffset); } if (*expectedResults != wordLen) { error = PR_TRUE; break; } } if (expectedResults != st->modes[preMode].data) { if (st->modes[preMode].data[0] != 0) { error = PR_TRUE; } } if (error) { fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum); } testNum++; } } if (error) { NS_ABORT(); } } nsresult nsTextTransformer::Init2(const nsTextFragment* aFrag, PRInt32 aStartingOffset, PRUint8 aWhiteSpace, PRUint8 aTextTransform) { mFrag = aFrag; // Sanitize aStartingOffset if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) { aStartingOffset = 0; } else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(), "bad starting offset")) { aStartingOffset = mFrag->GetLength(); } mOffset = aStartingOffset; // Get the frames text style information if (NS_STYLE_WHITESPACE_PRE == aWhiteSpace) { mMode = ePreformatted; } else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == aWhiteSpace) { mMode = ePreWrap; } mTextTransform = aTextTransform; return NS_OK; } #endif /* DEBUG */