/* * (C) Copyright The MITRE Corporation 1999 All rights reserved. * * The contents of this file are subject to the Mozilla Public License * Version 1.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * The program provided "as is" without any warranty express or * implied, including the warranty of non-infringement and the implied * warranties of merchantibility and fitness for a particular purpose. * The Copyright owner will not be liable for any damages suffered by * you as a result of using the Program. In no event will the Copyright * owner be liable for any special, indirect or consequential damages or * lost profits even if the Copyright owner has been advised of the * possibility of their occurrence. * * Please see release.txt distributed with this file for more information. * */ // Tom Kneeland (3/17/99) // // Implementation of a simple String class // // Modification History: // Who When What // TK 03/17/99 Created // TK 03/23/99 Released without "lastIndexOf" functions // TK 04/02/99 Added support for 'const' strings, and added // 'operator=' for constant char*. // TK 04/09/99 Overloaded the output operator (<<). Currently it only // supports outputing the String to a C sytle character based // stream. // TK 04/09/99 Provided support for the extraction of the DOM_CHAR // representation of the string. The new method, "toDomChar()" // returns a constant pointer to the internal DOM_CHAR string // buffer. // TK 04/10/99 Added the implementation for appending an array of DOM_CHARs // to a string. It should be noted that a length needs to be // provided in order to determine the length of the source // array. // TK 04/22/99 Fixed a bug where setting a string equal to NULL would cause // a core dump. Also added support for constructing a string // using the NULL identifier. // Modified the output operator (<<) to accept a const String // reference. 
//                This eliminates a wasteful copy constructor call.
// TK 04/28/99    Modified the clear() method to leave the DOM_CHAR array
//                in place.
// TK 04/28/99    Added 3 new member functions: insert, deleteChars, and
//                replace.
// TK 05/05/99    Added support for implicit integer conversion. This allows
//                integers to be appended, inserted, and used as replacements
//                for DOM_CHARs. To support this feature, ConvertInt has been
//                added which converts the given integer to a string and stores
//                it in the target.
// TK 05/05/99    Converted DOM_CHAR to UNICODE_CHAR
//
// KV 07/29/1999  Added lastIndexOf methods
// KV 07/29/1999  Changed indexOf methods with no offset, to call the
//                indexOf methods with offset of 0. This allows re-use of
//                code, makes it easier to debug, and minimizes the size of
//                the implementation
// LF 08/06/1999  In method #operator=,
//                added line: return *this
// KV 08/11/1999  changed charAt to return -1, if index is out of bounds, instead of 0,
//                since 0 is a valid character, and this makes my code more compatible
//                with Java
// KV 08/11/1999  removed PRBool, uses baseutils.h (MBool)
// TK 12/03/1999  Made some of the interface functions virtual, to support
//                wrapping Mozilla nsStrings in a String interface
// TK 12/09/1999  Since "String" can be extended, we can not be certain of its
//                implementation, therefore any function accepting a String
//                object as an argument must only deal with its public
//                interface. The following member functions have been
//                modified: append, insert, replace, indexOf, isEqual,
//                lastIndexOf, and subString
//
//                Modified subString(Int32 start, String& dest) to simply
//                call subString(Int32 start, Int32 end, String& dest). This
//                helps with code reuse.
//
//                Made ConvertInt a protected member function so it is
//                available to classes derived from String.
This is possible // since the implementation of ConvertInt only uses the public // interface of String // // Made UnicodeLength a protected member function since it // only calculates the length of a null terminated UNICODE_CHAR // array. // TK 12/17/1999 To support non-null terminated UNICODE_CHAR* arrays, an // additional insert function has been added that accepts a // length parameter. // // Modified append(const UNICODE_CHAR* source) to simply // calculate the length of the UNICODE_CHAR array, and then // defer its processing to // append(const UNICODE_CHAR* source, Int32 sourceLength) // TK 12/22/1999 Enhanced Trim() to to remove additional "white space" // characters (added \n, \t, and \r). // // TK 02/14/2000 Added a constructon which accepts a UNICODE_CHAR* array, and // its associated length. // // TK 03/10/2000 Fixed a bug found by Bobbi Guarino where // String::indexOf(const String& string...) was not RETURNing // a value. // // TK 03/30/2000 Changed toChar to toCharArray and provided an overloaded // version which will instantiate its own character buffer. #include #include #include "TxString.h" #include // //Default Constructor, create an empty String // String::String() { strBuffer = NULL; bufferLength = 0; strLength = 0; } // //Create an empty String of a specific size // String::String(Int32 initSize) { strBuffer = new UNICODE_CHAR[initSize]; bufferLength = initSize; strLength = 0; } // //Create a copy of the source String //TK 12/09/1999 - To ensure compatibility with sub classes of String, this // constructor has been modified to use String's public // interface only. 
// String::String(const String& source) { Int32 copyLoop; //Allocate space for the source string strLength = source.length(); //-- modified by kvisco to only use necessay amount of space //-- was: bufferLength = source.bufferLength; bufferLength = strLength; strBuffer = new UNICODE_CHAR[bufferLength]; //Copy the new string data after the old data for (copyLoop=0;copyLoop strLength) { ensureCapacity(totalOffset - strLength); strLength += totalOffset - strLength; } for (replaceLoop=0;replaceLoop strLength) { ensureCapacity(totalOffset - strLength); strLength += totalOffset - strLength; } for (replaceLoop=0;replaceLoop strLength) { ensureCapacity(totalOffset - strLength); strLength += totalOffset - strLength; } for (replaceLoop=0;replaceLoop current length, the string will be extended * and padded with '\0' null characters. Otherwise the String * will be truncated **/ void String::setLength(Int32 length) { setLength(length, '\0'); } //-- setLength /** * Sets the Length of this String, if length is less than 0, it will * be set to 0; if length > current length, the string will be extended * and padded with given pad character. Otherwise the String * will be truncated **/ void String::setLength(Int32 length, UNICODE_CHAR padChar) { if ( length < 0 ) strLength = 0; else if ( length > strLength ) { Int32 diff = length-strLength; ensureCapacity(diff); for ( Int32 i = strLength; i < length; i++ ) strBuffer[i] = padChar; strLength = length; } else strLength = length; } //-- setLength // //Delete the "substring" starting at "offset" and proceeding for "count" number //of characters (or until the end of the string, whichever comes first). // void String::deleteChars(Int32 offset, Int32 count) { Int32 deleteLoop; Int32 offsetCount; offset = offset < 0 ? 0 : offset; offsetCount = offset + count; if (offsetCount < strLength) { for (deleteLoop=0;deleteLoop= 0)) return strBuffer[index]; else return (UNICODE_CHAR)-1; } // //Clear out the string by simply setting the length to zero. 
The buffer is //left intact. // void String::clear() { strLength = 0; } // //Make sure the buffer has room for 'capacity' UNICODE_CHARS. // void String::ensureCapacity(Int32 capacity) { UNICODE_CHAR* tempStrBuffer = NULL; //Check for the desired capacity Int32 freeSpace = bufferLength - strLength; //(added by kvisco) if (freeSpace < capacity) { //-- modified by kvisco to only add needed capacity, //-- not extra bytes as before //-- old : bufferLength += capacity; bufferLength += capacity - freeSpace; tempStrBuffer = new UNICODE_CHAR[bufferLength]; copyString(tempStrBuffer); //If the old string contained any data, delete it, and save the new. if (strBuffer) delete strBuffer; strBuffer = tempStrBuffer; } } /** * Performs a CASE SENSITIVE search of the string for the first occurence * of 'data'. If found return the index, else return NOT_FOUND. * -- changed by kvisco to call indexOf(UNICODE_CHAR, Int32) **/ Int32 String::indexOf(UNICODE_CHAR data) const { return indexOf(data, 0); } //-- indexOf // //Starting at 'offset' perform a CASE SENSITIVE search of the string looking //for the first occurence of 'data'. If found return the index, else return //NOT_FOUND. If the offset is less than zero, then start at zero. // Int32 String::indexOf(UNICODE_CHAR data, Int32 offset) const { Int32 searchIndex = offset < 0 ? searchIndex = 0 : searchIndex = offset; while (1) { if (searchIndex >= strLength) return NOT_FOUND; else if (strBuffer[searchIndex] == data) return searchIndex; else ++searchIndex; } } //-- indexOf // //Returns the index of the first occurence of data //TK 12/09/1999 - Modified to simply use indexOf(const String&, Int32). // Int32 String::indexOf(const String& data) const { return indexOf(data, 0); } // //Returns the index of the first occurrence of data starting at offset //TK 12/09/1999 - Modified to use the "data" String's public interface to // retreive the Unicode Char buffer when calling isEqual. 
// This ensures compatibility with classes derrived from String. // Int32 String::indexOf(const String& data, Int32 offset) const { Int32 searchIndex = offset < 0 ? 0 : offset; while (1) { if (searchIndex <= (strLength - data.length())) { if (isEqual(&strBuffer[searchIndex], data.toUnicode(), data.length())) return searchIndex; } else return NOT_FOUND; searchIndex++; } } // //Check for equality between this string, and data //TK 12/09/1999 - Modified to use data.toUnicode() public member function // when working with data's unicode buffer. This ensures // compatibility with derrived classes. // MBool String::isEqual(const String& data) const { if (this == &data) return MB_TRUE; else if (strLength != data.length()) return MB_FALSE; else return isEqual(strBuffer, data.toUnicode(), data.length()); } /** * Returns index of last occurrence of data *
* Added implementation 19990729 (kvisco) **/ Int32 String::lastIndexOf(UNICODE_CHAR data) const { return lastIndexOf(data, strLength-1); } //-- lastIndexOf /** * Returns the index of the last occurrence of data starting at offset *
* Added implementation 19990729 (kvisco) **/ Int32 String::lastIndexOf(UNICODE_CHAR data, Int32 offset) const { if ((offset < 0) || (offset >= strLength)) return NOT_FOUND; Int32 searchIndex = offset; while (searchIndex >= 0) { if (strBuffer[searchIndex] == data) return searchIndex; --searchIndex; } return NOT_FOUND; } //-- lastIndexOf /** * Returns the index of the last occurrence of data *
* Added implementation 19990729 (kvisco) **/ Int32 String::lastIndexOf(const String& data) const { return lastIndexOf(data, strLength-1); } //-- lastIndexOf /** * Returns the index of the last occurrence of data starting at offset *
* Added implementation 19990729 (kvisco) * TK 12/09/1999 - Completed implementation... **/ Int32 String::lastIndexOf(const String& data, Int32 offset) const { Int32 searchIndex; const UNICODE_CHAR* dataStrBuffer = NULL; if ((offset < 0) || (offset >= strLength)) return NOT_FOUND; else { searchIndex = offset; //If there is not enough space between searchIndex and the length of the of //the string for "data" to appear, then there is no reason to search it. if ((strLength - searchIndex) < data.length()) searchIndex = strLength - data.length(); dataStrBuffer = data.toUnicode(); while (searchIndex >= 0) { if (isEqual(&strBuffer[searchIndex], data.toUnicode(), data.length())) return searchIndex; --searchIndex; } } return NOT_FOUND; } // //Returns the length of the String // Int32 String::length() const { return strLength; } // //Returns a subString starting at start //TK 12/09/1999 - Modified to simply use subString(Int32, Int32, String&) // String& String::subString(Int32 start, String& dest) const { return subString(start, strLength, dest); } /** * Returns the subString starting at start and ending at end * Note: the dest String is cleared before use * TK 12/09/1999 - Modified to use the "dest" String's public interface to * ensure compatibility wtih derrived classes. **/ String& String::subString(Int32 start, Int32 end, String& dest) const { Int32 srcLoop; Int32 destLoop = 0; start = start < 0? 0 : start; end = end > strLength? 
strLength : end; dest.clear(); if ((start < end)) { dest.ensureCapacity(end - start); for (srcLoop=start;srcLoop= 'A') && (strBuffer[conversionLoop] <= 'Z')) strBuffer[conversionLoop] += 32; } } // //Convert String to uppercase // void String::toUpperCase() { Int32 conversionLoop; for (conversionLoop=0;conversionLoop= 'a') && (strBuffer[conversionLoop] <= 'z')) strBuffer[conversionLoop] -= 32; } } // //Trim whitespace from both ends of String // void String::trim() { Int32 trimLoop = strLength - 1; Int32 cutLoop; MBool done = MB_FALSE; //As long as we are not working on an emtpy string, trim from the right //first, so we don't have to move useless spaces when we trim from the left. if (strLength > 0) { while (!done) { switch (strBuffer[trimLoop]) { case ' ' : case '\t' : case '\n' : case '\r' : --strLength; --trimLoop; break; default : done = MB_TRUE; break; } } } //Now, if there are any characters left to the string, Trim to the left. //First count the number of "left" spaces. Then move all characters to the //left by that ammount. if (strLength > 0) { done = MB_FALSE; trimLoop = 0; while (!done) { switch (strBuffer[trimLoop]) { case ' ' : case '\t' : case '\n' : case '\r' : ++trimLoop; break; default : done = MB_TRUE; break; } } if (trimLoop < strLength) { for (cutLoop=trimLoop;cutLoop