Adding a new interface which does a charset conversion plus NE and NCR for fallback. Part of bug fix #8865, r=ftang.

git-svn-id: svn://10.0.0.236/trunk@50374 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
nhotta%netscape.com 1999-10-11 23:47:27 +00:00
parent a30fed502d
commit 59ee43f7fc
3 changed files with 483 additions and 0 deletions

View File

@ -0,0 +1,59 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "nsISupports.idl"
#include "nsIEntityConverter.idl"
%{C++
#define NS_SAVEASCHARSET_CID { 0xcd233e0, 0x7a86, 0x11d3, { 0x91, 0x5c, 0x0, 0x60, 0x8, 0xa6, 0xed, 0xf6 } }
#define NS_SAVEASCHARSET_PROGID "component://netscape/intl/saveascharset"
%}
// nsISaveAsCharset: converts a Unicode (wstring) text to a target charset.
// The 'attr' value passed to Init() is a bit field: the low 8 bits (mask_Fallback)
// select how characters the charset cannot represent are written, and the next
// 2 bits (mask_Entity) select whether/when named entities are generated.
[scriptable, uuid(33B87F70-7A9C-11d3-915C-006008A6EDF6)]
interface nsISaveAsCharset : nsISupports
{
// attributes
// masks used to split an attribute value into its two fields
const unsigned long mask_Fallback = 0x000000FF; // mask for fallback (8bits)
const unsigned long mask_Entity = 0x00000300; // mask for entity (2bits)
// fallback field values (select with mask_Fallback)
const unsigned long attr_FallbackNone = 0; // no fall back for unconverted chars (skipped)
const unsigned long attr_FallbackQuestionMark = 1; // unconverted chars are replaced by '?'
const unsigned long attr_FallbackEscapeU = 2; // unconverted chars are escaped as \uxxxx
const unsigned long attr_FallbackDecimalNCR = 3; // unconverted chars are replaced by decimal NCR
const unsigned long attr_FallbackHexNCR = 4; // unconverted chars are replaced by hex NCR
// entity field values (select with mask_Entity)
const unsigned long attr_EntityNone = 0; // generate no Named Entity
const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // generate Named Entity before charset conversion
const unsigned long attr_EntityAfterCharsetConv = 0x00000200; // generate Named Entity after charset conversion
// default attribute for plain text
// (no fallback and no named entity)
const unsigned long attr_plainTextDefault = attr_FallbackNone + attr_EntityNone;
// default attribute for html text
// generate entity before charset conversion, use decimal NCR
const unsigned long attr_htmlTextDefault = attr_FallbackDecimalNCR + attr_EntityBeforeCharsetConv;
// set up charset, attribute and entity version
// 'attr' is a combination of one attr_Fallback* value and one attr_Entity* value.
// see nsIEntityConverter.idl for possible value of entityVersion (entityNone for plain text).
void Init(in string charset, in unsigned long attr, in unsigned long entityVersion);
// convert UCS-2 html to target charset
// may return the result code of the unicode converter (NS_ERROR_UENC_NOMAPPING)
// if the attribute does not specify any fall back (e.g. attr_plainTextDefault)
string Convert(in wstring inString);
};

View File

@ -0,0 +1,346 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#define NS_IMPL_IDS
#include "nsICharsetConverterManager.h"
#include "nsICharsetAlias.h"
#undef NS_IMPL_IDS
#include "prmem.h"
#include "prprf.h"
#include "nsIServiceManager.h"
#include "nsIComponentManager.h"
#include "nsSaveAsCharset.h"
//
// guids
//
// Interface and class identifiers used by this file.
// NOTE(review): kIFactoryIID is not referenced anywhere in the visible part of this
// file -- confirm whether it is still needed.
static NS_DEFINE_IID(kISaveAsCharsetIID, NS_ISAVEASCHARSET_IID);
static NS_DEFINE_IID(kIFactoryIID, NS_IFACTORY_IID);
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
static NS_DEFINE_CID(kEntityConverterCID, NS_ENTITYCONVERTER_CID);
//
// nsISupports methods
//
// The nsISupports implementation of nsSaveAsCharset is generated by this macro for
// the nsISaveAsCharset interface id.
NS_IMPL_ISUPPORTS(nsSaveAsCharset, kISaveAsCharsetIID)
//
// nsSaveAsCharset
//
// Construct an un-initialized converter: HTML defaults for the attribute, entity
// version 0, and no encoder / entity converter until Init() is called.
nsSaveAsCharset::nsSaveAsCharset()
  : mAttribute(attr_htmlTextDefault),
    mEntityVersion(0),
    mEncoder(NULL),
    mEntityConverter(NULL)
{
  NS_INIT_REFCNT();
}
// Destructor: gives up the encoder and the entity converter, each of which may
// still be NULL if Init() was never called or did not get that far.
nsSaveAsCharset::~nsSaveAsCharset()
{
NS_IF_RELEASE(mEncoder);
NS_IF_RELEASE(mEntityConverter);
}
// Set up the converter for a target charset.
//   charset       - name of the target charset (must not be NULL)
//   attr          - fallback / entity attribute, see nsISaveAsCharset attr_* constants
//   entityVersion - entity version, stored for later use
// Returns NS_ERROR_NULL_POINTER for a NULL charset, the failing result code of the
// charset converter manager / component manager, or NS_ERROR_FAILURE when a call
// reported success without handing back an object.
NS_IMETHODIMP
nsSaveAsCharset::Init(const char *charset, PRUint32 attr, PRUint32 entityVersion)
{
  if (nsnull == charset)
    return NS_ERROR_NULL_POINTER;

  nsresult rv = NS_OK;
  nsString aCharset(charset);

  mAttribute = attr;
  mEntityVersion = entityVersion;

  // Init() may be called more than once on the same object. Give up whatever an
  // earlier call acquired before the out-parameters below overwrite the members,
  // otherwise the previous encoder / entity converter would be leaked.
  NS_IF_RELEASE(mEncoder);
  NS_IF_RELEASE(mEntityConverter);

  // set up unicode encoder
  NS_WITH_SERVICE(nsICharsetConverterManager, ccm, kCharsetConverterManagerCID, &rv);
  if (NS_FAILED(rv)) return rv;
  if (NULL == ccm) return NS_ERROR_FAILURE;

  rv = ccm->GetUnicodeEncoder(&aCharset, &mEncoder);
  if (NS_FAILED(rv)) return rv;
  if (NULL == mEncoder) return NS_ERROR_FAILURE;

  // set up entity converter (only needed when the attribute asks for named entities)
  if (attr_EntityNone != MASK_ENTITY(mAttribute)) {
    rv = nsComponentManager::CreateInstance(kEntityConverterCID,
                                            NULL,
                                            nsCOMTypeInfo<nsIEntityConverter>::GetIID(),
                                            (void**)&mEntityConverter);
    // report the real reason for the failure instead of relying on the pointer only
    if (NS_FAILED(rv)) return rv;
    if (NULL == mEntityConverter) return NS_ERROR_FAILURE;
  }

  return rv;
}
// Convert a NUL-terminated Unicode string to the charset selected by Init().
//   inString - text to convert (must not be NULL)
//   _retval  - receives the newly allocated result; always set to NULL on entry so
//              the caller never sees an unassigned pointer
// When the attribute requests entities before the charset conversion, the text is
// first run through the entity converter and the result of that is encoded.
// Returns NS_ERROR_NULL_POINTER for NULL arguments, NS_ERROR_FAILURE when Init()
// has not set up the required converters, otherwise the result of the conversion.
NS_IMETHODIMP
nsSaveAsCharset::Convert(const PRUnichar *inString, char **_retval)
{
  if (nsnull == _retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = NULL;
  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;

  if (NULL == mEncoder) return NS_ERROR_FAILURE;  // need to call Init() before Convert()

  // common case: no entity pass in front of the charset conversion
  if (attr_EntityBeforeCharsetConv != MASK_ENTITY(mAttribute))
    return DoCharsetConversion(inString, _retval);

  if (NULL == mEntityConverter) return NS_ERROR_FAILURE;

  // do the entity conversion first
  PRUnichar *entity = NULL;
  nsresult rv = DoEntityConversion(inString, &entity);
  if (NS_FAILED(rv)) return rv;
  // a success code without a buffer must not be reported as a successful conversion
  if (NULL == entity) return NS_ERROR_OUT_OF_MEMORY;

  rv = DoCharsetConversion(entity, _retval);
  nsAllocator::Free(entity);

  return rv;
}
/////////////////////////////////////////////////////////////////////////////////////////
// do the fallback, reallocate the buffer if necessary
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
//   character       - the character the encoder could not map
//   outString       - in/out: destination buffer (may be moved by the reallocation)
//   bufferLength    - in/out: allocated size of *outString
//   currentPos      - in/out: write position in *outString, advanced past the fallback text
//   estimatedLength - estimated number of bytes the rest of the conversion still needs
// On NS_ERROR_OUT_OF_MEMORY *outString and *bufferLength are left untouched: the
// original buffer is still valid after a failed reallocation and remains owned by
// the caller, who has to free it.
NS_IMETHODIMP
nsSaveAsCharset::HandleFallBack(PRUnichar character, char **outString, PRInt32 *bufferLength,
                                PRInt32 *currentPos, PRInt32 estimatedLength)
{
  if ((nsnull == outString) || (nsnull == bufferLength) || (nsnull == currentPos))
    return NS_ERROR_NULL_POINTER;

  char fallbackStr[256];
  nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
  if (NS_FAILED(rv))
    return rv;

  PRInt32 tempLen = (PRInt32) PL_strlen(fallbackStr);

  // Space required behind the current position: the fallback text, the rest of the
  // conversion and one byte for the terminating NUL the caller writes.
  PRInt32 needed = tempLen + estimatedLength + 1;

  // reallocate if the buffer is not large enough
  if (needed > (*bufferLength - *currentPos)) {
    PRInt32 newLength = *currentPos + needed;
    char *temp = (char *) PR_Realloc(*outString, newLength);
    if (NULL == temp)
      return NS_ERROR_OUT_OF_MEMORY;
    // adjust length/pointer after realloc
    *outString = temp;
    *bufferLength = newLength;
  }

  nsCRT::memcpy((*outString + *currentPos), fallbackStr, tempLen);
  *currentPos += tempLen;

  return rv;
}
// Encode a NUL-terminated Unicode string with mEncoder.
//   inString  - text to encode (must not be NULL)
//   outString - receives a PR_Malloc'ed, NUL-terminated result; NULL on failure
// Whenever the encoder reports NS_ERROR_UENC_NOMAPPING the encoder is reset, the
// fallback selected by the attribute (if any) is written for the offending
// character, and the conversion continues with the rest of the input.
// If an unmapped character was seen and the attribute specifies no fallback, the
// result string is still returned but the result code is NS_ERROR_UENC_NOMAPPING so
// that the caller can do its own fallback.
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;
  *outString = NULL;
  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;

  nsresult rv;
  PRInt32 inStringLength = nsCRT::strlen(inString);   // original input string length
  PRInt32 bufferLength;                               // allocated buffer length
  PRInt32 srcLength = inStringLength;
  PRInt32 dstLength;
  char *dstPtr = NULL;
  nsresult saveResult = NS_OK;                        // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + 512; // reserve 512 byte for fallback.
  dstPtr = (char *) PR_Malloc(bufferLength);
  if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;

  // An empty input never enters the loop below, so terminate the buffer up front;
  // otherwise the caller would receive uninitialized memory as a "string".
  dstPtr[0] = '\0';

  for (PRInt32 pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    pos1 += srcLength;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened and reset the result
    saveResult = rv;
    rv = NS_OK;

    // reset the encoder, estimate target length again
    mEncoder->Reset();
    srcLength = inStringLength - pos1;

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      PRUnichar unMappedChar = inString[pos1];

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv))
        break;

      // may move dstPtr and enlarge bufferLength
      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv))
        break;
      dstPtr[pos2] = '\0';
    }
    pos1++; // for the unmapped char
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if ((NS_ERROR_UENC_NOMAPPING == saveResult) && ATTR_NO_FALLBACK(mAttribute)) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
// Replace every character of inString that the entity converter knows by its
// entity; all other characters are copied unchanged.
//   inString  - NUL-terminated text to convert (must not be NULL)
//   outString - receives a newly allocated Unicode string (from ToNewUnicode())
// Returns NS_ERROR_NULL_POINTER for NULL arguments and NS_ERROR_OUT_OF_MEMORY when
// the result string could not be allocated.
NS_IMETHODIMP
nsSaveAsCharset::DoEntityConversion(const PRUnichar *inString, PRUnichar **outString)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;
  *outString = NULL;
  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;

  nsresult rv;
  nsString tempString("");
  // only the length is needed, so measure the input instead of copying it
  PRInt32 length = nsCRT::strlen(inString);

  for (PRInt32 i = 0; i < length; i++) {
    PRUnichar *entity = NULL;
    rv = mEntityConverter->ConvertToEntity(inString[i], &entity);
    if (NS_SUCCEEDED(rv) && NULL != entity) {
      tempString.Append(entity);
      nsAllocator::Free(entity);
    }
    else {
      // no entity for this character, keep it as it is
      tempString.Append(inString[i]);
    }
  }

  *outString = tempString.ToNewUnicode();
  // check the allocated string, not the (already validated) out-parameter itself
  if (nsnull == *outString)
    return NS_ERROR_OUT_OF_MEMORY;

  return NS_OK;
}
// Write the entity for a single character into a caller supplied buffer.
//   inCharacter  - character to look up
//   outString    - destination buffer, set to the empty string on entry
//   bufferLength - size of outString, passed on to ToCString()
// Returns the result of the entity converter; NS_ERROR_FAILURE when no entity
// converter is available (e.g. Init() failed before creating one).
NS_IMETHODIMP
nsSaveAsCharset::DoEntityConversion(PRUnichar inCharacter, char *outString, PRInt32 bufferLength)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;
  *outString = '\0';    // start with an empty C string

  // Convert() only verifies the entity converter for the "entity before charset
  // conversion" mode, so guard the pointer here before dereferencing it.
  if (NULL == mEntityConverter)
    return NS_ERROR_FAILURE;

  PRUnichar *entity = NULL;
  nsresult rv = mEntityConverter->ConvertToEntity(inCharacter, &entity);
  if (NS_SUCCEEDED(rv) && NULL != entity) {
    nsString temp(entity);
    nsAllocator::Free(entity);
    if (NULL == temp.ToCString(outString, bufferLength))
      return NS_ERROR_OUT_OF_MEMORY;
  }

  return rv;
}
// Produce the replacement text for a character the encoder could not map.
//   inCharacter  - the unmapped character
//   outString    - destination buffer, receives a NUL-terminated C string
//   bufferLength - size of outString in bytes
// Nothing is written (empty string, NS_OK) when the attribute specifies no fallback.
// With attr_EntityAfterCharsetConv a named entity is tried first; if that does not
// succeed the fallback selected by the low bits of the attribute is used
// ('?', \uxxxx, decimal NCR or hex NCR).
// Returns NS_ERROR_FAILURE when the buffer is too small or formatting failed, and
// NS_ERROR_ILLEGAL_VALUE for an unknown fallback value.
NS_IMETHODIMP
nsSaveAsCharset::DoConversionFallBack(PRUnichar inCharacter, char *outString, PRInt32 bufferLength)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;
  // even the empty result needs one byte for the terminator
  if (bufferLength < 1)
    return NS_ERROR_FAILURE;
  *outString = '\0';

  nsresult rv = NS_OK;

  if (ATTR_NO_FALLBACK(mAttribute)) {
    return NS_OK;
  }
  if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
    rv = DoEntityConversion(inCharacter, outString, bufferLength);
    if (NS_SUCCEEDED(rv)) return rv;
  }

  switch (MASK_FALLBACK(mAttribute)) {
  case attr_FallbackQuestionMark:
    if (bufferLength >= 2) {
      *outString++ = '?';
      *outString = '\0';
      rv = NS_OK;
    } else {
      rv = NS_ERROR_FAILURE;
    }
    break;
  case attr_FallbackEscapeU:
    rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inCharacter) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackDecimalNCR:
    rv = (PR_snprintf(outString, bufferLength, "&#%u;", inCharacter) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackHexNCR:
    rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inCharacter) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackNone:
    rv = NS_OK;
    break;
  default:
    rv = NS_ERROR_ILLEGAL_VALUE;
    break;
  }

  return rv;
}
/////////////////////////////////////////////////////////////////////////////////////////
// Allocate a new nsSaveAsCharset and hand it back through 'inst'.
// Fails with NS_ERROR_NULL_POINTER when 'inst' is NULL and with
// NS_ERROR_OUT_OF_MEMORY when the allocation yields NULL.
nsresult
NS_NewSaveAsCharset(nsISupports **inst)
{
  if (!inst) {
    return NS_ERROR_NULL_POINTER;
  }

  nsSaveAsCharset *converter = new nsSaveAsCharset;
  *inst = (nsISupports *) converter;

  return (NULL != *inst) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}

View File

@ -0,0 +1,78 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "nsIFactory.h"
#include "nsString.h"
#include "nsISaveAsCharset.h"
// Extract the fallback field (bits selected by mask_Fallback) of an attribute value.
#define MASK_FALLBACK(a) (nsISaveAsCharset::mask_Fallback & (a))
// Extract the entity field (bits selected by mask_Entity) of an attribute value.
#define MASK_ENTITY(a) (nsISaveAsCharset::mask_Entity & (a))
// True when an unmapped character gets no replacement at all: the fallback field is
// attr_FallbackNone and named entities are not generated after the charset conversion.
#define ATTR_NO_FALLBACK(a) (nsISaveAsCharset::attr_FallbackNone == MASK_FALLBACK(a) && \
nsISaveAsCharset::attr_EntityAfterCharsetConv != MASK_ENTITY(a))
class nsIUnicodeEncoder;
class nsIEntityConverter;
// Implementation of nsISaveAsCharset: charset conversion from Unicode with optional
// named entity generation and a configurable fallback for unmapped characters.
class nsSaveAsCharset : public nsISaveAsCharset
{
public:
//
// implementation methods
//
nsSaveAsCharset();
virtual ~nsSaveAsCharset();
//
// nsISupports
//
NS_DECL_ISUPPORTS
//
// nsISaveAsCharset
//
NS_IMETHOD Init(const char *charset, PRUint32 attr, PRUint32 entityVersion);
NS_IMETHOD Convert(const PRUnichar *inString, char **_retval);
protected:
// encode a Unicode string with mEncoder, applying the fallback for unmapped characters
NS_IMETHOD DoCharsetConversion(const PRUnichar *inString, char **outString);
// entity conversion of a whole string (result is a newly allocated Unicode string)
NS_IMETHOD DoEntityConversion(const PRUnichar *inString, PRUnichar **outString);
// entity conversion of a single character into a caller supplied buffer
NS_IMETHOD DoEntityConversion(PRUnichar inCharacter, char *outString, PRInt32 bufferLength);
// write the replacement text for one unmapped character into a caller supplied buffer
NS_IMETHOD DoConversionFallBack(PRUnichar inCharacter, char *outString, PRInt32 bufferLength);
// do the fallback, reallocate the buffer if necessary
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
NS_IMETHOD HandleFallBack(PRUnichar character, char **outString, PRInt32 *bufferLength,
PRInt32 *currentPos, PRInt32 estimatedLength);
PRUint32 mAttribute; // conversion attribute
PRUint32 mEntityVersion; // see nsIEntityConverter
nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
nsIEntityConverter *mEntityConverter; // entity converter, only created when the attribute requests entities
};
// Creates a new nsSaveAsCharset and returns it through 'inst'.
// NOTE(review): the implementation does not call AddRef on the new object -- confirm
// that callers take their own reference.
nsresult NS_NewSaveAsCharset(nsISupports **inst);