Adding a new interface which does a charset conversion plus NE and NCR for fallback. Part of bug fix #8865, r=ftang.
git-svn-id: svn://10.0.0.236/trunk@50374 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
parent
a30fed502d
commit
59ee43f7fc
59
mozilla/intl/unicharutil/idl/nsISaveAsCharset.idl
Normal file
59
mozilla/intl/unicharutil/idl/nsISaveAsCharset.idl
Normal file
@ -0,0 +1,59 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "nsISupports.idl"
|
||||
#include "nsIEntityConverter.idl"
|
||||
|
||||
%{C++
|
||||
// Class ID under which the nsISaveAsCharset implementation is identified.
#define NS_SAVEASCHARSET_CID { 0xcd233e0, 0x7a86, 0x11d3, { 0x91, 0x5c, 0x0, 0x60, 0x8, 0xa6, 0xed, 0xf6 } }
|
||||
// ProgID string naming the same component.
#define NS_SAVEASCHARSET_PROGID "component://netscape/intl/saveascharset"
|
||||
%}
|
||||
|
||||
// nsISaveAsCharset: converts a UCS-2 string to a target charset. The "attr"
// value passed to Init() combines one fallback value (low 8 bits, see
// mask_Fallback) with one entity value (bits 8-9, see mask_Entity).
[scriptable, uuid(33B87F70-7A9C-11d3-915C-006008A6EDF6)]
|
||||
interface nsISaveAsCharset : nsISupports
|
||||
{
|
||||
// attributes: masks used to split an attribute value into its two fields
|
||||
const unsigned long mask_Fallback = 0x000000FF; // mask for fallback (8bits)
|
||||
const unsigned long mask_Entity = 0x00000300; // mask for entity (2bits)
|
||||
|
||||
// fallback field values (select with mask_Fallback)
const unsigned long attr_FallbackNone = 0; // no fall back for unconverted chars (skipped)
|
||||
const unsigned long attr_FallbackQuestionMark = 1; // unconverted chars are replaced by '?'
|
||||
const unsigned long attr_FallbackEscapeU = 2; // unconverted chars are escaped as \uxxxx
|
||||
const unsigned long attr_FallbackDecimalNCR = 3; // unconverted chars are replaced by decimal NCR
|
||||
const unsigned long attr_FallbackHexNCR = 4; // unconverted chars are replaced by hex NCR
|
||||
|
||||
// entity field values (select with mask_Entity)
const unsigned long attr_EntityNone = 0; // generate no Named Entity
|
||||
const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // generate Named Entity before charset conversion
|
||||
const unsigned long attr_EntityAfterCharsetConv = 0x00000200; // generate Named Entity after charset conversion
|
||||
|
||||
// default attribute for plain text: no fallback, no named entities
|
||||
const unsigned long attr_plainTextDefault = attr_FallbackNone + attr_EntityNone;
|
||||
|
||||
// default attribute for html text
|
||||
// generate entity before charset conversion, use decimal NCR
|
||||
const unsigned long attr_htmlTextDefault = attr_FallbackDecimalNCR + attr_EntityBeforeCharsetConv;
|
||||
|
||||
// set up charset, attribute and entity version
|
||||
// see nsIEntityConverter.idl for possible value of entityVersion (entityNone for plain text).
|
||||
void Init(in string charset, in unsigned long attr, in unsigned long entityVersion);
|
||||
|
||||
// convert UCS-2 html to target charset
|
||||
// may return the result code of the unicode converter (NS_ERROR_UENC_NOMAPPING)
|
||||
// if the attribute does not specify any fall back (e.g. attr_plainTextDefault)
|
||||
string Convert(in wstring inString);
|
||||
};
|
||||
346
mozilla/intl/unicharutil/src/nsSaveAsCharset.cpp
Normal file
346
mozilla/intl/unicharutil/src/nsSaveAsCharset.cpp
Normal file
@ -0,0 +1,346 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#define NS_IMPL_IDS
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
#undef NS_IMPL_IDS
|
||||
|
||||
#include "prmem.h"
|
||||
#include "prprf.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsIComponentManager.h"
|
||||
#include "nsSaveAsCharset.h"
|
||||
|
||||
//
|
||||
// guids
|
||||
//
|
||||
// Interface ID handed to NS_IMPL_ISUPPORTS for nsSaveAsCharset.
static NS_DEFINE_IID(kISaveAsCharsetIID, NS_ISAVEASCHARSET_IID);
|
||||
// NOTE(review): kIFactoryIID is not referenced in the visible part of this file.
static NS_DEFINE_IID(kIFactoryIID, NS_IFACTORY_IID);
|
||||
// Class ID used by Init() to obtain the charset converter manager service.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
// Class ID used by Init() to create the entity converter instance.
static NS_DEFINE_CID(kEntityConverterCID, NS_ENTITYCONVERTER_CID);
|
||||
|
||||
|
||||
//
|
||||
// nsISupports methods
|
||||
//
|
||||
// Supplies the nsISupports methods declared by NS_DECL_ISUPPORTS in the class,
// using kISaveAsCharsetIID as the interface ID.
NS_IMPL_ISUPPORTS(nsSaveAsCharset, kISaveAsCharsetIID)
|
||||
|
||||
|
||||
//
|
||||
// nsSaveAsCharset
|
||||
//
|
||||
// Construct a converter that is not yet usable: the attribute starts at the
// HTML default, the entity version at 0, and neither the encoder nor the
// entity converter exists until Init() creates them.
nsSaveAsCharset::nsSaveAsCharset()
  : mAttribute(attr_htmlTextDefault),
    mEntityVersion(0),
    mEncoder(NULL),
    mEntityConverter(NULL)
{
  NS_INIT_REFCNT();
}
|
||||
|
||||
// Releases the encoder and the entity converter; either may still be NULL
// (the constructor's initial value) if Init() never set it.
nsSaveAsCharset::~nsSaveAsCharset()
|
||||
{
|
||||
NS_IF_RELEASE(mEncoder);
|
||||
NS_IF_RELEASE(mEntityConverter);
|
||||
}
|
||||
|
||||
// Configure the converter.
//   charset       - name of the target charset; must not be null
//   attr          - fallback and entity attribute bits (see nsISaveAsCharset)
//   entityVersion - stored in mEntityVersion
// Returns NS_ERROR_NULL_POINTER for a null charset, the failing result code of
// the service / encoder lookup, or NS_ERROR_FAILURE when a required object
// could not be obtained.
// An encoder or entity converter left from an earlier Init() is released
// first so that calling Init() twice does not leak. When the entity converter
// cannot be created the encoder is released again, so Convert() keeps
// rejecting calls instead of running on a half-initialized object.
NS_IMETHODIMP
nsSaveAsCharset::Init(const char *charset, PRUint32 attr, PRUint32 entityVersion)
{
  if (nsnull == charset)
    return NS_ERROR_NULL_POINTER;

  nsresult rv = NS_OK;

  // drop whatever a previous Init() acquired
  NS_IF_RELEASE(mEncoder);
  NS_IF_RELEASE(mEntityConverter);

  nsString aCharset(charset);
  mAttribute = attr;
  mEntityVersion = entityVersion;

  // set up unicode encoder
  NS_WITH_SERVICE(nsICharsetConverterManager, ccm, kCharsetConverterManagerCID, &rv);
  if (NS_FAILED(rv)) return rv;
  if (NULL == ccm) return NS_ERROR_FAILURE;

  rv = ccm->GetUnicodeEncoder(&aCharset, &mEncoder);
  if (NS_FAILED(rv)) {
    NS_IF_RELEASE(mEncoder);
    return rv;
  }
  if (NULL == mEncoder) return NS_ERROR_FAILURE;

  // set up entity converter (only needed when named entities are requested)
  if (attr_EntityNone != MASK_ENTITY(mAttribute)) {
    rv = nsComponentManager::CreateInstance(kEntityConverterCID,
                                            NULL,
                                            nsCOMTypeInfo<nsIEntityConverter>::GetIID(),
                                            (void**)&mEntityConverter);
    if (NULL == mEntityConverter) {
      NS_IF_RELEASE(mEncoder);
      return NS_ERROR_FAILURE;
    }
    if (NS_FAILED(rv)) {
      NS_IF_RELEASE(mEntityConverter);
      NS_IF_RELEASE(mEncoder);
      return rv;
    }
  }

  return rv;
}
|
||||
|
||||
// Convert a null-terminated UCS-2 string to the charset chosen in Init().
//   inString - input text; must not be null
//   _retval  - receives the converted buffer produced by DoCharsetConversion();
//              set to NULL on every failure path
// When the attribute requests entities before charset conversion, the input
// is first run through DoEntityConversion() and the result is then encoded.
// Returns NS_ERROR_NULL_POINTER for null arguments, NS_ERROR_FAILURE when
// Init() has not supplied the required objects, otherwise the result of the
// conversion helpers.
NS_IMETHODIMP
nsSaveAsCharset::Convert(const PRUnichar *inString, char **_retval)
{
  if (nsnull == _retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = NULL;  // never leave the caller with an uninitialized pointer

  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;

  if (NULL == mEncoder) return NS_ERROR_FAILURE;  // need to call Init() before Convert()

  // common case: no entity pass in front of the charset conversion
  if (attr_EntityBeforeCharsetConv != MASK_ENTITY(mAttribute))
    return DoCharsetConversion(inString, _retval);

  if (NULL == mEntityConverter) return NS_ERROR_FAILURE;

  // do the entity conversion first
  PRUnichar *entity = NULL;
  nsresult rv = DoEntityConversion(inString, &entity);
  if (NS_FAILED(rv))
    return rv;
  if (NULL == entity)
    return NS_ERROR_OUT_OF_MEMORY;  // success without a result is not usable

  rv = DoCharsetConversion(entity, _retval);
  nsAllocator::Free(entity);

  return rv;
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// do the fallback, reallocate the buffer if necessary
|
||||
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
|
||||
NS_IMETHODIMP
|
||||
nsSaveAsCharset::HandleFallBack(PRUnichar character, char **outString, PRInt32 *bufferLength,
|
||||
PRInt32 *currentPos, PRInt32 estimatedLength)
|
||||
{
|
||||
if((nsnull == outString ) || (nsnull == bufferLength) ||(nsnull ==currentPos))
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
char fallbackStr[256];
|
||||
nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
PRInt32 tempLen = (PRInt32) PL_strlen(fallbackStr);
|
||||
|
||||
// reallocate if the buffer is not large enough
|
||||
if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
|
||||
char *temp = (char *) PR_Realloc(*outString, *bufferLength + tempLen);
|
||||
if (NULL != temp) {
|
||||
// adjust length/pointer after realloc
|
||||
*bufferLength += tempLen;
|
||||
*outString = temp;
|
||||
} else {
|
||||
*outString = NULL;
|
||||
*bufferLength =0;
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
}
|
||||
nsCRT::memcpy((*outString + *currentPos), fallbackStr, tempLen);
|
||||
*currentPos += tempLen;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Encode a null-terminated UCS-2 string with mEncoder.
//   inString  - input text; must not be null
//   outString - receives a PR_Malloc'ed, null-terminated buffer on success
//               and NULL on failure
// Each time the encoder reports NS_ERROR_UENC_NOMAPPING the encoder is reset,
// the fallback text (if the attribute asks for one) is appended through
// HandleFallBack(), one input character is skipped and encoding resumes.
// Returns NS_ERROR_UENC_NOMAPPING together with the converted text when an
// unmapped character was met and the attribute specifies no fallback, so the
// caller can apply its own.
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;

  *outString = NULL;

  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;
  if (NULL == mEncoder)
    return NS_ERROR_FAILURE;

  nsresult rv;
  PRInt32 inStringLength = nsCRT::strlen(inString);   // original input string length
  PRInt32 bufferLength;                               // allocated buffer length
  PRInt32 srcLength = inStringLength;
  PRInt32 dstLength;
  char *dstPtr = NULL;
  nsresult saveResult = NS_OK;                        // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + 512; // reserve 512 byte for fallback.
  dstPtr = (char *) PR_Malloc(bufferLength);
  if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;

  // an empty input never enters the loop; it must still yield ""
  dstPtr[0] = '\0';

  for (PRInt32 pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode; hold one byte back so the terminator written
    // below always stays inside the buffer
    dstLength = bufferLength - pos2 - 1;
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    pos1 += srcLength;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened and reset the result
    saveResult = rv;
    rv = NS_OK;

    // reset the encoder before resuming
    mEncoder->Reset();

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      // NOTE(review): this assumes the encoder leaves srcLength pointing at
      // the unmapped character (not past it) - confirm against the encoder.
      PRUnichar unMappedChar = inString[pos1];

      // estimate the space still needed for the rest of the input
      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv))
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv))
        break;
      dstPtr[pos2] = '\0';
    }
    pos1++;  // for the unmapped char

    // remaining input is counted only after the unmapped char is skipped;
    // counting before the skip would hand the encoder one char too many
    srcLength = inStringLength - pos1;
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if ((NS_ERROR_UENC_NOMAPPING == saveResult) && ATTR_NO_FALLBACK(mAttribute)) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
|
||||
|
||||
// Replace characters of a null-terminated UCS-2 string by their entities.
//   inString  - input text; must not be null
//   outString - receives a newly allocated UCS-2 string (the caller in this
//               file frees it with nsAllocator::Free); NULL on failure
// A character for which the entity converter fails or yields no entity is
// copied through unchanged.
// Returns NS_ERROR_NULL_POINTER for null arguments, NS_ERROR_FAILURE when no
// entity converter exists and NS_ERROR_OUT_OF_MEMORY when the result string
// could not be allocated.
NS_IMETHODIMP
nsSaveAsCharset::DoEntityConversion(const PRUnichar *inString, PRUnichar **outString)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;

  *outString = NULL;

  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;
  if (NULL == mEntityConverter)
    return NS_ERROR_FAILURE;

  nsresult rv;

  nsString aString(inString), tempString("");
  const PRInt32 inLength = aString.Length();  // loop invariant, read once

  for (PRInt32 i = 0; i < inLength; i++) {
    PRUnichar *entity = NULL;
    rv = mEntityConverter->ConvertToEntity(inString[i], &entity);
    if (NS_SUCCEEDED(rv) && NULL != entity) {
      tempString.Append(entity);
      nsAllocator::Free(entity);
    }
    else {
      // no entity for this character: keep it as it is
      tempString.Append(inString[i]);
    }
  }

  *outString = tempString.ToNewUnicode();
  // test the allocated string, not the (already validated) out parameter
  if (nsnull == *outString)
    return NS_ERROR_OUT_OF_MEMORY;

  return NS_OK;
}
|
||||
|
||||
// Write the entity for a single character into a caller-supplied buffer.
//   inCharacter  - character to look up
//   outString    - destination buffer; must not be null
//   bufferLength - size of outString, at least 1
// outString is set to the empty string before the lookup. Any failure,
// including a lookup that succeeds without producing an entity, is reported
// as an error so the caller can fall back to another representation instead
// of silently emitting nothing.
NS_IMETHODIMP
nsSaveAsCharset::DoEntityConversion(PRUnichar inCharacter, char *outString, PRInt32 bufferLength)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;
  if (bufferLength < 1)
    return NS_ERROR_FAILURE;

  *outString = '\0';

  if (NULL == mEntityConverter)
    return NS_ERROR_FAILURE;

  PRUnichar *entity = NULL;
  nsresult rv = mEntityConverter->ConvertToEntity(inCharacter, &entity);
  if (NS_FAILED(rv))
    return rv;
  if (NULL == entity)
    return NS_ERROR_FAILURE;  // nothing produced: let the caller fall back

  nsString temp(entity);
  nsAllocator::Free(entity);

  if (NULL == temp.ToCString(outString, bufferLength))
    return NS_ERROR_OUT_OF_MEMORY;

  return rv;
}
|
||||
|
||||
// Produce the fallback text for one character the encoder could not map.
//   inCharacter  - the unmapped character
//   outString    - destination buffer; must not be null
//   bufferLength - size of outString, at least 1
// outString is first set to the empty string. If the attribute specifies no
// fallback, that empty string is the result. If the attribute asks for
// entities after charset conversion, the entity is tried first; when that
// fails, the fallback value of the attribute selects '?', \uxxxx, a decimal
// NCR, a hex NCR or nothing.
// Returns NS_ERROR_ILLEGAL_VALUE for an unknown fallback value.
NS_IMETHODIMP
nsSaveAsCharset::DoConversionFallBack(PRUnichar inCharacter, char *outString, PRInt32 bufferLength)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;
  if (bufferLength < 1)
    return NS_ERROR_FAILURE;  // no room even for the terminator

  *outString = '\0';

  nsresult rv = NS_OK;

  if (ATTR_NO_FALLBACK(mAttribute)) {
    return NS_OK;
  }
  if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
    rv = DoEntityConversion(inCharacter, outString, bufferLength);
    if (NS_SUCCEEDED(rv)) return rv;
    // no entity available: continue with the configured fallback below
  }

  switch (MASK_FALLBACK(mAttribute)) {
  case attr_FallbackQuestionMark:
    if (bufferLength >= 2) {
      *outString++ = '?';
      *outString = '\0';
      rv = NS_OK;
    } else {
      rv = NS_ERROR_FAILURE;
    }
    break;
  case attr_FallbackEscapeU:
    rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inCharacter) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackDecimalNCR:
    rv = (PR_snprintf(outString, bufferLength, "&#%u;", inCharacter) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackHexNCR:
    rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inCharacter) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackNone:
    // reached only after a failed entity lookup: output stays empty
    rv = NS_OK;
    break;
  default:
    rv = NS_ERROR_ILLEGAL_VALUE;
    break;
  }

  return rv;
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Allocate a new nsSaveAsCharset and hand it back through inst.
// Returns NS_ERROR_NULL_POINTER when inst is null and NS_ERROR_OUT_OF_MEMORY
// when the stored pointer is null after the allocation.
// NOTE(review): no AddRef is performed here - confirm the callers expect that.
nsresult
NS_NewSaveAsCharset(nsISupports **inst)
{
  if (nsnull == inst)
    return NS_ERROR_NULL_POINTER;

  nsSaveAsCharset *converter = new nsSaveAsCharset;
  *inst = (nsISupports *) converter;

  return (NULL == *inst) ? NS_ERROR_OUT_OF_MEMORY : NS_OK;
}
|
||||
78
mozilla/intl/unicharutil/src/nsSaveAsCharset.h
Normal file
78
mozilla/intl/unicharutil/src/nsSaveAsCharset.h
Normal file
@ -0,0 +1,78 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "nsIFactory.h"
|
||||
#include "nsString.h"
|
||||
#include "nsISaveAsCharset.h"
|
||||
|
||||
|
||||
// Extracts the fallback field (bits covered by mask_Fallback) of attribute a.
#define MASK_FALLBACK(a) (nsISaveAsCharset::mask_Fallback & (a))
|
||||
// Extracts the entity field (bits covered by mask_Entity) of attribute a.
#define MASK_ENTITY(a) (nsISaveAsCharset::mask_Entity & (a))
|
||||
// True when attribute a asks for no fallback at all: the fallback field is
// attr_FallbackNone and entities after charset conversion are not requested.
#define ATTR_NO_FALLBACK(a) (nsISaveAsCharset::attr_FallbackNone == MASK_FALLBACK(a) && \
|
||||
nsISaveAsCharset::attr_EntityAfterCharsetConv != MASK_ENTITY(a))
|
||||
|
||||
// Forward declarations: the class below only holds pointers to these types.
class nsIUnicodeEncoder;
|
||||
class nsIEntityConverter;
|
||||
|
||||
// Implementation of nsISaveAsCharset: charset conversion of UCS-2 text with
// optional named-entity generation and fallback for unmapped characters.
class nsSaveAsCharset : public nsISaveAsCharset
|
||||
{
|
||||
public:
|
||||
|
||||
//
|
||||
// implementation methods
|
||||
//
|
||||
nsSaveAsCharset();
|
||||
virtual ~nsSaveAsCharset();
|
||||
|
||||
//
|
||||
// nsISupports
|
||||
//
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
//
|
||||
// nsISaveAsCharset
|
||||
//
|
||||
// stores the attribute and entity version and sets up the encoder
NS_IMETHOD Init(const char *charset, PRUint32 attr, PRUint32 entityVersion);
|
||||
|
||||
// converts inString to the charset given to Init(); result goes to _retval
NS_IMETHOD Convert(const PRUnichar *inString, char **_retval);
|
||||
|
||||
protected:
|
||||
|
||||
// encodes inString with mEncoder, applying the fallback for unmapped chars
NS_IMETHOD DoCharsetConversion(const PRUnichar *inString, char **outString);
|
||||
|
||||
// replaces characters of inString by entities; result is a new UCS-2 string
NS_IMETHOD DoEntityConversion(const PRUnichar *inString, PRUnichar **outString);
|
||||
|
||||
// writes the entity for a single character into a caller-supplied buffer
NS_IMETHOD DoEntityConversion(PRUnichar inCharacter, char *outString, PRInt32 bufferLength);
|
||||
|
||||
// writes the fallback text for one unmapped character into outString
NS_IMETHOD DoConversionFallBack(PRUnichar inCharacter, char *outString, PRInt32 bufferLength);
|
||||
|
||||
// do the fallback, reallocate the buffer if necessary
|
||||
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
|
||||
NS_IMETHOD HandleFallBack(PRUnichar character, char **outString, PRInt32 *bufferLength,
|
||||
PRInt32 *currentPos, PRInt32 estimatedLength);
|
||||
|
||||
|
||||
PRUint32 mAttribute; // conversion attribute
|
||||
PRUint32 mEntityVersion; // see nsIEntityConverter
|
||||
nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
|
||||
nsIEntityConverter *mEntityConverter; // entity converter; NULL until Init() creates one
|
||||
};
|
||||
|
||||
|
||||
// Creates a new nsSaveAsCharset and stores it in *inst.
nsresult NS_NewSaveAsCharset(nsISupports **inst);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user