diff --git a/mozilla/config/autoconf.mk.in b/mozilla/config/autoconf.mk.in index d6598efada3..1a2fa6c75d2 100644 --- a/mozilla/config/autoconf.mk.in +++ b/mozilla/config/autoconf.mk.in @@ -281,6 +281,9 @@ NSPR_LIBS = @NSPR_LIBS@ LDAP_CFLAGS = @LDAP_CFLAGS@ LDAP_LIBS = @LDAP_LIBS@ +# UNIX98 iconv support +LIBICONV = @LIBICONV@ + # MKSHLIB_FORCE_ALL is used to force the linker to include all object # files present in an archive. MKSHLIB_UNFORCE_ALL reverts the linker # to normal behavior. Makefile's that create shared libraries out of diff --git a/mozilla/configure.in b/mozilla/configure.in index fa800c2c1dc..169c47bcb7f 100644 --- a/mozilla/configure.in +++ b/mozilla/configure.in @@ -448,7 +448,7 @@ MOZ_JS_LIBS='-L$(DIST)/bin -lmozjs' XPCOM_LIBS='-L$(DIST)/bin -lxpcom' MOZ_COMPONENT_NSPR_LIBS='-L$(DIST)/bin $(NSPR_LIBS)' -MOZ_COMPONENT_XPCOM_LIBS='-L$(DIST)/bin -lxpcom' +MOZ_COMPONENT_XPCOM_LIBS='$(XPCOM_LIBS)' _PLATFORM_DEFAULT_TOOLKIT=gtk MOZ_WIDGET_TOOLKIT_LDFLAGS='-lwidget_$(MOZ_WIDGET_TOOLKIT)' @@ -1954,6 +1954,61 @@ AC_CACHE_CHECK( if test "$ac_cv_func_gnu_get_libc_version" = "yes"; then AC_DEFINE(HAVE_GNU_GET_LIBC_VERSION) fi + +case $target_os in + os2*|msvc*|mksnt*|cygwin*|mingw*) + ;; + *) + +AC_CHECK_LIB(iconv, iconv, [_ICONV_LIBS="$_ICONV_LIBS -liconv"], + AC_CHECK_LIB(iconv, libiconv, [_ICONV_LIBS="$_ICONV_LIBS -liconv"])) +_SAVE_LIBS=$LIBS +LIBS="$LIBS $_ICONV_LIBS" +AC_CACHE_CHECK( + [for iconv()], + ac_cv_func_iconv, + [AC_TRY_LINK([ + #include + #include + ], + [ + iconv_t h = iconv_open("", ""); + iconv(h, NULL, NULL, NULL, NULL); + iconv_close(h); + ], + [ac_cv_func_iconv=yes], + [ac_cv_func_iconv=no] + )] + ) +if test "$ac_cv_func_iconv" = "yes"; then + AC_DEFINE(HAVE_ICONV) + XPCOM_LIBS="$XPCOM_LIBS $_ICONV_LIBS" + LIBICONV="$_ICONV_LIBS" + AC_CACHE_CHECK( + [for iconv() with const input], + ac_cv_func_const_iconv, + [AC_TRY_COMPILE([ + #include + ], + [ + const char *input = "testing"; + iconv_t h = iconv_open("", ""); + iconv(h, &input, NULL, 
NULL, NULL); + iconv_close(h); + ], + [ac_cv_func_const_iconv=yes], + [ac_cv_func_const_iconv=no] + )] + ) + if test "$ac_cv_func_const_iconv" = "yes"; then + AC_DEFINE(HAVE_ICONV_WITH_CONST_INPUT) + fi +fi +LIBS=$_SAVE_LIBS + + ;; +esac + AC_LANG_C dnl Does this platform require array notation to assign to a va_list? @@ -4328,6 +4383,7 @@ AC_SUBST(MOZ_LDAP_XPCOM) AC_SUBST(MOZ_LDAP_XPCOM_EXPERIMENTAL) AC_SUBST(LDAP_CFLAGS) AC_SUBST(LDAP_LIBS) +AC_SUBST(LIBICONV) AC_SUBST(HAVE_XIE) AC_SUBST(MOZ_XIE_LIBS) diff --git a/mozilla/xpcom/build/Makefile.in b/mozilla/xpcom/build/Makefile.in index d9ee981c4f0..7fa704a6576 100644 --- a/mozilla/xpcom/build/Makefile.in +++ b/mozilla/xpcom/build/Makefile.in @@ -110,6 +110,9 @@ ifeq ($(MOZ_OS2_TOOLS),VACPP) OS_LIBS += libconv.lib libuls.lib endif +# UNIX98 iconv support +OS_LIBS += $(LIBICONV) + include $(topsrcdir)/config/rules.mk DEFINES += \ diff --git a/mozilla/xpcom/build/nsXPComInit.cpp b/mozilla/xpcom/build/nsXPComInit.cpp index 0f2c0e1ba95..b93b0815703 100644 --- a/mozilla/xpcom/build/nsXPComInit.cpp +++ b/mozilla/xpcom/build/nsXPComInit.cpp @@ -83,6 +83,7 @@ #include "nsILocalFile.h" #include "nsLocalFile.h" +#include "nsNativeCharsetUtils.h" #include "nsDirectoryService.h" #include "nsDirectoryServiceDefs.h" #include "nsICategoryManager.h" @@ -332,6 +333,7 @@ nsresult NS_COM NS_InitXPCOM2(nsIServiceManager* *result, rv = nsMemoryImpl::Startup(); if (NS_FAILED(rv)) return rv; + NS_StartupNativeCharsetUtils(); NS_StartupLocalFile(); StartupSpecialSystemDirectory(); @@ -585,6 +587,7 @@ nsresult NS_COM NS_ShutdownXPCOM(nsIServiceManager* servMgr) // Shutdown nsLocalFile string conversion NS_ShutdownLocalFile(); + NS_ShutdownNativeCharsetUtils(); // Shutdown the timer thread and all timers that might still be alive before // shutting down the component manager diff --git a/mozilla/xpcom/io/Makefile.in b/mozilla/xpcom/io/Makefile.in index 3d67cb4eb70..f305bcc9c03 100644 --- a/mozilla/xpcom/io/Makefile.in +++ 
b/mozilla/xpcom/io/Makefile.in @@ -55,6 +55,7 @@ CPPSRCS = \ nsSpecialSystemDirectory.cpp \ nsStorageStream.cpp \ nsUnicharInputStream.cpp \ + nsNativeCharsetUtils.cpp \ $(NULL) #ifneq ($(MOZ_WIDGET_TOOLKIT),os2) diff --git a/mozilla/xpcom/io/nsLocalFileUnix.cpp b/mozilla/xpcom/io/nsLocalFileUnix.cpp index bd8d5f0dd5e..281545850ac 100644 --- a/mozilla/xpcom/io/nsLocalFileUnix.cpp +++ b/mozilla/xpcom/io/nsLocalFileUnix.cpp @@ -65,19 +65,7 @@ #include "nsISimpleEnumerator.h" #include "nsITimelineService.h" -// nl_langinfo support -#ifdef HAVE_NL_TYPES_H -#include -#endif -#ifdef HAVE_NL_LANGINFO -#include -#endif - -// wchar_t support -#include // wctomb/mbtowc on some platforms -#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC) -#include // wcrtomb/mbrtowc on some platforms -#endif +#include "nsNativeCharsetUtils.h" // On some platforms file/directory name comparisons need to // be case-blind. @@ -1547,120 +1535,13 @@ NS_NewNativeLocalFile(const nsACString &path, PRBool followSymlinks, nsILocalFil // unicode support //----------------------------------------------------------------------------- -#define TOLERATE_UCONV_FAILURE 1 - -static int -convert_ucs2_to_native(const nsAString &input, char *result, unsigned resultLen) -{ - // this function assumes that |result| is big enough - NS_ASSERTION(resultLen == PATH_MAX, "unexpected resultLen"); -#ifdef HAVE_WCRTOMB - mbstate_t ps = {0}; -#endif - char *cursor = result; - int i = 0; - - nsAString::const_iterator start, end; - input.BeginReading(start); - input.EndReading(end); - PRUint32 size; - - for ( ; start != end; start.advance(size)) { - size = start.size_forward(); - - const PRUnichar *p = start.get(); - - for (PRUint32 j = 0; j < size; ++j, ++p) { -#ifdef HAVE_WCRTOMB - i = (int) wcrtomb(cursor, (wchar_t) *p, &ps); -#else - // XXX is this thread-safe? 
- i = (int) wctomb(cursor, (wchar_t) *p); -#endif - if (i < 0) { - NS_WARNING("wctomb failed: possible charset mismatch"); -#ifdef TOLERATE_UCONV_FAILURE - *cursor = (unsigned char) *p; // truncate - i = 1; -#else - return -1; -#endif - } - // most likely we're dead anyways if this assertion should fire - NS_ASSERTION(cursor + i <= result + resultLen, "wrote beyond end of string"); - cursor += i; - if (cursor >= result + resultLen - 1) { - cursor = result + resultLen - 1; // fixup cursor - break; - } - } - } - *cursor = '\0'; - return cursor - result; -} - -static int -convert_native_to_ucs2(const char *input, unsigned inputLen, nsAString &result) -{ -#ifdef HAVE_MBRTOWC - mbstate_t ps = {0}; -#endif - PRUnichar *p; - int i, resultLen = 0; - - result.Truncate(); - - // allocate space for largest possible result - result.SetLength(inputLen); - - nsAString::iterator start; - result.BeginWriting(start); - - p = start.get(); - if (!p) { - NS_ERROR("memory allocation failed"); - return -1; - } - - // cannot use wchar_t here since it may have been redefined (e.g., - // via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP. - unsigned int tmp = 0; - while (*input) { -#ifdef HAVE_MBRTOWC - i = (int) mbrtowc((wchar_t *) &tmp, input, inputLen, &ps); -#else - // XXX is this thread-safe? 
- i = (int) mbtowc((wchar_t *) &tmp, input, inputLen); -#endif - if (i < 0) { - NS_WARNING("mbtowc failed: possible charset mismatch"); -#ifdef TOLERATE_UCONV_FAILURE - // truncate and hope for the best - tmp = (unsigned char) *input; - i = 1; -#else - nsMemory::Free(*result); - *result = nsnull; - return -1; -#endif - } - *p = (PRUnichar) tmp; - input += i; - inputLen -= i; - p++; - resultLen++; - } - result.SetLength(resultLen); - return 0; -} - #define SET_UCS(func, ucsArg) \ { \ - char buf[PATH_MAX]; \ - int i = convert_ucs2_to_native(ucsArg, buf, PATH_MAX); \ - if (i == -1) \ - return NS_ERROR_FAILURE; \ - return (func)(nsDependentCString(buf, PRUint32(i))); \ + nsCAutoString buf; \ + nsresult rv = NS_CopyUnicodeToNative(ucsArg, buf); \ + if (NS_FAILED(rv)) \ + return rv; \ + return (func)(buf); \ } #define GET_UCS(func, ucsArg) \ @@ -1668,19 +1549,16 @@ convert_native_to_ucs2(const char *input, unsigned inputLen, nsAString &result) nsCAutoString buf; \ nsresult rv = (func)(buf); \ if (NS_FAILED(rv)) return rv; \ - int i = convert_native_to_ucs2(buf.get(), buf.Length(), ucsArg); \ - if (i == -1) \ - return NS_ERROR_FAILURE; \ - return NS_OK; \ + return NS_CopyNativeToUnicode(buf, ucsArg); \ } #define SET_UCS_2ARGS_2(func, opaqueArg, ucsArg) \ { \ - char buf[PATH_MAX]; \ - int i = convert_ucs2_to_native(ucsArg, buf, PATH_MAX); \ - if (i == -1) \ - return NS_ERROR_FAILURE; \ - return (func)(opaqueArg, nsDependentCString(buf, PRUint32(i))); \ + nsCAutoString buf; \ + nsresult rv = NS_CopyUnicodeToNative(ucsArg, buf); \ + if (NS_FAILED(rv)) \ + return rv; \ + return (func)(opaqueArg, buf); \ } // Unicode interface Wrapper @@ -1712,10 +1590,7 @@ nsLocalFile::SetLeafName(const nsAString &aLeafName) nsresult nsLocalFile::GetPath(nsAString &_retval) { - int i = convert_native_to_ucs2(mPath.get(), mPath.Length(), _retval); - if (i == -1) - return NS_ERROR_FAILURE; - return NS_OK; + return NS_CopyNativeToUnicode(mPath, _retval); } nsresult nsLocalFile::CopyTo(nsIFile 
*newParentDir, const nsAString &newName) @@ -1740,11 +1615,11 @@ nsLocalFile::GetTarget(nsAString &_retval) nsresult NS_NewLocalFile(const nsAString &path, PRBool followLinks, nsILocalFile* *result) { - char buf[PATH_MAX]; - int i = convert_ucs2_to_native(path, buf, PATH_MAX); - if (i == -1) - return NS_ERROR_FAILURE; - return NS_NewNativeLocalFile(nsDependentCString(buf, PRUint32(i)), followLinks, result); + nsCAutoString buf; + nsresult rv = NS_CopyUnicodeToNative(path, buf); + if (NS_FAILED(rv)) + return rv; + return NS_NewNativeLocalFile(buf, followLinks, result); } //----------------------------------------------------------------------------- @@ -1754,8 +1629,6 @@ NS_NewLocalFile(const nsAString &path, PRBool followLinks, nsILocalFile* *result void nsLocalFile::GlobalInit() { - // need to initialize the locale or else charset conversion will fail. - setlocale(LC_CTYPE, ""); } void diff --git a/mozilla/xpcom/io/nsNativeCharsetUtils.cpp b/mozilla/xpcom/io/nsNativeCharsetUtils.cpp new file mode 100644 index 00000000000..193cfbeade4 --- /dev/null +++ b/mozilla/xpcom/io/nsNativeCharsetUtils.cpp @@ -0,0 +1,772 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2002 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): + * Darin Fisher + * Brian Stell + * Frank Tang + * Brendan Eich + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#if defined(XP_UNIX) + +#include // mbtowc, wctomb +#include // setlocale +#include "nscore.h" +#include "prlock.h" +#include "nsAString.h" + +// +// choose a conversion library. under linux we prefer using wcrtomb/mbrtowc +// to improve performance. other platforms in which wchar_t is unicode might +// benefit from this optimization as well. 
+//
+#if defined(__linux) && defined(HAVE_WCRTOMB) && defined(HAVE_MBRTOWC)
+#define USE_STDCONV 1
+#elif defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_NL_LANGINFO)
+#define USE_ICONV 1
+#else
+#define USE_STDCONV 1
+#endif
+
+//
+// widen bytes to PRUnichars one-for-one (each byte is read as unsigned)
+// until either |*inputLeft| or |*outputLeft| reaches zero. all four
+// in/out params are advanced past what was converted.
+//
+static void
+isolatin1_to_ucs2(const char **input, PRUint32 *inputLeft, PRUnichar **output, PRUint32 *outputLeft)
+{
+    while (*inputLeft && *outputLeft) {
+        **output = (unsigned char) **input;
+        (*input)++;
+        (*inputLeft)--;
+        (*output)++;
+        (*outputLeft)--;
+    }
+}
+
+//
+// narrow PRUnichars to bytes one-for-one, keeping only the low 8 bits of
+// each value, until either |*inputLeft| or |*outputLeft| reaches zero.
+// all four in/out params are advanced past what was converted.
+//
+static void
+ucs2_to_isolatin1(const PRUnichar **input, PRUint32 *inputLeft, char **output, PRUint32 *outputLeft)
+{
+    while (*inputLeft && *outputLeft) {
+        **output = (unsigned char) **input;
+        (*input)++;
+        (*inputLeft)--;
+        (*output)++;
+        (*outputLeft)--;
+    }
+}
+
+//-----------------------------------------------------------------------------
+// conversion using iconv
+//-----------------------------------------------------------------------------
+#if defined(USE_ICONV)
+#include <nl_types.h> // CODESET
+#include <langinfo.h> // nl_langinfo
+#include <iconv.h>    // iconv_open, iconv, iconv_close
+#include <errno.h>
+
+// iconv() takes its input as |const char **| on some platforms and as
+// |char **| on others; configure defines HAVE_ICONV_WITH_CONST_INPUT
+// when the const form compiles.
+#if defined(HAVE_ICONV_WITH_CONST_INPUT)
+#define ICONV_INPUT(x) (x)
+#else
+#define ICONV_INPUT(x) ((char **)x)
+#endif
+
+// solaris definitely needs this, but we'll enable it by default
+// just in case...
+#define ENABLE_UTF8_FALLBACK_SUPPORT
+
+#define INVALID_ICONV_T ((iconv_t) -1)
+
+//
+// wrapper around iconv() that hides the const-ness difference of the
+// input parameter. returns what iconv() returns, except for the E2BIG
+// case described in the body.
+//
+static inline size_t
+xp_iconv(iconv_t converter,
+         const char **input,
+         size_t      *inputLeft,
+         char       **output,
+         size_t      *outputLeft)
+{
+    size_t res, outputAvail = outputLeft ? *outputLeft : 0;
+    res = iconv(converter, ICONV_INPUT(input), inputLeft, output, outputLeft);
+    if (res == (size_t) -1) {
+        // on some platforms (e.g., linux) iconv will fail with
+        // E2BIG if it cannot convert _all_ of its input.  it'll
+        // still adjust all of the in/out params correctly, so we
+        // can ignore this error.  the assumption is that we will
+        // be called again to complete the conversion.
+        //
+        // |outputLeft| is NULL when the caller is only resetting the
+        // converter, so it is checked before being dereferenced.
+        if ((errno == E2BIG) && outputLeft && (*outputLeft < outputAvail))
+            res = 0;
+    }
+    return res;
+}
+
+//
+// open the first converter that iconv_open() accepts for any pairing of
+// a name from |to_list| with a name from |from_list|. both lists are
+// NULL-terminated; empty-string entries are skipped. returns
+// INVALID_ICONV_T when no pairing can be opened.
+//
+static inline iconv_t
+xp_iconv_open(const char **to_list, const char **from_list)
+{
+    iconv_t res;
+    const char **from_name;
+    const char **to_name;
+
+    // try all possible combinations to locate a converter.
+    to_name = to_list;
+    while (*to_name) {
+        if (**to_name) {
+            from_name = from_list;
+            while (*from_name) {
+                if (**from_name) {
+                    res = iconv_open(*to_name, *from_name);
+                    if (res != INVALID_ICONV_T)
+                        return res;
+                }
+                from_name++;
+            }
+        }
+        to_name++;
+    }
+
+    return INVALID_ICONV_T;
+}
+
+// spellings tried, in order, when asking iconv for a given charset.
+static const char *UCS_2_NAMES[] = {
+    "UCS-2",
+    "UCS2",
+    "UCS_2",
+    "ucs-2",
+    "ucs2",
+    "ucs_2",
+    NULL
+};
+
+static const char *UTF_8_NAMES[] = {
+    "UTF-8",
+    "UTF8",
+    "UTF_8",
+    "utf-8",
+    "utf8",
+    "utf_8",
+    NULL
+};
+
+static const char *ISO_8859_1_NAMES[] = {
+    "ISO-8859-1",
+    "ISO8859-1",
+    "ISO88591",
+    "ISO_8859_1",
+    "ISO8859_1",
+    "iso-8859-1",
+    "iso8859-1",
+    "iso88591",
+    "iso_8859_1",
+    "iso8859_1",
+    NULL
+};
+
+//
+// iconv-based converter. the iconv descriptors are static and shared by
+// all instances; the constructor takes |gLock| (when it exists) and the
+// destructor releases it, so an instance should live only for the
+// duration of one conversion.
+//
+class nsNativeCharsetConverter
+{
+public:
+    nsNativeCharsetConverter();
+   ~nsNativeCharsetConverter();
+
+    // both methods advance the pointers and decrement the counts
+    // (counts are in bytes for |char| and in PRUnichars for |PRUnichar|).
+    nsresult NativeToUnicode(const char      **input , PRUint32 *inputLeft,
+                             PRUnichar       **output, PRUint32 *outputLeft);
+    nsresult UnicodeToNative(const PRUnichar **input , PRUint32 *inputLeft,
+                             char            **output, PRUint32 *outputLeft);
+
+    static void GlobalInit();
+    static void GlobalShutdown();
+
+private:
+    static iconv_t gNativeToUnicode;
+    static iconv_t gUnicodeToNative;
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+    static iconv_t gNativeToUTF8;
+    static iconv_t gUTF8ToNative;
+    static iconv_t gUnicodeToUTF8;
+    static iconv_t gUTF8ToUnicode;
+#endif
+    static PRLock *gLock;
+    static PRBool  gInitialized;
+
+    static void LazyInit();
+
+    // no-ops when the lock was never created.
+    static void Lock()   { if (gLock) PR_Lock(gLock);   }
+    static void Unlock() { if (gLock) PR_Unlock(gLock); }
+};
+
+iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
+iconv_t
nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+iconv_t nsNativeCharsetConverter::gNativeToUTF8    = INVALID_ICONV_T;
+iconv_t nsNativeCharsetConverter::gUTF8ToNative    = INVALID_ICONV_T;
+iconv_t nsNativeCharsetConverter::gUnicodeToUTF8   = INVALID_ICONV_T;
+iconv_t nsNativeCharsetConverter::gUTF8ToUnicode   = INVALID_ICONV_T;
+#endif
+PRLock *nsNativeCharsetConverter::gLock            = nsnull;
+PRBool  nsNativeCharsetConverter::gInitialized     = PR_FALSE;
+
+//
+// open the shared iconv descriptors for the charset that nl_langinfo()
+// reports, then mark the class as initialized. when a direct
+// native<->ucs-2 descriptor cannot be opened and UTF-8 fallback support
+// is compiled in, a pair of descriptors going through UTF-8 is opened
+// for that direction instead.
+//
+void
+nsNativeCharsetConverter::LazyInit()
+{
+    // one-entry candidate list; slot 0 receives the locale's codeset name.
+    const char *locale_names[] = { "", NULL };
+    const char **native_names = locale_names;
+
+    const char *codeset = nl_langinfo(CODESET);
+    if (codeset != nsnull)
+        native_names[0] = codeset;
+    else {
+        NS_ERROR("native charset is unknown");
+        // fallback to ISO-8859-1
+        native_names = ISO_8859_1_NAMES;
+    }
+
+    gNativeToUnicode = xp_iconv_open(UCS_2_NAMES, native_names);
+    gUnicodeToNative = xp_iconv_open(native_names, UCS_2_NAMES);
+
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+    // native -> ucs-2 unavailable: go native -> utf-8 -> ucs-2
+    if (gNativeToUnicode == INVALID_ICONV_T) {
+        gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_names);
+        gUTF8ToUnicode = xp_iconv_open(UCS_2_NAMES, UTF_8_NAMES);
+        NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
+        NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to ucs-2 converter");
+    }
+    // ucs-2 -> native unavailable: go ucs-2 -> utf-8 -> native
+    if (gUnicodeToNative == INVALID_ICONV_T) {
+        gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UCS_2_NAMES);
+        gUTF8ToNative = xp_iconv_open(native_names, UTF_8_NAMES);
+        NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no unicode to utf-8 converter");
+        NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
+    }
+#else
+    NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to ucs-2 converter");
+    NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no ucs-2 to native converter");
+#endif
+
+    gInitialized = PR_TRUE;
+}
+
+void
+nsNativeCharsetConverter::GlobalInit()
+{
+    // create the lock that the constructor/destructor pair takes and releases.
+    gLock = PR_NewLock();
+    NS_ASSERTION(gLock, "lock creation failed");
+}
+
+// close |conv| if it is open and mark it as closed.
+static inline void
+close_converter(iconv_t &conv)
+{
+    if (conv == INVALID_ICONV_T)
+        return;
+    iconv_close(conv);
+    conv = INVALID_ICONV_T;
+}
+
+// reset |conv| if it is open.
+static inline void
+reset_converter(iconv_t conv)
+{
+    if (conv != INVALID_ICONV_T)
+        xp_iconv(conv, NULL, NULL, NULL, NULL);
+}
+
+//
+// destroy the lock, close every open descriptor and clear the
+// initialized flag so that a later instance would run LazyInit again.
+//
+void
+nsNativeCharsetConverter::GlobalShutdown()
+{
+    if (gLock) {
+        PR_DestroyLock(gLock);
+        gLock = nsnull;
+    }
+
+    close_converter(gNativeToUnicode);
+    close_converter(gUnicodeToNative);
+
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+    close_converter(gNativeToUTF8);
+    close_converter(gUTF8ToNative);
+    close_converter(gUnicodeToUTF8);
+    close_converter(gUTF8ToUnicode);
+#endif
+
+    gInitialized = PR_FALSE;
+}
+
+// take the lock, then open the shared descriptors on first use.
+nsNativeCharsetConverter::nsNativeCharsetConverter()
+{
+    Lock();
+    if (!gInitialized)
+        LazyInit();
+}
+
+nsNativeCharsetConverter::~nsNativeCharsetConverter()
+{
+    // reset converters for next time
+    reset_converter(gNativeToUnicode);
+    reset_converter(gUnicodeToNative);
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+    reset_converter(gNativeToUTF8);
+    reset_converter(gUTF8ToNative);
+    reset_converter(gUnicodeToUTF8);
+    reset_converter(gUTF8ToUnicode);
+#endif
+    Unlock();
+}
+
+nsresult
+nsNativeCharsetConverter::NativeToUnicode(const char **input,
+                                          PRUint32    *inputLeft,
+                                          PRUnichar  **output,
+                                          PRUint32    *outputLeft)
+{
+    size_t
res = 0;
+    // iconv works in bytes: |inLeft| is the native byte count and
+    // |outLeft| is the output capacity in bytes (two per PRUnichar).
+    size_t inLeft = (size_t) *inputLeft;
+    size_t outLeft = (size_t) *outputLeft * 2;
+
+    if (gNativeToUnicode != INVALID_ICONV_T) {
+
+        res = xp_iconv(gNativeToUnicode, input, &inLeft, (char **) output, &outLeft);
+
+        // |*input| and |*output| were handed to iconv directly and are
+        // advanced past whatever it converted, even on failure.  bring the
+        // caller's counts in line with them before looking at the result;
+        // otherwise the fallback below would pair the advanced pointers
+        // with the original counts and run past both buffers.
+        *inputLeft = inLeft;
+        *outputLeft = outLeft / 2;
+        if (res != (size_t) -1)
+            return NS_OK;
+
+        NS_WARNING("conversion from native to ucs-2 failed");
+
+        // reset converter
+        xp_iconv(gNativeToUnicode, NULL, NULL, NULL, NULL);
+    }
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+    else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
+             (gUTF8ToUnicode != INVALID_ICONV_T)) {
+        // convert first to UTF8, then from UTF8 to UCS2
+        const char *in = *input;
+
+        char ubuf[1024];
+
+        // we assume we're always called with enough space in |output|,
+        // so convert many chars at a time...
+        while (inLeft) {
+            char *p = ubuf;
+            size_t n = sizeof(ubuf);
+            res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
+            if (res == (size_t) -1) {
+                NS_ERROR("conversion from native to utf-8 failed");
+                break;
+            }
+            NS_ASSERTION(outLeft > 0, "bad assumption");
+            p = ubuf;
+            n = sizeof(ubuf) - n;
+            res = xp_iconv(gUTF8ToUnicode, (const char **) &p, &n, (char **) output, &outLeft);
+            if (res == (size_t) -1) {
+                NS_ERROR("conversion from utf-8 to ucs-2 failed");
+                break;
+            }
+        }
+
+        // |*output| has already been advanced by every successful pass
+        // through the loop, so the progress is recorded whether or not the
+        // loop ended in an error.  on error this counts the bytes already
+        // given to the first-stage converter as consumed; the fallback
+        // below then continues from there instead of writing the whole
+        // input again behind the advanced output pointer.
+        (*input) += (*inputLeft - inLeft);
+        *inputLeft = inLeft;
+        *outputLeft = outLeft / 2;
+        if (res != (size_t) -1)
+            return NS_OK;
+
+        // reset converters
+        xp_iconv(gNativeToUTF8, NULL, NULL, NULL, NULL);
+        xp_iconv(gUTF8ToUnicode, NULL, NULL, NULL, NULL);
+    }
+#endif
+
+    // fallback: zero-pad and hope for the best
+    isolatin1_to_ucs2(input, inputLeft, output, outputLeft);
+
+    return NS_OK;
+}
+
+//
+// convert PRUnichars at |*input| to native bytes at |*output|.  the
+// pointers are advanced and the counts decremented to reflect what was
+// converted.  a direct converter is used when one is open; otherwise,
+// when UTF-8 fallback support is compiled in and both descriptors are
+// open, the text goes through UTF-8.  if neither path succeeds the
+// remaining input is truncated to 8 bits.  always returns NS_OK.
+//
+nsresult
+nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
+                                          PRUint32         *inputLeft,
+                                          char            **output,
+                                          PRUint32         *outputLeft)
+{
+    size_t res = 0;
+    size_t inLeft = (size_t) *inputLeft * 2;
+    size_t outLeft = (size_t) *outputLeft;
+
+    if (gUnicodeToNative != INVALID_ICONV_T) {
+        res =
xp_iconv(gUnicodeToNative, (const char **) input, &inLeft, output, &outLeft);
+
+        // |*input| and |*output| were handed to iconv directly and are
+        // advanced past whatever it converted, even on failure (e.g. when
+        // it stops at a character the native charset cannot represent).
+        // bring the caller's counts in line with them before looking at
+        // the result; otherwise the truncation fallback at the end of this
+        // function would pair the advanced pointers with the original
+        // counts and run past both buffers.
+        *inputLeft = inLeft / 2;
+        *outputLeft = outLeft;
+        if (res != (size_t) -1)
+            return NS_OK;
+
+        NS_ERROR("iconv failed");
+
+        // reset converter
+        xp_iconv(gUnicodeToNative, NULL, NULL, NULL, NULL);
+    }
+#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
+    else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
+             (gUTF8ToNative != INVALID_ICONV_T)) {
+        // |in| walks the input as bytes; |*input| itself is only moved
+        // once the loop is done.
+        const char *in = (const char *) *input;
+
+        char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
+
+        // convert one uchar at a time...
+        while (inLeft && outLeft) {
+            char *p = ubuf;
+            size_t n = sizeof(ubuf), one_uchar = sizeof(PRUnichar);
+            res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
+            if (res == (size_t) -1) {
+                NS_ERROR("conversion from ucs-2 to utf-8 failed");
+                break;
+            }
+            // feed the utf-8 bytes just produced to the second converter.
+            p = ubuf;
+            n = sizeof(ubuf) - n;
+            res = xp_iconv(gUTF8ToNative, (const char **) &p, &n, output, &outLeft);
+            if (res == (size_t) -1) {
+                if (errno == E2BIG) {
+                    // not enough room for last uchar... back up and return.
+                    in -= sizeof(PRUnichar);
+                    res = 0;
+                }
+                else
+                    NS_ERROR("conversion from utf-8 to native failed");
+                break;
+            }
+            inLeft -= sizeof(PRUnichar);
+        }
+
+        // |*output| has already been advanced by every successful pass
+        // through the loop, so the progress is recorded whether or not the
+        // loop ended in an error.  otherwise the truncation fallback below
+        // would start again from the first input character while writing
+        // behind the advanced output pointer with the original capacity.
+        (*input) += (*inputLeft - inLeft/2);
+        *inputLeft = inLeft/2;
+        *outputLeft = outLeft;
+        if (res != (size_t) -1)
+            return NS_OK;
+
+        // reset converters
+        xp_iconv(gUnicodeToUTF8, NULL, NULL, NULL, NULL);
+        xp_iconv(gUTF8ToNative, NULL, NULL, NULL, NULL);
+    }
+#endif
+
+    // fallback: truncate and hope for the best
+    ucs2_to_isolatin1(input, inputLeft, output, outputLeft);
+
+    return NS_OK;
+}
+
+#endif // USE_ICONV
+
+//-----------------------------------------------------------------------------
+// conversion using mb[r]towc/wc[r]tomb
+//-----------------------------------------------------------------------------
+#if defined(USE_STDCONV)
+#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
+#include <wchar.h>    // mbrtowc, wcrtomb
+#endif
+
+//
+// converter built on the C library's multibyte functions.  there is no
+// shared descriptor and no lock; each instance carries its own
+// conversion state when the restartable functions are available.
+//
+class nsNativeCharsetConverter
+{
+public:
+    nsNativeCharsetConverter();
+
+    // both methods advance the pointers and decrement the counts
+    // (counts are in bytes for |char| and in PRUnichars for |PRUnichar|).
+    nsresult NativeToUnicode(const char      **input , PRUint32 *inputLeft,
+                             PRUnichar       **output, PRUint32 *outputLeft);
+    nsresult UnicodeToNative(const PRUnichar **input , PRUint32 *inputLeft,
+                             char            **output, PRUint32 *outputLeft);
+
+    static void GlobalInit();
+    static void GlobalShutdown() { }
+
+private:
+    // set by GlobalInit(); when false both methods use the 8-bit
+    // zero-pad/truncate helpers instead of the C library.
+    static PRBool gWCharIsUnicode;
+
+#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
+    // state passed to mbrtowc/wcrtomb
+    mbstate_t ps;
+#endif
+};
+
+PRBool nsNativeCharsetConverter::gWCharIsUnicode = PR_FALSE;
+
+nsNativeCharsetConverter::nsNativeCharsetConverter()
+{
+#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
+    // start every instance from a zeroed conversion state.
+    memset(&ps, 0, sizeof(ps));
+#endif
+}
+
+void
+nsNativeCharsetConverter::GlobalInit()
+{
+    // verify that wchar_t for the current locale is actually unicode.
+    // if it is not, then we should avoid calling mbtowc/wctomb and
+    // just fallback on zero-pad/truncation conversion.
+    //
+    // this test cannot be done at build time because the encoding of
+    // wchar_t may depend on the runtime locale.  sad, but true!!
+    //
+    // the check itself: when wchar_t holds unicode, an ASCII character
+    // keeps its numeric value after conversion to wchar_t.  so convert
+    // the single ASCII character 'a' and compare.
+    //
+    // the check can give a false positive, but in that case ASCII text
+    // (or at least the 'a' character) is still converted properly --
+    // oh well :(
+
+    char probe = 'a';
+    unsigned int wide = 0;
+
+    const int len = mbtowc((wchar_t *) &wide, &probe, 1);
+
+    gWCharIsUnicode = (len != -1) && (wide == 'a');
+
+#ifdef DEBUG
+    if (!gWCharIsUnicode)
+        NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
+#endif
+}
+
+//
+// convert native bytes at |*input| to PRUnichars at |*output|, one
+// character per loop iteration, for as long as both counts are non-zero.
+//
+nsresult
+nsNativeCharsetConverter::NativeToUnicode(const char **input,
+                                          PRUint32    *inputLeft,
+                                          PRUnichar  **output,
+                                          PRUint32    *outputLeft)
+{
+    if (gWCharIsUnicode) {
+        int incr;
+
+        // cannot use wchar_t here since it may have been redefined (e.g.,
+        // via -fshort-wchar).  hopefully, sizeof(tmp) is sufficient XP.
+        unsigned int tmp = 0;
+        while (*inputLeft && *outputLeft) {
+#ifdef HAVE_MBRTOWC
+            incr = (int) mbrtowc((wchar_t *) &tmp, *input, *inputLeft, &ps);
+#else
+            // XXX is this thread-safe?
+            incr = (int) mbtowc((wchar_t *) &tmp, *input, *inputLeft);
+#endif
+            if (incr < 0) {
+                NS_WARNING("mbtowc failed: possible charset mismatch");
+                // zero-pad and hope for the best
+                tmp = (unsigned char) **input;
+                incr = 1;
+            }
+            else if (incr == 0) {
+                // 0 means the null wide character was decoded; it is not
+                // a byte count.  step over the NUL byte, otherwise the
+                // input never advances and the rest of the output is
+                // filled with zeros.
+                incr = 1;
+            }
+            **output = (PRUnichar) tmp;
+            (*input) += incr;
+            (*inputLeft) -= incr;
+            (*output)++;
+            (*outputLeft)--;
+        }
+    }
+    else {
+        // wchar_t isn't unicode, so the best we can do is treat the
+        // input as if it is isolatin1 :(
+        isolatin1_to_ucs2(input, inputLeft, output, outputLeft);
+    }
+
+    return NS_OK;
+}
+
+//
+// convert PRUnichars at |*input| to native bytes at |*output|, one
+// character per loop iteration.  the loop stops when the input is used
+// up or when fewer than MB_CUR_MAX bytes of output remain, so a caller
+// may have to call again with more output space.  always returns NS_OK.
+//
+nsresult
+nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
+                                          PRUint32         *inputLeft,
+                                          char            **output,
+                                          PRUint32         *outputLeft)
+{
+    if (gWCharIsUnicode) {
+        int incr;
+
+        while (*inputLeft && *outputLeft >= MB_CUR_MAX) {
+#ifdef HAVE_WCRTOMB
+            incr = (int) wcrtomb(*output, (wchar_t) **input, &ps);
+#else
+            // XXX is this thread-safe?
+            incr = (int) wctomb(*output, (wchar_t) **input);
+#endif
+            if (incr < 0) {
+                NS_WARNING("wctomb failed: possible charset mismatch");
+                **output = (unsigned char) **input; // truncate
+                incr = 1;
+            }
+            // most likely we're dead anyways if this assertion should fire
+            NS_ASSERTION(PRUint32(incr) <= *outputLeft, "wrote beyond end of string");
+            (*output) += incr;
+            (*outputLeft) -= incr;
+            (*input)++;
+            (*inputLeft)--;
+        }
+    }
+    else {
+        // wchar_t isn't unicode, so the best we can do is treat the
+        // input as if it is isolatin1 :(
+        ucs2_to_isolatin1(input, inputLeft, output, outputLeft);
+    }
+
+    return NS_OK;
+}
+
+#endif // USE_STDCONV
+
+//-----------------------------------------------------------------------------
+// API implementation
+//-----------------------------------------------------------------------------
+
+//
+// replace the contents of |output| with |input| converted from the
+// native charset.
+//
+NS_COM nsresult
+NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
+{
+    nsNativeCharsetConverter conv;
+    nsresult rv;
+
+    PRUint32 inputLen = input.Length();
+
+    output.Truncate();
+
+    nsACString::const_iterator iter, end;
+    input.BeginReading(iter);
+    
input.EndReading(end);
+
+    //
+    // OPTIMIZATION: size the result for the largest possible outcome (one
+    // PRUnichar per input byte) and convert straight into it, which
+    // avoids copying through an intermediate buffer.
+    //
+    // the allocation is generally larger than needed, but that seems
+    // better than an extra buffer copy.
+    //
+    output.SetLength(inputLen);
+    nsAString::iterator out_iter;
+    output.BeginWriting(out_iter);
+
+    PRUnichar *result = out_iter.get();
+    PRUint32 resultLeft = inputLen;
+
+    // walk the input one fragment at a time.
+    while (iter != end) {
+        const char *frag = iter.get();
+        const PRUint32 fragLen = iter.size_forward();
+        PRUint32 bufLeft = fragLen;
+
+        rv = conv.NativeToUnicode(&frag, &bufLeft, &result, &resultLeft);
+        if (NS_FAILED(rv))
+            return rv;
+
+        NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
+        iter.advance(fragLen);
+    }
+
+    // trim the result down to what was actually written.
+    output.SetLength(inputLen - resultLeft);
+    return NS_OK;
+}
+
+//
+// replace the contents of |output| with |input| converted to the native
+// charset.  the text is converted through a fixed-size stack buffer and
+// appended to |output| piece by piece.
+//
+NS_COM nsresult
+NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
+{
+    nsNativeCharsetConverter conv;
+
+    output.Truncate();
+
+    nsAString::const_iterator cur, last;
+    input.BeginReading(cur);
+    input.EndReading(last);
+
+    // cannot easily avoid intermediate buffer copy.
+    char chunk[4096];
+
+    while (cur != last) {
+        const PRUnichar *src = cur.get();
+        const PRUint32 fragLen = cur.size_forward();
+
+        // keep converting until this fragment is used up; each pass
+        // starts with an empty |chunk|.
+        for (PRUint32 srcLeft = fragLen; srcLeft; ) {
+            char *dst = chunk;
+            PRUint32 dstLeft = sizeof(chunk);
+
+            nsresult rv = conv.UnicodeToNative(&src, &srcLeft, &dst, &dstLeft);
+            if (NS_FAILED(rv))
+                return rv;
+
+            if (dstLeft < sizeof(chunk))
+                output.Append(chunk, sizeof(chunk) - dstLeft);
+        }
+        cur.advance(fragLen);
+    }
+    return NS_OK;
+}
+
+void
+NS_StartupNativeCharsetUtils()
+{
+    //
+    // need to initialize the locale or else charset conversion will fail.
+    // better not delay this in case some other component alters the locale
+    // settings.
+    //
+    // XXX we assume that we are called early enough that we should
+    // always be the first to care about the locale's charset.
+ // + setlocale(LC_CTYPE, ""); + + nsNativeCharsetConverter::GlobalInit(); +} + +void +NS_ShutdownNativeCharsetUtils() +{ + nsNativeCharsetConverter::GlobalShutdown(); +} + +#else + +// XXX add non XP_UNIX implementations here... + +NS_COM nsresult +NS_CopyNativeToUnicode(const nsACString &input, nsAString &output) +{ + NS_NOTREACHED("NS_CopyNativeToUnicode"); + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_COM nsresult +NS_CopyUnicodeToNative(const nsAString &input, nsACString &output) +{ + NS_NOTREACHED("NS_CopyUnicodeToNative"); + return NS_ERROR_NOT_IMPLEMENTED; +} + +void +NS_StartupNativeCharsetUtils() +{ +} + +void +NS_ShutdownNativeCharsetUtils() +{ +} + +#endif diff --git a/mozilla/xpcom/io/nsNativeCharsetUtils.h b/mozilla/xpcom/io/nsNativeCharsetUtils.h new file mode 100644 index 00000000000..691c6a79e20 --- /dev/null +++ b/mozilla/xpcom/io/nsNativeCharsetUtils.h @@ -0,0 +1,70 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2002 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): + * Darin Fisher + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef nsNativeCharsetUtils_h__ +#define nsNativeCharsetUtils_h__ + + +/*****************************************************************************\ + * * + * **** NOTICE **** * + * * + * *** THESE ARE NOT GENERAL PURPOSE CONVERTERS *** * + * * + * NS_CopyNativeToUnicode / NS_CopyUnicodeToNative should only be used * + * by XPCOM for converting *FILENAMES* between native and unicode. They * + * are not designed or tested for general encoding converter use. * + * * +\*****************************************************************************/ + + +// XXX XXX XXX XXX only implemented for XP_UNIX XXX XXX XXX XXX + + +/** + * thread-safe conversion routines that do not depend on uconv libraries. + */ +NS_COM nsresult NS_CopyNativeToUnicode(const nsACString &input, nsAString &output); +NS_COM nsresult NS_CopyUnicodeToNative(const nsAString &input, nsACString &output); + +/** + * internal + */ +void NS_StartupNativeCharsetUtils(); +void NS_ShutdownNativeCharsetUtils(); + +#endif // nsNativeCharsetUtils_h__