From ffb44bd2e8ebc927ce991ae439cca8303480b7ef Mon Sep 17 00:00:00 2001 From: "yokoyama%netscape.com" Date: Wed, 18 Jul 2001 21:49:21 +0000 Subject: [PATCH] Bug 88944 for jshin. r=ftang sr=blizzard@mozilla.org adding converter support for x-windows-949 and x-johab git-svn-id: svn://10.0.0.236/trunk@99526 18797224-902f-48f8-a5cc-f745e15eee43 --- mozilla/intl/uconv/public/uconvutil.h | 5 +- mozilla/intl/uconv/src/charsetData.properties | 4 + .../intl/uconv/src/charsetTitles.properties | 2 + .../intl/uconv/src/charsetalias.properties | 9 ++- mozilla/intl/uconv/src/ugen.c | 53 +++++++----- mozilla/intl/uconv/src/uscan.c | 71 +++++++++++----- mozilla/intl/uconv/ucvko/Makefile.in | 2 + mozilla/intl/uconv/ucvko/makefile.win | 6 +- mozilla/intl/uconv/ucvko/nsCP949ToUnicode.cpp | 81 +++++++++++++++++++ mozilla/intl/uconv/ucvko/nsCP949ToUnicode.h | 34 ++++++++ mozilla/intl/uconv/ucvko/nsEUCKRToUnicode.cpp | 12 ++- mozilla/intl/uconv/ucvko/nsUCvKOCID.h | 11 +++ mozilla/intl/uconv/ucvko/nsUCvKoModule.cpp | 36 +++++++++ mozilla/intl/uconv/ucvko/nsUnicodeToCP949.cpp | 62 ++++++++++++++ mozilla/intl/uconv/ucvko/nsUnicodeToCP949.h | 34 ++++++++ mozilla/intl/uconv/ucvko/nsUnicodeToEUCKR.cpp | 6 +- mozilla/intl/uconv/ucvko/nsUnicodeToJohab.cpp | 15 +++- .../intl/uconv/ucvko/nsUnicodeToKSC5601.cpp | 6 +- .../locale/en-US/navigator.properties | 2 +- 19 files changed, 394 insertions(+), 57 deletions(-) diff --git a/mozilla/intl/uconv/public/uconvutil.h b/mozilla/intl/uconv/public/uconvutil.h index 533ad7f5e4f..2f22c8c6158 100644 --- a/mozilla/intl/uconv/public/uconvutil.h +++ b/mozilla/intl/uconv/public/uconvutil.h @@ -52,11 +52,12 @@ typedef enum { u2BytesGRPrefix8EA6Charset, u2BytesGRPrefix8EA7Charset, u1ByteGLCharset, - uComposedHangulCharset, - uComposedHangulGLCharset, + uDecomposedHangulCharset, + uDecomposedHangulGLCharset, uJohabHangulCharset, uJohabSymbolCharset, u4BytesGB18030Charset, + u2BytesGR128Charset, uNumOfCharsetType } uScanClassID; diff --git 
a/mozilla/intl/uconv/src/charsetData.properties b/mozilla/intl/uconv/src/charsetData.properties index ef099c0e8bc..fc933155df6 100644 --- a/mozilla/intl/uconv/src/charsetData.properties +++ b/mozilla/intl/uconv/src/charsetData.properties @@ -123,6 +123,8 @@ x-mac-ukrainian.LangGroup = x-cyrillic x-user-defined.LangGroup = x-user-def x-x11johab.LangGroup = ko x-johab.LangGroup = ko +x-johab-noascii.LangGroup = ko +x-windows-949.LangGroup = ko utf-8.MIMEHeaderEncodingMethod = B utf-8.MIMEMailCharset = utf-8 @@ -138,5 +140,7 @@ gb2312.isMultibyte = true hz-gb-2312.isMultibyte = true iso-2022-kr.isMultibyte = true euc-kr.isMultibyte = true +x-johab.isMultibyte = true +x-windows-949.isMultibyte = true utf-7.isMultibyte = true utf-8.isMultibyte = true diff --git a/mozilla/intl/uconv/src/charsetTitles.properties b/mozilla/intl/uconv/src/charsetTitles.properties index c5611788dfa..207d322b37e 100644 --- a/mozilla/intl/uconv/src/charsetTitles.properties +++ b/mozilla/intl/uconv/src/charsetTitles.properties @@ -57,6 +57,8 @@ hz-gb-2312.title = Chinese Simplified (HZ) x-gbk.title = Chinese Simplified (GBK) iso-2022-cn.title = Chinese Simplified (ISO-2022-CN) euc-kr.title = Korean (EUC-KR) +x-johab.title = Korean (JOHAB) +x-windows-949.title = Korean (UHC) utf-7.title = Unicode (UTF-7) utf-8.title = Unicode (UTF-8) iso-8859-5.title = Cyrillic (ISO-8859-5) diff --git a/mozilla/intl/uconv/src/charsetalias.properties b/mozilla/intl/uconv/src/charsetalias.properties index 419a4531b19..a473de01357 100644 --- a/mozilla/intl/uconv/src/charsetalias.properties +++ b/mozilla/intl/uconv/src/charsetalias.properties @@ -129,6 +129,8 @@ x-iso-10646-ucs-4-be=UTF-32BE x-iso-10646-ucs-4-le=UTF-32LE x-user-defined=x-user-defined x-u-escaped=x-u-escaped +x-johab=x-johab +x-windows-949=x-windows-949 ## ## Aliases for ISO-8859-1 ## @@ -288,7 +290,7 @@ x-x-big5=Big5 ## csueckr=EUC-KR # The following are really not aliases EUC-KR, add them only for MS FrontPage -ks_c_5601-1987=EUC-KR 
+#ks_c_5601-1987=EUC-KR iso-ir-149=EUC-KR ks_c_5601-1989=EUC-KR ksc_5601=EUC-KR @@ -297,6 +299,11 @@ korean=EUC-KR csksc56011987=EUC-KR 5601=EUC-KR ## +## Aliases for X-Windows-949, CP949, Unified Hangul Code (UHC) +## +# Microsoft uses ks_c_5601-1987 to mean Windows-949 or its subset EUC-KR. +ks_c_5601-1987=x-windows-949 +## ## Aliases for GB2312 ## # The following are really not aliases GB2312, add them only for MS FrontPage diff --git a/mozilla/intl/uconv/src/ugen.c b/mozilla/intl/uconv/src/ugen.c index 43bf94ff48b..e133e480d8c 100644 --- a/mozilla/intl/uconv/src/ugen.c +++ b/mozilla/intl/uconv/src/ugen.c @@ -176,7 +176,7 @@ PRIVATE PRBool uCheckAndGenAlways1ByteShiftGL( PRUint32* outlen ); -PRIVATE PRBool uCnGAlways8BytesComposedHangul( +PRIVATE PRBool uCnGAlways8BytesDecomposedHangul( uShiftTable *shift, PRInt32* state, PRUint16 in, @@ -185,7 +185,7 @@ PRIVATE PRBool uCnGAlways8BytesComposedHangul( PRUint32* outlen ); -PRIVATE PRBool uCnGAlways8BytesGLComposedHangul( +PRIVATE PRBool uCnGAlways6BytesGLDecomposedHangul( uShiftTable *shift, PRInt32* state, PRUint16 in, @@ -223,14 +223,15 @@ PRIVATE PRBool uCheckAndGen4BytesGB18030( ); -PRIVATE PRBool uGenComposedHangulCommon( +PRIVATE PRBool uGenDecomposedHangulCommon( uShiftTable *shift, PRInt32* state, PRUint16 in, unsigned char* out, PRUint32 outbuflen, PRUint32* outlen, - PRUint8 mask + PRUint8 mask, + PRUint16 nbyte ); PRIVATE PRBool uGenAlways2Byte( @@ -277,11 +278,12 @@ PRIVATE uGeneratorFunc m_generator[uNumOfCharsetType] = uCheckAndGen2ByteGRPrefix8EA6, uCheckAndGen2ByteGRPrefix8EA7, uCheckAndGenAlways1ByteShiftGL, - uCnGAlways8BytesComposedHangul, - uCnGAlways8BytesGLComposedHangul, + uCnGAlways8BytesDecomposedHangul, + uCnGAlways6BytesGLDecomposedHangul, uCheckAndGenJohabHangul, uCheckAndGenJohabSymbol, - uCheckAndGen4BytesGB18030 + uCheckAndGen4BytesGB18030, + uCheckAndGenAlways2Byte /* place-holder for GR128 */ }; /*================================================================================= @@ 
-751,14 +753,15 @@ PRIVATE PRBool uCheckAndGenAlways1ByteShiftGL( /*================================================================================= =================================================================================*/ -PRIVATE PRBool uGenComposedHangulCommon( +PRIVATE PRBool uGenDecomposedHangulCommon( uShiftTable *shift, PRInt32* state, PRUint16 in, unsigned char* out, PRUint32 outbuflen, PRUint32* outlen, - PRUint8 mask + PRUint8 mask, + PRUint16 nbyte ) { if(outbuflen < 8) @@ -776,6 +779,7 @@ PRIVATE PRBool uGenComposedHangulCommon( 0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe }; PRUint16 SIndex, LIndex, VIndex, TIndex; + PRUint16 offset; /* the following line are copy from Unicode 2.0 page 3-13 */ /* item 1 of Hangul Syllabel Decomposition */ SIndex = in - SBase; @@ -786,16 +790,22 @@ PRIVATE PRBool uGenComposedHangulCommon( VIndex = (SIndex % NCount) / TCount; TIndex = SIndex % TCount; - *outlen = 8; - out[0] = out[2] = out[4] = out[6] = (0xa4 & mask); - out[1] = 0xd4 & mask; - out[3] = lMap[LIndex] & mask; - out[5] = (VIndex + 0xbf) & mask; - out[7] = tMap[TIndex] & mask; + *outlen = nbyte; + offset = nbyte == 6 ? 
0 : 2; + out[0] = out[2] = out[4] = 0xa4 & mask; + out[1+offset] = lMap[LIndex] & mask; + out[3+offset] = (VIndex + 0xbf) & mask; + out[5+offset] = tMap[TIndex] & mask; + if ( nbyte == 8 ) + { + out[6] = 0xa4 & mask; + out[1] = 0xd4 & mask; + } + return PR_TRUE; } } -PRIVATE PRBool uCnGAlways8BytesComposedHangul( +PRIVATE PRBool uCnGAlways8BytesDecomposedHangul( uShiftTable *shift, PRInt32* state, PRUint16 in, @@ -804,9 +814,14 @@ PRIVATE PRBool uCnGAlways8BytesComposedHangul( PRUint32* outlen ) { - return uGenComposedHangulCommon(shift,state,in,out,outbuflen,outlen,0xff); + return uGenDecomposedHangulCommon(shift,state,in,out,outbuflen,outlen,0xff,8); } -PRIVATE PRBool uCnGAlways8BytesGLComposedHangul( + /* + For rendering of Hangul in X11 with fonts with glyphs for only + 2350 syllables, drop the first 2bytes anchoring the representation + of Hangul syllables with 8byte sequence. + */ +PRIVATE PRBool uCnGAlways6BytesGLDecomposedHangul( uShiftTable *shift, PRInt32* state, PRUint16 in, @@ -815,7 +830,7 @@ PRIVATE PRBool uCnGAlways8BytesGLComposedHangul( PRUint32* outlen ) { - return uGenComposedHangulCommon(shift,state,in,out,outbuflen,outlen,0x7f); + return uGenDecomposedHangulCommon(shift,state,in,out,outbuflen,outlen,0x7f,6); } PRIVATE PRBool uCheckAndGenJohabHangul( uShiftTable *shift, diff --git a/mozilla/intl/uconv/src/uscan.c b/mozilla/intl/uconv/src/uscan.c index 2b02bb84b77..c4253f00916 100644 --- a/mozilla/intl/uconv/src/uscan.c +++ b/mozilla/intl/uconv/src/uscan.c @@ -74,6 +74,14 @@ PRIVATE PRBool uCheckAndScanAlways2ByteShiftGR( PRUint32 inbuflen, PRUint32* inscanlen ); +PRIVATE PRBool uCheckAndScanAlways2ByteGR128( + uShiftTable *shift, + PRInt32* state, + unsigned char *in, + PRUint16 *out, + PRUint32 inbuflen, + PRUint32* inscanlen + ); PRIVATE PRBool uCheckAndScanByTable( uShiftTable *shift, PRInt32* state, @@ -171,7 +179,8 @@ PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL( PRUint32 inbuflen, PRUint32* inscanlen ); -PRIVATE PRBool 
uCnSAlways8BytesComposedHangul( + +PRIVATE PRBool uCnSAlways8BytesDecomposedHangul( uShiftTable *shift, PRInt32* state, unsigned char *in, @@ -179,8 +188,7 @@ PRIVATE PRBool uCnSAlways8BytesComposedHangul( PRUint32 inbuflen, PRUint32* inscanlen ); - -PRIVATE PRBool uCnSAlways8BytesGLComposedHangul( +PRIVATE PRBool uCnSAlways8BytesGLDecomposedHangul( uShiftTable *shift, PRInt32* state, unsigned char *in, @@ -189,7 +197,7 @@ PRIVATE PRBool uCnSAlways8BytesGLComposedHangul( PRUint32* inscanlen ); -PRIVATE PRBool uScanComposedHangulCommon( +PRIVATE PRBool uScanDecomposedHangulCommon( uShiftTable *shift, PRInt32* state, unsigned char *in, @@ -268,11 +276,12 @@ PRIVATE uScannerFunc m_scanner[uNumOfCharsetType] = uCheckAndScan2ByteGRPrefix8EA6, uCheckAndScan2ByteGRPrefix8EA7, uCheckAndScanAlways1ByteShiftGL, - uCnSAlways8BytesComposedHangul, - uCnSAlways8BytesGLComposedHangul, + uCnSAlways8BytesDecomposedHangul, + uCnSAlways8BytesGLDecomposedHangul, uCheckAndScanJohabHangul, uCheckAndScanJohabSymbol, - uCheckAndScan4BytesGB18030 + uCheckAndScan4BytesGB18030, + uCheckAndScanAlways2ByteGR128 }; /*================================================================================= @@ -439,6 +448,32 @@ PRIVATE PRBool uCheckAndScanAlways2ByteShiftGR( } /*================================================================================= +=================================================================================*/ +PRIVATE PRBool uCheckAndScanAlways2ByteGR128( + uShiftTable *shift, + PRInt32* state, + unsigned char *in, + PRUint16 *out, + PRUint32 inbuflen, + PRUint32* inscanlen + ) +{ + /* + * The first byte should be in [0xa1,0xfe] + * and the second byte can take any value with MSB = 1. + * Used by CP949 -> Unicode converter. + */ + if(inbuflen < 2 || ! CHK_GR94(in[0]) || ! 
(in[1] & 0x80) ) + return PR_FALSE; + else + { + *inscanlen = 2; + *out = (in[0] << 8) | in[1]; + return PR_TRUE; + } +} +/*================================================================================= + =================================================================================*/ PRIVATE PRBool uCheckAndScanByTable( uShiftTable *shift, @@ -713,7 +748,7 @@ PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL( #define VCount 21 #define TCount 28 #define NCount (VCount * TCount) -PRIVATE PRBool uScanComposedHangulCommon( +PRIVATE PRBool uScanDecomposedHangulCommon( uShiftTable *shift, PRInt32* state, unsigned char *in, @@ -792,7 +827,7 @@ PRIVATE PRBool uScanComposedHangulCommon( /*================================================================================= =================================================================================*/ -PRIVATE PRBool uCnSAlways8BytesComposedHangul( +PRIVATE PRBool uCnSAlways8BytesDecomposedHangul( uShiftTable *shift, PRInt32* state, unsigned char *in, @@ -801,12 +836,12 @@ PRIVATE PRBool uCnSAlways8BytesComposedHangul( PRUint32* inscanlen ) { - return uScanComposedHangulCommon(shift,state,in,out,inbuflen,inscanlen,0xff); + return uScanDecomposedHangulCommon(shift,state,in,out,inbuflen,inscanlen,0xff); } /*================================================================================= =================================================================================*/ -PRIVATE PRBool uCnSAlways8BytesGLComposedHangul( +PRIVATE PRBool uCnSAlways8BytesGLDecomposedHangul( uShiftTable *shift, PRInt32* state, unsigned char *in, @@ -815,7 +850,7 @@ PRIVATE PRBool uCnSAlways8BytesGLComposedHangul( PRUint32* inscanlen ) { - return uScanComposedHangulCommon(shift,state,in,out,inbuflen,inscanlen,0x7f); + return uScanDecomposedHangulCommon(shift,state,in,out,inbuflen,inscanlen,0x7f); } PRIVATE PRBool uCheckAndScanJohabHangul( uShiftTable *shift, @@ -828,7 +863,6 @@ PRIVATE PRBool uCheckAndScanJohabHangul( { /* since we don't have 
code to convert Johab to Unicode right now * * make this part of code #if 0 to save space untill we fully test it */ -#if 0 if(inbuflen < 2) return PR_FALSE; else { @@ -867,11 +901,9 @@ PRIVATE PRBool uCheckAndScanJohabHangul( return PR_FALSE; /* the following line is from Unicode 2.0 page 3-13 item 5 */ *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; + *inscanlen = 2; return PR_TRUE; } -#else - return PR_FALSE; -#endif } PRIVATE PRBool uCheckAndScanJohabSymbol( uShiftTable *shift, @@ -882,9 +914,6 @@ PRIVATE PRBool uCheckAndScanJohabSymbol( PRUint32* inscanlen ) { -/* since we don't have code to convert Johab to Unicode right now - * make this part of code #if 0 to save space untill we fully test it */ -#if 0 if(inbuflen < 2) return PR_FALSE; else { @@ -928,11 +957,9 @@ PRIVATE PRBool uCheckAndScanJohabSymbol( (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) | (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : 128)); + *inscanlen = 2; return PR_TRUE; } -#else - return PR_FALSE; -#endif } PRIVATE PRBool uCheckAndScan4BytesGB18030( uShiftTable *shift, diff --git a/mozilla/intl/uconv/ucvko/Makefile.in b/mozilla/intl/uconv/ucvko/Makefile.in index f55737334c9..5f6842ac0e0 100644 --- a/mozilla/intl/uconv/ucvko/Makefile.in +++ b/mozilla/intl/uconv/ucvko/Makefile.in @@ -43,6 +43,8 @@ CPPSRCS = \ nsUnicodeToCP949.cpp \ nsUnicodeToX11Johab.cpp \ nsUnicodeToJohab.cpp \ + nsJohabToUnicode.cpp \ + nsUnicodeToJohabNoAscii.cpp \ nsUCvKOSupport.cpp \ nsUCvKoModule.cpp \ $(NULL) diff --git a/mozilla/intl/uconv/ucvko/makefile.win b/mozilla/intl/uconv/ucvko/makefile.win index b15ed53ce80..aa95a794b1b 100644 --- a/mozilla/intl/uconv/ucvko/makefile.win +++ b/mozilla/intl/uconv/ucvko/makefile.win @@ -36,7 +36,9 @@ CPPSRCS = \ nsCP949ToUnicode.cpp \ nsUnicodeToCP949.cpp \ nsUnicodeToX11Johab.cpp \ - nsUnicodeToJohab.cpp \ + nsJohabToUnicode.cpp \ + nsUnicodeToJohab.cpp \ + nsUnicodeToJohabNoAscii.cpp \ nsUCvKOSupport.cpp \ nsUCvKoModule.cpp \ $(NULL) @@ -50,7 +52,9 @@ CPP_OBJS= \ 
.\$(OBJDIR)\nsCP949ToUnicode.obj \ .\$(OBJDIR)\nsUnicodeToCP949.obj \ .\$(OBJDIR)\nsUnicodeToX11Johab.obj \ + .\$(OBJDIR)\nsJohabToUnicode.obj \ .\$(OBJDIR)\nsUnicodeToJohab.obj \ + .\$(OBJDIR)\nsUnicodeToJohabNoAscii.obj \ .\$(OBJDIR)\nsUCvKOSupport.obj \ .\$(OBJDIR)\nsUCvKoModule.obj \ $(NULL) diff --git a/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.cpp b/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.cpp index a460cef02de..cabda8df3cd 100644 --- a/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.cpp +++ b/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.cpp @@ -19,3 +19,84 @@ * * Contributor(s): */ + +#include "nsCP949ToUnicode.h" +#include "nsUCvKODll.h" + +//---------------------------------------------------------------------- +// Global functions and data [declaration] + +static const PRUint16 g_ASCIIShiftTable[] = { + 0, u1ByteCharset, + ShiftCell(0,0,0,0,0,0,0,0) +}; + + +static const PRUint16 g_EUCKRShiftTable[] = { + 0, u2BytesGRCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0) +}; + +static const PRUint16 g_CP949HighShiftTable[] = { + 0, u2BytesGR128Charset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0) +}; + +static const PRUint16 g_CP949LowShiftTable[] = { + 0, u2BytesCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0) +}; + +// CP949(non-EUC-KR portion) to Unicode +static const PRUint16 g_utCP949NoKSCHangulMapping[] = { +#include "u20cp949hangul.ut" +}; + +static const uRange g_CP949Ranges[] = { + { 0x00, 0x7E }, + { 0xA1, 0xFE }, + { 0xA1, 0xFE }, + { 0x80, 0xA0 } +}; + + +static const PRUint16 *g_CP949ShiftTableSet [] = { + g_ASCIIShiftTable, + g_EUCKRShiftTable, + g_CP949HighShiftTable, + g_CP949LowShiftTable +}; + +static const PRUint16 *g_CP949MappingTableSet [] ={ + g_AsciiMapping, + g_utKSC5601Mapping, + g_utCP949NoKSCHangulMapping, + g_utCP949NoKSCHangulMapping +//g_CP949HighMapping, +//g_CP949LowMapping +}; + + +//---------------------------------------------------------------------- +// Class nsCP949ToUnicode [implementation] + +nsCP949ToUnicode::nsCP949ToUnicode() +: 
nsMultiTableDecoderSupport(4, + (uRange*) &g_CP949Ranges, + (uShiftTable**) &g_CP949ShiftTableSet, + (uMappingTable**) &g_CP949MappingTableSet) +{ +} + +//---------------------------------------------------------------------- +// Subclassing of nsTablesDecoderSupport class [implementation] + +NS_IMETHODIMP nsCP949ToUnicode::GetMaxLength(const char * aSrc, + PRInt32 aSrcLength, + PRInt32 * aDestLength) +{ + // each 1- or 2-byte CP949 sequence yields one PRUnichar, so aSrcLength suffices + *aDestLength = aSrcLength; + return NS_OK_UDEC_EXACTLENGTH; +} + diff --git a/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.h b/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.h index a460cef02de..325024bc3aa 100644 --- a/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.h +++ b/mozilla/intl/uconv/ucvko/nsCP949ToUnicode.h @@ -19,3 +19,37 @@ * * Contributor(s): */ + +#ifndef nsCP949ToUnicode_h___ +#define nsCP949ToUnicode_h___ + +#include "nsUCvKOSupport.h" + +//---------------------------------------------------------------------- +// Class nsCP949ToUnicode [declaration] + +/** + * A character set converter from CP949 to Unicode. + * + * @created 06/Apr/1999 + * @author Catalin Rotaru [CATA] + */ +class nsCP949ToUnicode : public nsMultiTableDecoderSupport +{ +public: + + /** + * Class constructor. + */ + nsCP949ToUnicode(); + +protected: + + //-------------------------------------------------------------------- + // Subclassing of nsDecoderSupport class [declaration] + + NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength, + PRInt32 * aDestLength); +}; + +#endif /* nsCP949ToUnicode_h___ */ diff --git a/mozilla/intl/uconv/ucvko/nsEUCKRToUnicode.cpp b/mozilla/intl/uconv/ucvko/nsEUCKRToUnicode.cpp index facebdbe57d..09f608f3c4f 100644 --- a/mozilla/intl/uconv/ucvko/nsEUCKRToUnicode.cpp +++ b/mozilla/intl/uconv/ucvko/nsEUCKRToUnicode.cpp @@ -54,18 +54,28 @@ static const PRUint16 g_EUCKRShiftTable[] = { static const uRange g_EUCKRRanges[] = { { 0x00, 0x7E }, + { 0xA4, 0xA4 }, // 8byte seq. 
for Hangul syllables not available + // in pre-composed form in KS X 1001 { 0xA1, 0xFE } }; #endif +static const PRUint16 g_DecomposedHangulShiftTable[] = { + 0, uDecomposedHangulCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), +}; + + static const PRUint16 *g_EUCKRShiftTableSet [] = { g_ASCIIShiftTable, + g_DecomposedHangulShiftTable, g_EUCKRShiftTable }; static const PRUint16 *g_EUCKRMappingTableSet [] ={ g_AsciiMapping, + g_HangulNullMapping, g_utKSC5601Mapping }; @@ -74,7 +84,7 @@ static const PRUint16 *g_EUCKRMappingTableSet [] ={ // Class nsEUCKRToUnicode [implementation] nsEUCKRToUnicode::nsEUCKRToUnicode() -: nsMultiTableDecoderSupport(2, +: nsMultiTableDecoderSupport(3, (uRange*) &g_EUCKRRanges, (uShiftTable**) &g_EUCKRShiftTableSet, (uMappingTable**) &g_EUCKRMappingTableSet) diff --git a/mozilla/intl/uconv/ucvko/nsUCvKOCID.h b/mozilla/intl/uconv/ucvko/nsUCvKOCID.h index 2fa07c4de0a..6f3b397f277 100644 --- a/mozilla/intl/uconv/ucvko/nsUCvKOCID.h +++ b/mozilla/intl/uconv/ucvko/nsUCvKOCID.h @@ -63,7 +63,18 @@ { 0x21dd6a01, 0x413c, 0x11d3, {0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}} // Class ID for our UnicodeToJohab charset converter +// {D9B1F97E-CFA0-80b6-FB92-9972E48E3DCC} #define NS_UNICODETOJOHAB_CID \ + { 0xd9b1f97e, 0xcfa0, 0x80b6, {0xfb, 0x92, 0x99, 0x72, 0xe4, 0x8e, 0x3d, 0xcc}} + +// Class ID for our JohabToUnicode charset converter +// {D9B1F97F-CFA0-80b6-FB92-9972E48E3DCC} +#define NS_JOHABTOUNICODE_CID \ + { 0xd9b1f97f, 0xcfa0, 0x80b6, {0xfb, 0x92, 0x99, 0x72, 0xe4, 0x8e, 0x3d, 0xcc}} + +// Class ID for our UnicodeToJohabNoAscii charset converter +// {7090544B-C885-4c52-95F8-3C8F0C2FDE67} +#define NS_UNICODETOJOHABNOASCII_CID \ { 0x7090544b, 0xc885, 0x4c52, {0x95, 0xf8, 0x3c, 0x8f, 0xc, 0x2f, 0xde, 0x67}} #endif /* nsUCvKOCID_h___ */ diff --git a/mozilla/intl/uconv/ucvko/nsUCvKoModule.cpp b/mozilla/intl/uconv/ucvko/nsUCvKoModule.cpp index 14d8fe190f3..c2364f2ce0d 100644 --- a/mozilla/intl/uconv/ucvko/nsUCvKoModule.cpp +++ 
b/mozilla/intl/uconv/ucvko/nsUCvKoModule.cpp @@ -41,7 +41,11 @@ #include "nsUnicodeToEUCKR.h" #include "nsUnicodeToKSC5601.h" #include "nsUnicodeToX11Johab.h" +#include "nsJohabToUnicode.h" #include "nsUnicodeToJohab.h" +#include "nsUnicodeToJohabNoAscii.h" +#include "nsCP949ToUnicode.h" +#include "nsUnicodeToCP949.h" //---------------------------------------------------------------------------- // Global functions and data [declaration] @@ -72,13 +76,21 @@ NS_UCONV_REG_UNREG(nsEUCKRToUnicode, "EUC-KR", "Unicode" , NS_EUCKRTOUNICODE_CID NS_UCONV_REG_UNREG(nsUnicodeToEUCKR, "Unicode", "EUC-KR", NS_UNICODETOEUCKR_CID); NS_UCONV_REG_UNREG(nsUnicodeToKSC5601, "Unicode", "ks_c_5601-1987", NS_UNICODETOKSC5601_CID); NS_UCONV_REG_UNREG(nsUnicodeToX11Johab, "Unicode", "x-x11johab", NS_UNICODETOX11JOHAB_CID); +NS_UCONV_REG_UNREG(nsJohabToUnicode, "x-johab", "Unicode" , NS_JOHABTOUNICODE_CID); NS_UCONV_REG_UNREG(nsUnicodeToJohab, "Unicode", "x-johab", NS_UNICODETOJOHAB_CID); +NS_UCONV_REG_UNREG(nsUnicodeToJohabNoAscii, "Unicode", "x-johab-noascii", NS_UNICODETOJOHABNOASCII_CID); +NS_UCONV_REG_UNREG(nsCP949ToUnicode, "x-windows-949", "Unicode" , NS_CP949TOUNICODE_CID); +NS_UCONV_REG_UNREG(nsUnicodeToCP949, "Unicode", "x-windows-949", NS_UNICODETOCP949_CID); NS_GENERIC_FACTORY_CONSTRUCTOR(nsEUCKRToUnicode); NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToEUCKR); NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToKSC5601); NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToX11Johab); +NS_GENERIC_FACTORY_CONSTRUCTOR(nsJohabToUnicode); NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToJohab); +NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToJohabNoAscii); +NS_GENERIC_FACTORY_CONSTRUCTOR(nsCP949ToUnicode); +NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToCP949); static nsModuleComponentInfo components[] = { @@ -106,11 +118,35 @@ static nsModuleComponentInfo components[] = nsUnicodeToX11JohabConstructor, nsUnicodeToX11JohabRegSelf, nsUnicodeToX11JohabUnRegSelf }, + { + DECODER_NAME_BASE "x-johab" , NS_JOHABTOUNICODE_CID, + 
NS_UNICODEDECODER_CONTRACTID_BASE "x-johab", + nsJohabToUnicodeConstructor , + nsJohabToUnicodeRegSelf , nsJohabToUnicodeUnRegSelf + }, { ENCODER_NAME_BASE "x-johab" , NS_UNICODETOJOHAB_CID, NS_UNICODEENCODER_CONTRACTID_BASE "x-johab", nsUnicodeToJohabConstructor, nsUnicodeToJohabRegSelf, nsUnicodeToJohabUnRegSelf + }, + { + ENCODER_NAME_BASE "x-johab-noascii", NS_UNICODETOJOHABNOASCII_CID, + NS_UNICODEENCODER_CONTRACTID_BASE "x-johab-noascii", + nsUnicodeToJohabNoAsciiConstructor, + nsUnicodeToJohabNoAsciiRegSelf, nsUnicodeToJohabNoAsciiUnRegSelf + }, + { + DECODER_NAME_BASE "x-windows-949" , NS_CP949TOUNICODE_CID, + NS_UNICODEDECODER_CONTRACTID_BASE "x-windows-949", + nsCP949ToUnicodeConstructor , + nsCP949ToUnicodeRegSelf , nsCP949ToUnicodeUnRegSelf + }, + { + ENCODER_NAME_BASE "x-windows-949" , NS_UNICODETOCP949_CID, + NS_UNICODEENCODER_CONTRACTID_BASE "x-windows-949", + nsUnicodeToCP949Constructor, + nsUnicodeToCP949RegSelf, nsUnicodeToCP949UnRegSelf } }; diff --git a/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.cpp b/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.cpp index a460cef02de..5427826b201 100644 --- a/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.cpp +++ b/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.cpp @@ -19,3 +19,65 @@ * * Contributor(s): */ + + +#include "nsUnicodeToCP949.h" +#include "nsUCvKODll.h" + +//---------------------------------------------------------------------- +// Global functions and data [declaration] + + +static const PRUint16 gAsciiShiftTable[] = { + 0, u1ByteCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), +}; + +static const PRUint16 gKSC5601ShiftTable[] = { + 0, u2BytesGRCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), +}; +static const PRUint16 gCP949ShiftTable[] = { + 0, u2BytesCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), +}; + +// Unicode Hangul syllables (not enumerated in KS X 1001) to CP949 : 8822 of them +static const PRUint16 g_ufCP949NoKSCHangulMapping[] = { +#include "u20cp949hangul.uf" +}; + + + +static const PRUint16 
*g_CP949MappingTable[3] = { + g_AsciiMapping, + g_ufKSC5601Mapping, + g_ufCP949NoKSCHangulMapping +}; + +static const PRUint16 *g_CP949ShiftTable[3] = { + gAsciiShiftTable, + gKSC5601ShiftTable, + gCP949ShiftTable +}; + +//---------------------------------------------------------------------- +// Class nsUnicodeToCP949 [implementation] + +nsUnicodeToCP949::nsUnicodeToCP949() +: nsMultiTableEncoderSupport(3, + (uShiftTable**) g_CP949ShiftTable, + (uMappingTable**) g_CP949MappingTable) +{ +} + +//---------------------------------------------------------------------- +// Subclassing of nsTableEncoderSupport class [implementation] + +NS_IMETHODIMP nsUnicodeToCP949::GetMaxLength(const PRUnichar * aSrc, + PRInt32 aSrcLength, + PRInt32 * aDestLength) +{ + *aDestLength = aSrcLength * 2; + return NS_OK; +} diff --git a/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.h b/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.h index a460cef02de..c0b9f81739f 100644 --- a/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.h +++ b/mozilla/intl/uconv/ucvko/nsUnicodeToCP949.h @@ -19,3 +19,37 @@ * * Contributor(s): */ + +#ifndef nsUnicodeToCP949_h___ +#define nsUnicodeToCP949_h___ + +#include "nsUCvKOSupport.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToCP949 [declaration] + +/** + * A character set converter from Unicode to CP949. + * + * @created 14/May/2001 (patterned after Unicode to EUCKR converter) + * @author Jungshik Shin + */ +class nsUnicodeToCP949 : public nsMultiTableEncoderSupport +{ +public: + + /** + * Class constructor. 
+ */ + nsUnicodeToCP949(); + +protected: + + //-------------------------------------------------------------------- + // Subclassing of nsEncoderSupport class [declaration] + + NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength, + PRInt32 * aDestLength); +}; + +#endif /* nsUnicodeToCP949_h___ */ diff --git a/mozilla/intl/uconv/ucvko/nsUnicodeToEUCKR.cpp b/mozilla/intl/uconv/ucvko/nsUnicodeToEUCKR.cpp index 4f7d1476f8c..ed2f77d6138 100644 --- a/mozilla/intl/uconv/ucvko/nsUnicodeToEUCKR.cpp +++ b/mozilla/intl/uconv/ucvko/nsUnicodeToEUCKR.cpp @@ -36,8 +36,8 @@ static const PRUint16 gKSC5601ShiftTable[] = { 0, u2BytesGRCharset, ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), }; -static const PRUint16 gComposedHangulShiftTable[] = { - 0, uComposedHangulCharset, +static const PRUint16 gDecomposedHangulShiftTable[] = { + 0, uDecomposedHangulCharset, ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), }; @@ -50,7 +50,7 @@ static const PRUint16 *g_EUCKRMappingTable[3] = { static const PRUint16 *g_EUCKRShiftTable[3] = { gAsciiShiftTable, gKSC5601ShiftTable, - gComposedHangulShiftTable + gDecomposedHangulShiftTable }; //---------------------------------------------------------------------- diff --git a/mozilla/intl/uconv/ucvko/nsUnicodeToJohab.cpp b/mozilla/intl/uconv/ucvko/nsUnicodeToJohab.cpp index ef898024b99..15802d9c744 100644 --- a/mozilla/intl/uconv/ucvko/nsUnicodeToJohab.cpp +++ b/mozilla/intl/uconv/ucvko/nsUnicodeToJohab.cpp @@ -27,6 +27,11 @@ // Global functions and data [declaration] +static const PRUint16 gAsciiShiftTable[] = { + 0, u1ByteCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), +}; + static const PRUint16 gJohabSymbolShiftTable[] = { 0, uJohabSymbolCharset, ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), @@ -36,21 +41,23 @@ static const PRUint16 gJohabHangulShiftTable[] = { ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), }; -static const PRUint16 *g_JohabMappingTable[2] = { +static const PRUint16 *g_JohabMappingTable[3] = { + g_AsciiMapping, g_HangulNullMapping, g_ufKSC5601Mapping }; -static const 
PRUint16 *g_JohabShiftTable[2] = { +static const PRUint16 *g_JohabShiftTable[3] = { + gAsciiShiftTable, gJohabHangulShiftTable, gJohabSymbolShiftTable }; //---------------------------------------------------------------------- -// Class nsUnicodeToEUCKR [implementation] +// Class nsUnicodeToJohab [implementation] nsUnicodeToJohab::nsUnicodeToJohab() -: nsMultiTableEncoderSupport(2, +: nsMultiTableEncoderSupport(3, (uShiftTable**) g_JohabShiftTable, (uMappingTable**) g_JohabMappingTable) { diff --git a/mozilla/intl/uconv/ucvko/nsUnicodeToKSC5601.cpp b/mozilla/intl/uconv/ucvko/nsUnicodeToKSC5601.cpp index bd4d01c63dc..0f8c06bdf9b 100644 --- a/mozilla/intl/uconv/ucvko/nsUnicodeToKSC5601.cpp +++ b/mozilla/intl/uconv/ucvko/nsUnicodeToKSC5601.cpp @@ -31,8 +31,8 @@ static const PRUint16 g2BytesShiftTable[] = { ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), }; -static const PRUint16 gComposedHangulShiftTable[] = { - 0, uComposedHangulGLCharset, +static const PRUint16 gDecomposedHangulShiftTable[] = { + 0, uDecomposedHangulGLCharset, ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), }; @@ -43,7 +43,7 @@ static const PRUint16 *g_MappingTable[3] = { static const PRUint16 *g_ShiftTable[3] = { g2BytesShiftTable, - gComposedHangulShiftTable + gDecomposedHangulShiftTable }; //---------------------------------------------------------------------- diff --git a/mozilla/xpfe/browser/resources/locale/en-US/navigator.properties b/mozilla/xpfe/browser/resources/locale/en-US/navigator.properties index 188b4d98c3e..0cf813d6c04 100644 --- a/mozilla/xpfe/browser/resources/locale/en-US/navigator.properties +++ b/mozilla/xpfe/browser/resources/locale/en-US/navigator.properties @@ -20,7 +20,7 @@ intl.accept_languages=en-us intl.charsetmenu.browser.static=iso-8859-1 intl.charsetmenu.browser.more1=iso-8859-1, iso-8859-15, ibm850, x-mac-roman, windows-1252, iso-8859-14, iso-8859-7, x-mac-greek, windows-1253, x-mac-icelandic, iso-8859-10, iso-8859-3 intl.charsetmenu.browser.more2=iso-8859-4, iso-8859-13, windows-1257, 
ibm852, iso-8859-2, x-mac-ce, windows-1250, x-mac-croatian, ibm855, iso-8859-5, iso-ir-111, koi8-r, x-mac-cyrillic, windows-1251, ibm866, koi8-u, x-mac-ukrainian, x-mac-romanian -intl.charsetmenu.browser.more3=gb2312, x-gbk, gb18030, hz-gb-2312, big5, big5-hkscs, x-euc-tw, euc-jp, iso-2022-jp, shift_jis, euc-kr +intl.charsetmenu.browser.more3=gb2312, x-gbk, gb18030, hz-gb-2312, big5, big5-hkscs, x-euc-tw, euc-jp, iso-2022-jp, shift_jis, euc-kr, x-windows-949, x-johab intl.charsetmenu.browser.more4=armscii-8, tis-620, ibm857, iso-8859-9, x-mac-turkish, windows-1254, x-viet-tcvn5712, viscii, x-viet-vps, windows-1258, x-mac-devanagari, x-mac-gujarati, x-mac-gurmukhi intl.charsetmenu.browser.more5=iso-8859-6, windows-1256, ibm864, x-mac-arabic, x-mac-farsi, iso-8859-8-i, windows-1255, iso-8859-8, ibm862, x-mac-hebrew intl.charset.default=ISO-8859-1