Display URLs containing unassigned Unicode code points in punycode. Bug 479413, r=jduell, sr=jst, a1.9.0.10=dveditz

git-svn-id: svn://10.0.0.236/trunk@256941 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
smontagu%smontagu.org 2009-04-17 07:13:00 +00:00
parent 37880521fa
commit d8c919a77c
3 changed files with 132 additions and 25 deletions

View File

@ -1,4 +1,4 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK ***** /* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
* *
@ -163,6 +163,11 @@ nsIDNService::~nsIDNService()
/* ACString ConvertUTF8toACE (in AUTF8String input); */ /* ACString ConvertUTF8toACE (in AUTF8String input); */
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace) NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
{
return UTF8toACE(input, ace, PR_TRUE);
}
nsresult nsIDNService::UTF8toACE(const nsACString & input, nsACString & ace, PRBool allowUnassigned)
{ {
nsresult rv; nsresult rv;
NS_ConvertUTF8toUTF16 ustr(input); NS_ConvertUTF8toUTF16 ustr(input);
@ -185,7 +190,8 @@ NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACStrin
while (start != end) { while (start != end) {
len++; len++;
if (*start++ == (PRUnichar)'.') { if (*start++ == (PRUnichar)'.') {
rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf); rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf,
allowUnassigned);
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
ace.Append(encodedBuf); ace.Append(encodedBuf);
@ -200,7 +206,8 @@ NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACStrin
ace.AppendLiteral("mltbd."); ace.AppendLiteral("mltbd.");
// encode the last node if non ASCII // encode the last node if non ASCII
if (len) { if (len) {
rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf); rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf,
allowUnassigned);
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
ace.Append(encodedBuf); ace.Append(encodedBuf);
@ -211,6 +218,12 @@ NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACStrin
/* AUTF8String convertACEtoUTF8(in ACString input); */ /* AUTF8String convertACEtoUTF8(in ACString input); */
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval) NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
{
return ACEtoUTF8(input, _retval, PR_TRUE);
}
nsresult nsIDNService::ACEtoUTF8(const nsACString & input, nsACString & _retval,
PRBool allowUnassigned)
{ {
// RFC 3490 - 4.2 ToUnicode // RFC 3490 - 4.2 ToUnicode
// ToUnicode never fails. If any step fails, then the original input // ToUnicode never fails. If any step fails, then the original input
@ -233,7 +246,8 @@ NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACStrin
while (start != end) { while (start != end) {
len++; len++;
if (*start++ == '.') { if (*start++ == '.') {
if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf))) { if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf,
allowUnassigned))) {
_retval.Assign(input); _retval.Assign(input);
return NS_OK; return NS_OK;
} }
@ -246,7 +260,8 @@ NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACStrin
} }
// decode the last node // decode the last node
if (len) { if (len) {
if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf))) if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf,
allowUnassigned)))
_retval.Assign(input); _retval.Assign(input);
else else
_retval.Append(decodedBuf); _retval.Append(decodedBuf);
@ -295,7 +310,7 @@ NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & out
while (start != end) { while (start != end) {
len++; len++;
if (*start++ == PRUnichar('.')) { if (*start++ == PRUnichar('.')) {
rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel); rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel, PR_TRUE);
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
outUTF16.Append(outLabel); outUTF16.Append(outLabel);
@ -305,7 +320,7 @@ NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & out
} }
} }
if (len) { if (len) {
rv = stringPrep(Substring(inUTF16, offset, len), outLabel); rv = stringPrep(Substring(inUTF16, offset, len), outLabel, PR_TRUE);
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
outUTF16.Append(outLabel); outUTF16.Append(outLabel);
@ -334,9 +349,9 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, PRBool
IsACE(_retval, &isACE); IsACE(_retval, &isACE);
if (isACE && !mShowPunycode && isInWhitelist(_retval)) { if (isACE && !mShowPunycode && isInWhitelist(_retval)) {
// ConvertACEtoUTF8() can't fail, but might return the original ACE string // ACEtoUTF8() can't fail, but might return the original ACE string
nsCAutoString temp(_retval); nsCAutoString temp(_retval);
ConvertACEtoUTF8(temp, _retval); ACEtoUTF8(temp, _retval, PR_FALSE);
*_isASCII = IsASCII(_retval); *_isASCII = IsASCII(_retval);
} else { } else {
*_isASCII = PR_TRUE; *_isASCII = PR_TRUE;
@ -492,7 +507,12 @@ static nsresult encodeToRACE(const char* prefix, const nsAString& in, nsACString
// for bidirectional strings. If the string does not satisfy the requirements // for bidirectional strings. If the string does not satisfy the requirements
// for bidirectional strings, return an error. This is described in section 6. // for bidirectional strings, return an error. This is described in section 6.
// //
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out) // 5) Check unassigned code points -- If allowUnassigned is false, check for
// any unassigned Unicode points and if any are found return an error.
// This is described in section 7.
//
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
PRBool allowUnassigned)
{ {
if (!mNamePrepHandle || !mNormalizer) if (!mNamePrepHandle || !mNormalizer)
return NS_ERROR_FAILURE; return NS_ERROR_FAILURE;
@ -534,6 +554,14 @@ nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out)
if (idn_err != idn_success || found) if (idn_err != idn_success || found)
return NS_ERROR_FAILURE; return NS_ERROR_FAILURE;
if (!allowUnassigned) {
// check unassigned code points
idn_err = idn_nameprep_isunassigned(mNamePrepHandle,
(const PRUint32 *) ucs4Buf, &found);
if (idn_err != idn_success || found)
return NS_ERROR_FAILURE;
}
// set the result string // set the result string
out.Assign(normlizedStr); out.Assign(normlizedStr);
@ -550,7 +578,8 @@ nsresult nsIDNService::encodeToACE(const nsAString& in, nsACString& out)
return punycode(mACEPrefix, in, out); return punycode(mACEPrefix, in, out);
} }
nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out) nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
PRBool allowUnassigned)
{ {
nsresult rv = NS_OK; nsresult rv = NS_OK;
@ -565,7 +594,7 @@ nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out)
LossyCopyUTF16toASCII(in, out); LossyCopyUTF16toASCII(in, out);
else { else {
nsAutoString strPrep; nsAutoString strPrep;
rv = stringPrep(in, strPrep); rv = stringPrep(in, strPrep, allowUnassigned);
if (NS_SUCCEEDED(rv)) { if (NS_SUCCEEDED(rv)) {
if (IsASCII(strPrep)) if (IsASCII(strPrep))
LossyCopyUTF16toASCII(strPrep, out); LossyCopyUTF16toASCII(strPrep, out);
@ -610,7 +639,8 @@ void nsIDNService::normalizeFullStops(nsAString& s)
} }
} }
nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out) nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
PRBool allowUnassigned)
{ {
PRBool isAce; PRBool isAce;
IsACE(in, &isAce); IsACE(in, &isAce);
@ -646,7 +676,7 @@ nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
// Validation: encode back to ACE and compare the strings // Validation: encode back to ACE and compare the strings
nsCAutoString ace; nsCAutoString ace;
nsresult rv = ConvertUTF8toACE(out, ace); nsresult rv = UTF8toACE(out, ace, allowUnassigned);
NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_SUCCESS(rv, rv);
if (!ace.Equals(in, nsCaseInsensitiveCStringComparator())) if (!ace.Equals(in, nsCaseInsensitiveCStringComparator()))
@ -658,8 +688,14 @@ nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
PRBool nsIDNService::isInWhitelist(const nsACString &host) PRBool nsIDNService::isInWhitelist(const nsACString &host)
{ {
if (mIDNWhitelistPrefBranch) { if (mIDNWhitelistPrefBranch) {
// truncate trailing dots first
nsCAutoString tld(host); nsCAutoString tld(host);
// make sure the host is ACE for lookup and check that there are no
// unassigned codepoints
if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, PR_FALSE))) {
return PR_FALSE;
}
// truncate trailing dots first
tld.Trim("."); tld.Trim(".");
PRInt32 pos = tld.RFind("."); PRInt32 pos = tld.RFind(".");
if (pos == kNotFound) if (pos == kNotFound)
@ -667,11 +703,6 @@ PRBool nsIDNService::isInWhitelist(const nsACString &host)
tld.Cut(0, pos + 1); tld.Cut(0, pos + 1);
// make sure the TLD is ACE for lookup.
if (!IsASCII(tld) &&
NS_FAILED(ConvertUTF8toACE(tld, tld)))
return PR_FALSE;
PRBool safe; PRBool safe;
if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe))) if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe)))
return safe; return safe;

View File

@ -1,4 +1,4 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK ***** /* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
* *
@ -71,14 +71,21 @@ public:
private: private:
void normalizeFullStops(nsAString& s); void normalizeFullStops(nsAString& s);
nsresult stringPrepAndACE(const nsAString& in, nsACString& out); nsresult stringPrepAndACE(const nsAString& in, nsACString& out,
PRBool allowUnassigned);
nsresult encodeToACE(const nsAString& in, nsACString& out); nsresult encodeToACE(const nsAString& in, nsACString& out);
nsresult stringPrep(const nsAString& in, nsAString& out); nsresult stringPrep(const nsAString& in, nsAString& out,
nsresult decodeACE(const nsACString& in, nsACString& out); PRBool allowUnassigned);
nsresult decodeACE(const nsACString& in, nsACString& out,
PRBool allowUnassigned);
nsresult UTF8toACE(const nsACString& in, nsACString& out,
PRBool allowUnassigned);
nsresult ACEtoUTF8(const nsACString& in, nsACString& out,
PRBool allowUnassigned);
PRBool isInWhitelist(const nsACString &host); PRBool isInWhitelist(const nsACString &host);
void prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref); void prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref);
PRBool mMultilingualTestBed; // if true generates extra node for mulitlingual testbed PRBool mMultilingualTestBed; // if true generates extra node for multilingual testbed
idn_nameprep_t mNamePrepHandle; idn_nameprep_t mNamePrepHandle;
nsCOMPtr<nsIUnicodeNormalizer> mNormalizer; nsCOMPtr<nsIUnicodeNormalizer> mNormalizer;
char mACEPrefix[kACEPrefixLen+1]; char mACEPrefix[kACEPrefixLen+1];

View File

@ -0,0 +1,69 @@
/**
* Test for unassigned code points in IDNs (RFC 3454 section 7)
*/
// Shorthand for Components.classes, used to look up services by contract ID.
const Cc = Components.classes;
// Shorthand for Components.interfaces, used to name the interface requested.
const Ci = Components.interfaces;
// IDN service instance used by expected_pass() and expected_fail();
// it is assigned in run_test() before either helper is called.
var idnService;
/**
 * Check that the display form of a host name is the host name itself.
 *
 * @param inputIDN  host name handed to convertToDisplayIDN(); the test passes
 *                  only if the returned string equals this value.
 */
function expected_pass(inputIDN)
{
  // Out-parameter holder required by convertToDisplayIDN(); its value is not
  // inspected by this test.
  var asciiFlag = {};
  var shown = idnService.convertToDisplayIDN(inputIDN, asciiFlag);

  do_check_eq(shown, inputIDN);
}
/**
 * Check that the display form of a host name differs from the host name.
 *
 * An exception thrown by convertToDisplayIDN() is treated the same as a
 * changed result: the display string is taken to be empty, which (for any
 * non-empty input) is different from the input.
 *
 * @param inputIDN  host name handed to convertToDisplayIDN(); the test passes
 *                  only if the returned string does NOT equal this value.
 */
function expected_fail(inputIDN)
{
  // Out-parameter holder required by convertToDisplayIDN(); its value is not
  // inspected by this test.
  var asciiFlag = {};
  var shown;

  try {
    shown = idnService.convertToDisplayIDN(inputIDN, asciiFlag);
  } catch (ex) {
    // Deliberately swallowed: a conversion error also means the input was
    // not displayed as given.
    shown = "";
  }

  do_check_neq(shown, inputIDN);
}
/**
 * Test entry point.
 *
 * Forces the "network.IDN.whitelist.com" pref to true, then checks what
 * convertToDisplayIDN() returns for .com host names containing assigned and
 * unassigned code points, given both as Unicode and as punycode.
 *
 * The pref is restored in a finally block so that a failing check (which
 * aborts the test by throwing) cannot leave the whitelist entry forced on.
 */
function run_test() {
  // add an IDN whitelist pref
  var pbi = Cc["@mozilla.org/preferences-service;1"]
              .getService(Ci.nsIPrefBranch2);
  var whitelistPref = "network.IDN.whitelist.com";

  // Remember whether the pref already had a value so it can be put back.
  var savedPrefValue = false;
  var prefExists = false;
  try {
    savedPrefValue = pbi.getBoolPref(whitelistPref);
    prefExists = true;
  } catch(e) { }

  // Set the pref outside the try block below: if this call itself throws
  // there is nothing to restore.
  pbi.setBoolPref(whitelistPref, true);

  try {
    idnService = Cc["@mozilla.org/network/idn-service;1"]
                   .getService(Ci.nsIIDNService);

    // assigned code point
    expected_pass("foo\u0101bar.com");

    // assigned code point in punycode. Should *fail* because the URL will be
    // converted to Unicode for display
    expected_fail("xn--foobar-5za.com");

    // unassigned code point
    expected_fail("foo\u3040bar.com");

    // unassigned code point in punycode. Should *pass* because the URL will not
    // be converted to Unicode
    expected_pass("xn--foobar-533e.com");

    // code point assigned since Unicode 3.0
    // XXX This test will unexpectedly pass when we update to IDNAbis
    expected_fail("foo\u0370bar.com");
  } finally {
    // reset the pref
    if (prefExists)
      pbi.setBoolPref(whitelistPref, savedPrefValue);
    else
      pbi.clearUserPref(whitelistPref);
  }
}