Bug 363153: Turn on the utf8 bit on all strings in Bugzilla that contain

non-ASCII data, if the utf8 parameter is on. This means that string functions like substr() work properly on multi-byte languages, now. Patch By Max Kanat-Alexander <mkanat@bugzilla.org> r=wurblzap, a=mkanat git-svn-id: svn://10.0.0.236/trunk@239856 18797224-902f-48f8-a5cc-f745e15eee43
2007-11-23 06:58:36 +00:00 · 2007-11-23 06:58:36 +00:00 · eff047bd4f
commit eff047bd4f
parent 83d7299fb2
8 changed files with 44 additions and 10 deletions
--- a/mozilla/webtools/bugzilla/Bugzilla.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla.pm
@ -81,6 +81,7 @@ use constant SHUTDOWNHTML_EXIT_SILENTLY => [

 # Note that this is a raw subroutine, not a method, so $class isn't available.
 sub init_page {
+    (binmode STDOUT, ':utf8') if Bugzilla->params->{'utf8'};

    # Some environment variables are not taint safe
    delete @::ENV{'PATH', 'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
--- a/mozilla/webtools/bugzilla/Bugzilla/CGI.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla/CGI.pm
@ -233,6 +233,27 @@ sub header {
    return $self->SUPER::header(@_) || "";
 }

+# CGI.pm is not utf8-aware and passes data as bytes instead of UTF-8 strings.
+sub param {
+    my $self = shift;
+    if (Bugzilla->params->{'utf8'} && scalar(@_) == 1) { # only one-argument calls are decoded
+        if (wantarray) { # list context: decode every value returned
+            return map { _fix_utf8($_) } $self->SUPER::param(@_);
+        }
+        else { # scalar context: force a scalar call, then decode that one value
+            return _fix_utf8(scalar $self->SUPER::param(@_));
+        }
+    }
+    return $self->SUPER::param(@_); # any other call is passed to SUPER::param unchanged
+}
+
+sub _fix_utf8 { # returns its argument, UTF-8-decoded when it is a defined byte string
+    my $input = shift;
+    # The is_utf8 is here in case CGI gets smart about utf8 someday.
+    utf8::decode($input) if defined $input && !utf8::is_utf8($input); # decodes the local copy in place; undef is left alone
+    return $input;
+}
+
 # The various parts of Bugzilla which create cookies don't want to have to
 # pass them around to all of the callers. Instead, store them locally here,
 # and then output as required from |header|.
--- a/mozilla/webtools/bugzilla/Bugzilla/Constants.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla/Constants.pm
@ -382,10 +382,10 @@ use constant DB_MODULE => {
                dbd => { 
                    package => 'DBD-mysql',
                    module  => 'DBD::mysql',
-                    version => '2.9003',
-                    # Certain versions are broken, development versions are
-                    # always disallowed.
-                    blacklist => ['^3\.000[3-6]', '_'],
+                    # Disallow development versions
+                    blacklist => ['_'],
+                    # For UTF-8 support
+                    version => '4.00',
                },
                name => 'MySQL'},
    'pg'    => {db => 'Bugzilla::DB::Pg', db_version => '8.00.0000',
--- a/mozilla/webtools/bugzilla/Bugzilla/DB/Mysql.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla/DB/Mysql.pm
@ -58,8 +58,10 @@ sub new {
    my $dsn = "DBI:mysql:host=$host;database=$dbname";
    $dsn .= ";port=$port" if $port;
    $dsn .= ";mysql_socket=$sock" if $sock;
+
+    my $attrs = { mysql_enable_utf8 => Bugzilla->params->{'utf8'} };
    
-    my $self = $class->db_new($dsn, $user, $pass);
+    my $self = $class->db_new($dsn, $user, $pass, $attrs);

    # This makes sure that if the tables are encoded as UTF-8, we
    # return their data correctly.
--- a/mozilla/webtools/bugzilla/Bugzilla/DB/Pg.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla/DB/Pg.pm
@ -68,7 +68,9 @@ sub new {
    # creating tables.
    $dsn .= ";options='-c client_min_messages=warning'";

-    my $self = $class->db_new($dsn, $user, $pass);
+    my $attrs = { pg_enable_utf8 => Bugzilla->params->{'utf8'} };
+
+    my $self = $class->db_new($dsn, $user, $pass, $attrs);

    # all class local variables stored in DBI derived class needs to have
    # a prefix 'private_'. See DBI documentation.
--- a/mozilla/webtools/bugzilla/Bugzilla/Mailer.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla/Mailer.pm
@ -67,7 +67,9 @@ sub MessageToMTA {
    # Encode the headers correctly in quoted-printable
    foreach my $header qw(From To Cc Reply-To Sender Errors-To Subject) {
        if (my $value = $email->header($header)) {
-            $value = Encode::decode("UTF-8", $value) if Bugzilla->params->{'utf8'};
+            if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($value)) {
+                utf8::decode($value); # decodes in place; the return value is only a success flag, so it must not be assigned
+            }
            my $encoded = encode('MIME-Q', $value);
            $email->header_set($header, $encoded);
        }
--- a/mozilla/webtools/bugzilla/Bugzilla/Util.pm
+++ b/mozilla/webtools/bugzilla/Bugzilla/Util.pm
@ -185,6 +185,8 @@ sub html_light_quote {
 # This originally came from CGI.pm, by Lincoln D. Stein
 sub url_quote {
    my ($toencode) = (@_);
+    utf8::encode($toencode) # The regex below works on bytes, so turn characters into UTF-8 octets first
+        if Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode); # only strings carrying the utf8 flag are converted
    $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
    return $toencode;
 }
@ -206,6 +208,10 @@ sub xml_quote {
    return $var;
 }

+# This function must not be relied upon to return a valid string to pass to
+# the DB or the user in UTF-8 situations. The only thing you can rely upon
+# it for is that if you url_decode a string, it will url_quote back to the
+# exact same thing.
 sub url_decode {
    my ($todecode) = (@_);
    $todecode =~ tr/+/ /;       # pluses become spaces
--- a/mozilla/webtools/bugzilla/email_in.pl
+++ b/mozilla/webtools/bugzilla/email_in.pl
@ -38,7 +38,7 @@ use Email::MIME;
 use Email::MIME::Attachment::Stripper;
 use Getopt::Long qw(:config bundling);
 use Pod::Usage;
-use Encode qw(encode decode);
+use Encode;

 use Bugzilla;
 use Bugzilla::Bug qw(ValidateBugID);
@ -306,8 +306,8 @@ sub get_text_alternative {
        debug_print("Part Character Encoding: $charset", 2);
        if (!$ct || $ct =~ /^text\/plain/i) {
            $body = $part->body;
-            if (Bugzilla->params->{'utf8'}) {
-                $body = encode('UTF-8', decode($charset, $body));
+            if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($body)) {
+                $body = Encode::decode($charset, $body);
            }
            last;
        }