must go first:
$text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S) ) }
{
my $result = _has_multiple_underscores($2) ? $1 : "$2";
$result;
}gsxe;
$text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{$1}gsx;
$text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (?!\S) ) }
{
my $result = _has_multiple_underscores($2) ? $1 : "$2";
$result;
}gsxe;
$text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{$1}gsx;
# And now, a second pass to catch nested strong and emphasis special cases
$text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (\S*) ) }
{
my $result = _has_multiple_underscores($3) ? $1 : "$2$3";
$result;
}gsxe;
$text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{$1}gsx;
$text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (\S*) ) }
{
my $result = _has_multiple_underscores($3) ? $1 : "$2$3";
$result;
}gsxe;
$text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{$1}gsx;
return $text;
}
sub _DoStrikethroughs {
my ($self, $text) = @_;
$text =~ s{ ^ ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{$1}gsx;
$text =~ s{ (?<=_|[^~\w]) ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{$1}gsx;
return $text;
}
# The original _DoCodeSpans() uses the 's' modifier in its regex
# which prevents _DoCodeBlocks() to match GFM fenced code blocks.
# We copy the code from the original implementation and remove the
# 's' modifier from it.
sub _DoCodeSpans {
my ($self, $text) = @_;
$text =~ s@
(?_EncodeCode($c);
"$c";
@egx;
return $text;
}
# Override to add GFM Fenced Code Blocks
sub _DoCodeBlocks {
my ($self, $text) = @_;
$text =~ s{
^ `{3,} [\s\t]* \n
( # $1 = the entire code block
(?: .* \n+)+?
)
`{3,} [\s\t]* $
}{
my $codeblock = $1;
my $result;
$codeblock = $self->_EncodeCode($codeblock);
$codeblock = $self->_Detab($codeblock);
$codeblock =~ s/\n\z//; # remove the trailing newline
$result = "\n\n" . $codeblock . "
\n\n";
$result;
}egmx;
# And now do the standard code blocks
$text = $self->SUPER::_DoCodeBlocks($text);
return $text;
}
sub _DoBlockQuotes {
my ($self, $text) = @_;
$text =~ s{
( # Wrap whole match in $1
(?:
^[ \t]*>[ \t]? # '>' at the start of a line
.+\n # rest of the first line
(?:.+\n)* # subsequent consecutive lines
\n* # blanks
)+
)
}{
my $bq = $1;
$bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
$bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
$bq = $self->_RunBlockGamut($bq, {wrap_in_p_tags => 1}); # recurse
$bq =~ s/^/ /mg;
# These leading spaces screw with content, so we need to fix that:
$bq =~ s{(\s*.+?
)}{
my $pre = $1;
$pre =~ s/^ //mg;
$pre;
}egs;
"\n$bq\n
\n\n";
}egmx;
return $text;
}
sub _EncodeCode {
my ($self, $text) = @_;
# We need to unescape the escaped HTML characters in code blocks.
# These are the reverse of the escapings done in Bugzilla::Util::html_quote()
$text =~ s/<//g;
$text =~ s/"/"/g;
$text =~ s/@/@/g;
# '&' substitution must be the last one, otherwise a literal like '>'
# will turn to '>' because '&' is already changed to '&' in Bugzilla::Util::html_quote().
# In other words, html_quote() will change '>' to '>' and then we will
# change '>' -> '>' -> '>' if we write this substitution as the first one.
$text =~ s/&/&/g;
$text =~ s{ \1 }{$1}xmgi;
$text = $self->SUPER::_EncodeCode($text);
$text =~ s/~/$g_escape_table{'~'}/go;
# Encode '<' to prevent URLs from getting linkified in code spans
$text =~ s/</$g_escape_table{'<'}/go;
return $text;
}
sub _EncodeBackslashEscapes {
my ($self, $text) = @_;
$text = $self->SUPER::_EncodeBackslashEscapes($text);
$text =~ s/\\~/$g_escape_table{'~'}/go;
return $text;
}
sub _UnescapeSpecialChars {
my ($self, $text) = @_;
$text = $self->SUPER::_UnescapeSpecialChars($text);
$text =~ s/$g_escape_table{'~'}/~/go;
$text =~ s/$g_escape_table{'<'}/</go;
return $text;
}
# Check if the passed string is of the form multiple_underscores_in_a_word.
# To check that, we first need to make sure that the string does not contain
# any white-space. Then, if the string is composed of non-space chunks which
# are bound together with underscores, the string has the desired form.
sub _has_multiple_underscores {
my $string = shift;
return 0 unless defined($string) && length($string);
return 0 if $string =~ /[\t\s]+/;
return 1 if scalar (split /_/, $string) > 1;
return 0;
}
1;
__END__
=head1 NAME
Bugzilla::Markdown - Generates HTML output from structured plain-text input.
=head1 SYNOPSIS
use Bugzilla::Markdown;
my $markdown = Bugzilla::Markdown->new();
print $markdown->markdown($text);
=head1 DESCRIPTION
Bugzilla::Markdown implements a Markdown engine that produces
an HTML-based output from a given plain-text input.
The majority of the implementation is done by C
CPAN module. It also applies the linkifications done in L
to the input resulting in an output which is a combination of both Markdown
structures and those defined by Bugzilla itself.
=head2 Accessors
=over
=item C
C Produces an HTML-based output string based on the structures
and format defined in the given plain-text input.
=over
=item B
=over
=item C
C A plain-text string which includes Markdown structures.
=back
=back
=back