- Add a prettyprint flag to the html-to-text sink (false by default).
When prettyprinting is on, we will attempt to make the plaintext mimic the look of the html; when off (as for copy/paste), we just dump the text with no formatting information. git-svn-id: svn://10.0.0.236/trunk@36966 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
@@ -61,12 +61,8 @@ static PRBool IsBlockLevel(eHTMLTags aTag);
|
||||
*/
|
||||
nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
|
||||
{
|
||||
|
||||
|
||||
|
||||
nsresult res = NS_OK;
|
||||
|
||||
|
||||
// If the converter is ucs2, then do not use a converter
|
||||
nsString ucs2("ucs2");
|
||||
if (aCharset.Equals(ucs2))
|
||||
@@ -114,8 +110,6 @@ nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called as part of our COM-like interfaces.
|
||||
* Its purpose is to create an interface to parser object
|
||||
@@ -163,13 +157,15 @@ NS_IMPL_RELEASE(nsHTMLToTXTSinkStream)
|
||||
NS_HTMLPARS nsresult
|
||||
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
|
||||
nsIOutputStream* aStream,
|
||||
const nsString* aCharsetOverride) {
|
||||
|
||||
const nsString* aCharsetOverride,
|
||||
PRBool aPrettyPrint)
|
||||
{
|
||||
NS_ASSERTION(aStream != nsnull, "a valid stream is required");
|
||||
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(aStream,nsnull);
|
||||
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(aStream, nsnull);
|
||||
if (nsnull == it) {
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
it->DoPrettyPrint(aPrettyPrint);
|
||||
if (aCharsetOverride != nsnull)
|
||||
it->SetCharsetOverride(aCharsetOverride);
|
||||
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
|
||||
@@ -184,27 +180,29 @@ NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
|
||||
*/
|
||||
NS_HTMLPARS nsresult
|
||||
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
|
||||
nsString* aString) {
|
||||
|
||||
nsString* aString, PRBool aPrettyPrint)
|
||||
{
|
||||
NS_ASSERTION(aString != nsnull, "a valid stream is required");
|
||||
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull,aString);
|
||||
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull, aString);
|
||||
if (nsnull == it) {
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
it->DoPrettyPrint(aPrettyPrint);
|
||||
nsString ucs2("ucs2");
|
||||
it->SetCharsetOverride(&ucs2);
|
||||
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Construct a content sink stream.
|
||||
* @update gpk02/03/99
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString* aString) {
|
||||
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream,
|
||||
nsString* aString)
|
||||
{
|
||||
NS_INIT_REFCNT();
|
||||
mStream = aStream;
|
||||
mColPos = 0;
|
||||
@@ -216,21 +214,30 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString*
|
||||
mUnicodeEncoder = nsnull;
|
||||
mStream = aStream;
|
||||
mString = aString;
|
||||
mPrettyPrint = PR_FALSE;
|
||||
mPreformatted = PR_FALSE;
|
||||
mWrapColumn = 72; // XXX magic number, obviously needs to be settable
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gpk02/03/99
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
|
||||
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream()
|
||||
{
|
||||
delete [] mBuffer;
|
||||
NS_IF_RELEASE(mUnicodeEncoder);
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::DoPrettyPrint(PRBool aDoPrettyPrint)
|
||||
{
|
||||
mPrettyPrint = aDoPrettyPrint;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
@@ -580,7 +587,10 @@ nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
|
||||
* @return
|
||||
*/
|
||||
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode){
|
||||
nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode)
|
||||
{
|
||||
// Skip comments in plaintext output
|
||||
mDoOutput = PR_FALSE;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -594,7 +604,8 @@ nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode){
|
||||
* @return PR_TRUE if successful.
|
||||
*/
|
||||
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
|
||||
{
|
||||
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
@@ -617,6 +628,23 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
|
||||
if (type == eHTMLTag_body)
|
||||
mDoOutput = PR_TRUE;
|
||||
|
||||
else if (mDoOutput && type == eHTMLTag_li)
|
||||
{
|
||||
nsString temp("*");
|
||||
Write(temp);
|
||||
mColPos++;
|
||||
}
|
||||
else if (type == eHTMLTag_pre)
|
||||
{
|
||||
mPreformatted = PR_TRUE;
|
||||
nsString temp(NS_LINEBREAK);
|
||||
Write(temp);
|
||||
mColPos = 0;
|
||||
}
|
||||
else if (type == eHTMLTag_blockquote)
|
||||
mIndent += gTabSize;
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -629,26 +657,40 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
* @return PR_TRUE if successful.
|
||||
*/
|
||||
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
|
||||
nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode)
|
||||
{
|
||||
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
||||
//const nsString& name = aNode.GetText();
|
||||
|
||||
if (type == eHTMLTag_body)
|
||||
mDoOutput = PR_FALSE;
|
||||
|
||||
else if (type == eHTMLTag_comment)
|
||||
{
|
||||
mDoOutput = PR_TRUE;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
else if (type == eHTMLTag_pre)
|
||||
mPreformatted = PR_FALSE;
|
||||
|
||||
else if (type == eHTMLTag_blockquote)
|
||||
mIndent -= gTabSize;
|
||||
|
||||
if (IsBlockLevel(type))
|
||||
{
|
||||
if (mColPos != 0)
|
||||
{
|
||||
nsString temp("\n");
|
||||
Write(temp);
|
||||
mColPos = 0;
|
||||
{
|
||||
if (mPrettyPrint)
|
||||
{
|
||||
nsString temp(NS_LINEBREAK);
|
||||
Write(temp);
|
||||
mColPos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method is used to add a leaf to the currently
|
||||
* open container.
|
||||
@@ -658,7 +700,8 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
|
||||
* @return PR_TRUE if successful.
|
||||
*/
|
||||
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
|
||||
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode)
|
||||
{
|
||||
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
||||
|
||||
nsString text = aNode.GetText();
|
||||
@@ -666,9 +709,22 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
|
||||
if (mDoOutput == PR_FALSE)
|
||||
return NS_OK;
|
||||
|
||||
if (type == eHTMLTag_text) {
|
||||
Write(text);
|
||||
mColPos += text.Length();
|
||||
if (type == eHTMLTag_text)
|
||||
{
|
||||
if (mColPos > mIndent)
|
||||
{
|
||||
nsString temp(" ");
|
||||
Write(temp);
|
||||
mColPos++;
|
||||
}
|
||||
|
||||
if (mPrettyPrint)
|
||||
WriteWrapped(text);
|
||||
else
|
||||
{
|
||||
Write(text);
|
||||
mColPos += text.Length();
|
||||
}
|
||||
}
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
@@ -682,32 +738,113 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
|
||||
|
||||
mColPos++;
|
||||
}
|
||||
else if (type == eHTMLTag_whitespace)
|
||||
else if (type == eHTMLTag_br)
|
||||
{
|
||||
if (PR_TRUE)
|
||||
if (mPrettyPrint)
|
||||
{
|
||||
nsString temp (NS_LINEBREAK);
|
||||
Write(temp);
|
||||
mColPos = 0;
|
||||
}
|
||||
}
|
||||
// The only time we want to pass along whitespace from the original
|
||||
// html source is if we're prettyprinting and we're inside a <pre>.
|
||||
// Otherwise, either we're collapsing to minimal text, or we're
|
||||
// prettyprinting to mimic the html format, and in neither case
|
||||
// does the formatting of the html source help us.
|
||||
else if (mPrettyPrint && mPreformatted && type == eHTMLTag_whitespace)
|
||||
{
|
||||
if (mPrettyPrint)
|
||||
{
|
||||
text = aNode.GetText();
|
||||
Write(text);
|
||||
mColPos += text.Length();
|
||||
}
|
||||
}
|
||||
else if (type == eHTMLTag_br)
|
||||
else if (mPrettyPrint && mPreformatted && type == eHTMLTag_newline)
|
||||
{
|
||||
nsString temp("\n");
|
||||
nsString temp(NS_LINEBREAK);
|
||||
Write(temp);
|
||||
mColPos++;
|
||||
mColPos = 0;
|
||||
}
|
||||
else if (type == eHTMLTag_newline)
|
||||
{
|
||||
nsString temp("\n");
|
||||
Write(text);
|
||||
mColPos++;
|
||||
}
|
||||
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
//
|
||||
// Write a string, wrapping appropriately to mWrapColumn.
|
||||
//
|
||||
void
|
||||
nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
{
|
||||
int totLen = aString.Length();
|
||||
int charsLeft = totLen;
|
||||
while (charsLeft > 0) // Loop over lines
|
||||
{
|
||||
// Indent at the beginning of the line, if necessary
|
||||
if (mColPos == 0 && mIndent > 0)
|
||||
{
|
||||
char* spaces = new char[mIndent+1];
|
||||
for (int i=0; i<mIndent; ++i)
|
||||
spaces[i] = ' ';
|
||||
spaces[mIndent] = '\0';
|
||||
nsString temp(spaces);
|
||||
Write (temp);
|
||||
mColPos += mIndent;
|
||||
delete[] spaces;
|
||||
}
|
||||
|
||||
// Write whatever chunk of the string we can fit:
|
||||
int bol = totLen - charsLeft;
|
||||
int eol = bol + mWrapColumn - mColPos;
|
||||
if (eol > totLen)
|
||||
eol = totLen;
|
||||
else
|
||||
{
|
||||
// We need to wrap, so search backward to find last IsSpace char:
|
||||
int lastSpace = eol;
|
||||
while (lastSpace > bol && !nsString::IsSpace(aString[lastSpace]))
|
||||
--lastSpace;
|
||||
if (lastSpace == bol)
|
||||
{
|
||||
// If we reached the bol, it might just be because we were close
|
||||
// to the end already and should have wrapped last time.
|
||||
// In that case, write a linebreak and come around again.
|
||||
if (mColPos > mIndent)
|
||||
{
|
||||
nsAutoString linebreak(NS_LINEBREAK);
|
||||
Write(linebreak);
|
||||
mColPos = 0;
|
||||
continue;
|
||||
}
|
||||
// Else apparently we really can't break this line at whitespace --
|
||||
// so scan forward to the next space (if any) and dump a long line:
|
||||
while (eol > totLen && !nsString::IsSpace(aString[lastSpace]))
|
||||
++eol;
|
||||
}
|
||||
else if (lastSpace > bol && lastSpace < eol)
|
||||
eol = lastSpace+1;
|
||||
#ifdef DEBUG_akkana
|
||||
else
|
||||
printf("Wrapping: bol = %d, eol = %d, lastSpace = %d, totLen = %d\n",
|
||||
bol, eol, lastSpace, totLen);
|
||||
#endif
|
||||
}
|
||||
|
||||
nsAutoString lineStr;
|
||||
aString.Mid(lineStr, bol, eol-bol);
|
||||
|
||||
if (eol != totLen) // we're wrapping
|
||||
{
|
||||
lineStr.Append(NS_LINEBREAK);
|
||||
mColPos = 0;
|
||||
}
|
||||
else
|
||||
mColPos += lineStr.Length();
|
||||
Write(lineStr);
|
||||
charsLeft = totLen - eol;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method gets called when the parser begins the process
|
||||
@@ -804,6 +941,7 @@ PRBool IsInline(eHTMLTags aTag)
|
||||
case eHTMLTag_td:
|
||||
case eHTMLTag_textarea:
|
||||
case eHTMLTag_tt:
|
||||
case eHTMLTag_u:
|
||||
case eHTMLTag_var:
|
||||
case eHTMLTag_wbr:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user