- Add a prettyprint flag to the html-to-text sink (false by default).

When prettyprinting is on, the sink attempts to make the plaintext
  mimic the look of the HTML (wrapped lines, blockquote indentation,
  explicit line breaks); when off (as for copy/paste), it just
  dumps the text with no formatting information.


git-svn-id: svn://10.0.0.236/trunk@36966 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
akkana%netscape.com
1999-06-26 00:01:11 +00:00
parent 1ae05240c6
commit 2300ca567a
7 changed files with 508 additions and 240 deletions

View File

@@ -61,12 +61,8 @@ static PRBool IsBlockLevel(eHTMLTags aTag);
*/
nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
{
nsresult res = NS_OK;
// If the converter is ucs2, then do not use a converter
nsString ucs2("ucs2");
if (aCharset.Equals(ucs2))
@@ -114,8 +110,6 @@ nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
}
/**
* This method gets called as part of our COM-like interfaces.
* Its purpose is to create an interface to parser object
@@ -163,13 +157,15 @@ NS_IMPL_RELEASE(nsHTMLToTXTSinkStream)
NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
nsIOutputStream* aStream,
const nsString* aCharsetOverride) {
const nsString* aCharsetOverride,
PRBool aPrettyPrint)
{
NS_ASSERTION(aStream != nsnull, "a valid stream is required");
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(aStream,nsnull);
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(aStream, nsnull);
if (nsnull == it) {
return NS_ERROR_OUT_OF_MEMORY;
}
it->DoPrettyPrint(aPrettyPrint);
if (aCharsetOverride != nsnull)
it->SetCharsetOverride(aCharsetOverride);
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
@@ -184,27 +180,29 @@ NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
*/
NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
nsString* aString) {
nsString* aString, PRBool aPrettyPrint)
{
NS_ASSERTION(aString != nsnull, "a valid stream is required");
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull,aString);
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull, aString);
if (nsnull == it) {
return NS_ERROR_OUT_OF_MEMORY;
}
it->DoPrettyPrint(aPrettyPrint);
nsString ucs2("ucs2");
it->SetCharsetOverride(&ucs2);
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
/**
* Construct a content sink stream.
* @update gpk02/03/99
* @param
* @return
*/
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString* aString) {
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream,
nsString* aString)
{
NS_INIT_REFCNT();
mStream = aStream;
mColPos = 0;
@@ -216,21 +214,30 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString*
mUnicodeEncoder = nsnull;
mStream = aStream;
mString = aString;
mPrettyPrint = PR_FALSE;
mPreformatted = PR_FALSE;
mWrapColumn = 72; // XXX magic number, obviously needs to be settable
}
/**
*
* @update gpk02/03/99
* @param
* @return
*/
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream()
{
delete [] mBuffer;
NS_IF_RELEASE(mUnicodeEncoder);
}
/**
 * Turn prettyprinting of the plaintext output on or off.
 * The flag is stored in mPrettyPrint, which the sink consults when
 * deciding whether to wrap text and emit line breaks.
 *
 * @param aDoPrettyPrint  new value for mPrettyPrint
 * @return NS_OK (this setter cannot fail)
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::DoPrettyPrint(PRBool aDoPrettyPrint)
{
mPrettyPrint = aDoPrettyPrint;
return NS_OK;
}
/**
*
@@ -580,7 +587,10 @@ nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
* @return
*/
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode){
nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode)
{
// Skip comments in plaintext output
mDoOutput = PR_FALSE;
return NS_OK;
}
@@ -594,7 +604,8 @@ nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode){
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
{
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
const nsString& name = aNode.GetText();
if (name.Equals("XIF_DOC_INFO"))
@@ -617,6 +628,23 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
if (type == eHTMLTag_body)
mDoOutput = PR_TRUE;
else if (mDoOutput && type == eHTMLTag_li)
{
nsString temp("*");
Write(temp);
mColPos++;
}
else if (type == eHTMLTag_pre)
{
mPreformatted = PR_TRUE;
nsString temp(NS_LINEBREAK);
Write(temp);
mColPos = 0;
}
else if (type == eHTMLTag_blockquote)
mIndent += gTabSize;
return NS_OK;
}
@@ -629,26 +657,40 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode)
{
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
//const nsString& name = aNode.GetText();
if (type == eHTMLTag_body)
mDoOutput = PR_FALSE;
else if (type == eHTMLTag_comment)
{
mDoOutput = PR_TRUE;
return NS_OK;
}
else if (type == eHTMLTag_pre)
mPreformatted = PR_FALSE;
else if (type == eHTMLTag_blockquote)
mIndent -= gTabSize;
if (IsBlockLevel(type))
{
if (mColPos != 0)
{
nsString temp("\n");
Write(temp);
mColPos = 0;
{
if (mPrettyPrint)
{
nsString temp(NS_LINEBREAK);
Write(temp);
mColPos = 0;
}
}
}
return NS_OK;
}
/**
* This method is used to add a leaf to the currently
* open container.
@@ -658,7 +700,8 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode)
{
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
nsString text = aNode.GetText();
@@ -666,9 +709,22 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
if (mDoOutput == PR_FALSE)
return NS_OK;
if (type == eHTMLTag_text) {
Write(text);
mColPos += text.Length();
if (type == eHTMLTag_text)
{
if (mColPos > mIndent)
{
nsString temp(" ");
Write(temp);
mColPos++;
}
if (mPrettyPrint)
WriteWrapped(text);
else
{
Write(text);
mColPos += text.Length();
}
}
else if (type == eHTMLTag_entity)
{
@@ -682,32 +738,113 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
mColPos++;
}
else if (type == eHTMLTag_whitespace)
else if (type == eHTMLTag_br)
{
if (PR_TRUE)
if (mPrettyPrint)
{
nsString temp (NS_LINEBREAK);
Write(temp);
mColPos = 0;
}
}
// The only time we want to pass along whitespace from the original
// html source is if we're prettyprinting and we're inside a <pre>.
// Otherwise, either we're collapsing to minimal text, or we're
// prettyprinting to mimic the html format, and in neither case
// does the formatting of the html source help us.
else if (mPrettyPrint && mPreformatted && type == eHTMLTag_whitespace)
{
if (mPrettyPrint)
{
text = aNode.GetText();
Write(text);
mColPos += text.Length();
}
}
else if (type == eHTMLTag_br)
else if (mPrettyPrint && mPreformatted && type == eHTMLTag_newline)
{
nsString temp("\n");
nsString temp(NS_LINEBREAK);
Write(temp);
mColPos++;
mColPos = 0;
}
else if (type == eHTMLTag_newline)
{
nsString temp("\n");
Write(text);
mColPos++;
}
return NS_OK;
}
//
// Write a string, wrapping appropriately to mWrapColumn.
//
void
nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
{
int totLen = aString.Length();
int charsLeft = totLen;
while (charsLeft > 0) // Loop over lines
{
// Indent at the beginning of the line, if necessary
if (mColPos == 0 && mIndent > 0)
{
char* spaces = new char[mIndent+1];
for (int i=0; i<mIndent; ++i)
spaces[i] = ' ';
spaces[mIndent] = '\0';
nsString temp(spaces);
Write (temp);
mColPos += mIndent;
delete[] spaces;
}
// Write whatever chunk of the string we can fit:
int bol = totLen - charsLeft;
int eol = bol + mWrapColumn - mColPos;
if (eol > totLen)
eol = totLen;
else
{
// We need to wrap, so search backward to find last IsSpace char:
int lastSpace = eol;
while (lastSpace > bol && !nsString::IsSpace(aString[lastSpace]))
--lastSpace;
if (lastSpace == bol)
{
// If we reached the bol, it might just be because we were close
// to the end already and should have wrapped last time.
// In that case, write a linebreak and come around again.
if (mColPos > mIndent)
{
nsAutoString linebreak(NS_LINEBREAK);
Write(linebreak);
mColPos = 0;
continue;
}
// Else apparently we really can't break this line at whitespace --
// so scan forward to the next space (if any) and dump a long line:
while (eol > totLen && !nsString::IsSpace(aString[lastSpace]))
++eol;
}
else if (lastSpace > bol && lastSpace < eol)
eol = lastSpace+1;
#ifdef DEBUG_akkana
else
printf("Wrapping: bol = %d, eol = %d, lastSpace = %d, totLen = %d\n",
bol, eol, lastSpace, totLen);
#endif
}
nsAutoString lineStr;
aString.Mid(lineStr, bol, eol-bol);
if (eol != totLen) // we're wrapping
{
lineStr.Append(NS_LINEBREAK);
mColPos = 0;
}
else
mColPos += lineStr.Length();
Write(lineStr);
charsLeft = totLen - eol;
}
}
/**
* This method gets called when the parser begins the process
@@ -804,6 +941,7 @@ PRBool IsInline(eHTMLTags aTag)
case eHTMLTag_td:
case eHTMLTag_textarea:
case eHTMLTag_tt:
case eHTMLTag_u:
case eHTMLTag_var:
case eHTMLTag_wbr: