1999-08-28 00:58:57 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
|
|
* The contents of this file are subject to the Netscape Public
|
|
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
|
|
* except in compliance with the License. You may obtain a copy of
|
|
|
|
* the License at http://www.mozilla.org/NPL/
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed on an "AS
|
|
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
|
|
* implied. See the License for the specific language governing
|
|
|
|
* rights and limitations under the License.
|
|
|
|
*
|
|
|
|
* The Original Code is Mozilla Communicator client code, released
|
|
|
|
* March 31, 1998.
|
|
|
|
*
|
|
|
|
* The Initial Developer of the Original Code is Netscape
|
|
|
|
* Communications Corporation. Portions created by Netscape are
|
|
|
|
* Copyright (C) 1998-1999 Netscape Communications Corporation. All
|
|
|
|
* Rights Reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s): Akkana Peck.
|
|
|
|
*/
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
#include <ctype.h> // for isdigit()
|
|
|
|
|
1999-08-28 00:58:57 +04:00
|
|
|
#include "nsParserCIID.h"
|
|
|
|
#include "nsIParser.h"
|
|
|
|
#include "nsHTMLContentSinkStream.h"
|
|
|
|
#include "nsHTMLToTXTSinkStream.h"
|
|
|
|
#include "nsIComponentManager.h"
|
|
|
|
#include "CNavDTD.h"
|
1999-09-30 00:11:07 +04:00
|
|
|
#include "nsXIFDTD.h"
|
1999-08-28 00:58:57 +04:00
|
|
|
|
|
|
|
// Registry setup hook with C linkage; not defined in the visible source.
// main() calls it right after component auto-registration.
extern "C" void NS_SetupRegistry();
|
|
|
|
|
|
|
|
#ifdef XP_PC
|
1999-10-10 05:32:59 +04:00
|
|
|
#define PARSER_DLL "gkparser.dll"
|
1999-08-28 00:58:57 +04:00
|
|
|
#endif
|
|
|
|
#ifdef XP_MAC
|
|
|
|
#endif
|
|
|
|
#if defined(XP_UNIX) || defined(XP_BEOS)
|
2000-07-02 22:44:42 +04:00
|
|
|
// Keep the space between the literal and MOZ_DLL_SUFFIX: since C++11 a
// string literal immediately followed by an identifier is lexed as a
// user-defined literal, so the macro would never be expanded.
#define PARSER_DLL "libhtmlpars" MOZ_DLL_SUFFIX
|
1999-08-28 00:58:57 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
// Class IDs (CIDs)
|
|
|
|
// Class ID that HTML2text() passes to nsComponentManager::CreateInstance
// to instantiate the parser.
static NS_DEFINE_IID(kParserCID, NS_PARSER_IID);
|
|
|
|
|
|
|
|
// Interface IDs (IIDs)
|
|
|
|
// Interface ID that HTML2text() requests from CreateInstance; the result
// is stored in an nsIParser*.
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
|
|
|
|
|
1999-10-02 00:55:01 +04:00
|
|
|
int
|
|
|
|
Compare(nsString& str, nsString& aFileName)
|
1999-09-30 00:11:07 +04:00
|
|
|
{
|
1999-10-02 00:55:01 +04:00
|
|
|
// Open the file in a Unix-centric way,
|
|
|
|
// until I find out how to use nsFileSpec:
|
|
|
|
char* filename = aFileName.ToNewCString();
|
|
|
|
FILE* file = fopen(filename, "r");
|
|
|
|
if (!file)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "Can't open file %s", filename);
|
|
|
|
perror(" ");
|
|
|
|
delete[] filename;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
delete[] filename;
|
|
|
|
|
|
|
|
// Inefficiently read from the file:
|
|
|
|
nsString inString;
|
|
|
|
char c;
|
2000-02-25 04:09:37 +03:00
|
|
|
int index = 0;
|
|
|
|
int different = 0;
|
1999-10-02 00:55:01 +04:00
|
|
|
while ((c = getc(file)) != EOF)
|
2000-02-25 04:09:37 +03:00
|
|
|
{
|
2000-04-26 05:13:55 +04:00
|
|
|
inString.AppendWithConversion(c);
|
2000-02-25 04:09:37 +03:00
|
|
|
// CVS isn't doing newline comparisons on these files for some reason.
|
|
|
|
// So compensate for possible newline problems in the CVS file:
|
|
|
|
if (c == '\n' && str[index] == '\r')
|
|
|
|
++index;
|
|
|
|
if (c != str[index++])
|
|
|
|
{
|
|
|
|
//printf("Comparison failed at char %d: generated was %d, file had %d\n",
|
|
|
|
// index, (int)str[index-1], (int)c);
|
|
|
|
different = index;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
1999-10-02 00:55:01 +04:00
|
|
|
if (file != stdin)
|
|
|
|
fclose(file);
|
|
|
|
|
2000-02-25 04:09:37 +03:00
|
|
|
if (!different)
|
1999-10-02 00:55:01 +04:00
|
|
|
return 0;
|
|
|
|
else
|
2000-01-19 03:24:46 +03:00
|
|
|
{
|
2000-02-25 04:09:37 +03:00
|
|
|
char* cstr = str.ToNewUTF8String();
|
|
|
|
printf("Comparison failed at char %d:\n-----\n%s\n-----\n",
|
|
|
|
different, cstr);
|
2000-01-19 03:24:46 +03:00
|
|
|
Recycle(cstr);
|
1999-10-02 00:55:01 +04:00
|
|
|
return 1;
|
2000-01-19 03:24:46 +03:00
|
|
|
}
|
1999-09-30 00:11:07 +04:00
|
|
|
}
|
|
|
|
|
1999-08-28 00:58:57 +04:00
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Convert the markup in inString (of type inType) to plaintext or, if outType is text/html, to html
|
|
|
|
//----------------------------------------------------------------------
|
1999-09-30 00:11:07 +04:00
|
|
|
// Run |inString| through the parser and either print the converted output
// or compare it against a reference file.
//
//   inString        the markup to convert
//   inType          input mime type; "text/xif" selects the XIF DTD,
//                   anything else the Nav HTML DTD
//   outType         output mime type; "text/html" selects the HTML content
//                   sink, anything else the plaintext sink
//   flags           passed through to the sink constructor
//   wrapCol         passed to the plaintext sink constructor only
//   compareAgainst  if non-empty, name of a file handed to Compare()
//                   together with the output instead of printing it
//
// Returns NS_ERROR_FAILURE if the parser cannot be created, the failing
// call's result if the sink, the DTD or the parse fails, Compare()'s result
// when a comparison file is given, and NS_OK otherwise.  Every path that
// gets past parser creation releases the parser, sink and DTD references
// held here.
nsresult
HTML2text(nsString& inString, nsString& inType, nsString& outType,
          int flags, int wrapCol, nsString& compareAgainst)
{
  nsresult rv = NS_OK;

  nsString outString;

  // Create a parser
  nsIParser* parser = nsnull;
  rv = nsComponentManager::CreateInstance(kParserCID, nsnull,
                                          kIParserIID,(void**)&parser);
  if (NS_FAILED(rv))
  {
    printf("Unable to create a parser : 0x%x\n", rv);
    return NS_ERROR_FAILURE;
  }

  nsIHTMLContentSink* sink = nsnull;
  nsIDTD* dtd = nsnull;

  // Create the appropriate output sink
  if (outType.EqualsWithConversion("text/html"))
    rv = NS_New_HTML_ContentSinkStream(&sink, &outString, flags);

  else  // default to plaintext
    rv = NS_New_HTMLToTXT_SinkStream(&sink, &outString, wrapCol, flags);

  if (NS_FAILED(rv))
  {
    printf("Couldn't create new content sink: 0x%x\n", rv);
    // Don't leak the parser (or a partially created sink) on failure.
    NS_IF_RELEASE(sink);
    NS_RELEASE(parser);
    return rv;
  }

  parser->SetContentSink(sink);

  if (inType.EqualsWithConversion("text/xif"))
    rv = NS_NewXIFDTD(&dtd);
  else
    rv = NS_NewNavHTMLDTD(&dtd);
  if (NS_FAILED(rv))
  {
    printf("Couldn't create new HTML DTD: 0x%x\n", rv);
    NS_IF_RELEASE(dtd);
    NS_IF_RELEASE(sink);
    NS_RELEASE(parser);
    return rv;
  }

  parser->RegisterDTD(dtd);

  // NOTE(review): this buffer and the one near the end of the function are
  // freed with delete[], while Compare() frees a ToNewUTF8String() buffer
  // with Recycle(); confirm which matches the string library's allocator.
  char* inTypeStr = inType.ToNewCString();
  rv = parser->Parse(inString, 0, NS_ConvertASCIItoUCS2(inTypeStr), PR_FALSE, PR_TRUE);
  delete[] inTypeStr;

  // The parser objects are no longer needed whether or not the parse
  // succeeded, so release them before looking at the result.
  NS_IF_RELEASE(dtd);
  NS_IF_RELEASE(sink);
  NS_RELEASE(parser);

  if (NS_FAILED(rv))
  {
    printf("Parse() failed! 0x%x\n", rv);
    return rv;
  }

  if (compareAgainst.Length() > 0)
    return Compare(outString, compareAgainst);

  char* charstar = outString.ToNewUTF8String();
  printf("Output string is:\n--------------------\n%s--------------------\n",
         charstar);
  delete[] charstar;

  return NS_OK;
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
int main(int argc, char** argv)
|
|
|
|
{
|
2000-04-26 05:13:55 +04:00
|
|
|
nsString inType; inType.AssignWithConversion("text/html");
|
|
|
|
nsString outType; outType.AssignWithConversion("text/plain");
|
1999-09-30 00:11:07 +04:00
|
|
|
int wrapCol = 72;
|
1999-10-02 00:55:01 +04:00
|
|
|
int flags = 0;
|
1999-09-30 00:11:07 +04:00
|
|
|
nsString compareAgainst;
|
|
|
|
|
|
|
|
|
|
|
|
// Skip over progname arg:
|
|
|
|
const char* progname = argv[0];
|
|
|
|
--argc; ++argv;
|
|
|
|
|
|
|
|
// Process flags
|
|
|
|
while (argc > 0 && argv[0][0] == '-')
|
|
|
|
{
|
|
|
|
switch (argv[0][1])
|
|
|
|
{
|
|
|
|
case 'h':
|
|
|
|
printf("\
|
1999-10-02 00:55:01 +04:00
|
|
|
Usage: %s [-i intype] [-o outtype] [-f flags] [-w wrapcol] [-c comparison_file] infile\n\
|
1999-09-30 00:11:07 +04:00
|
|
|
\tIn/out types are mime types (e.g. text/html)\n\
|
|
|
|
\tcomparison_file is a file against which to compare the output\n\
|
1999-11-03 05:44:44 +03:00
|
|
|
\n\
|
1999-10-02 00:55:01 +04:00
|
|
|
\tDefaults are -i text/html -o text/plain -f 0 -w 72 [stdin]\n",
|
1999-09-30 00:11:07 +04:00
|
|
|
progname);
|
|
|
|
exit(0);
|
|
|
|
|
|
|
|
case 'i':
|
|
|
|
if (argv[0][2] != '\0')
|
2000-04-26 05:13:55 +04:00
|
|
|
inType.AssignWithConversion(argv[0]+2);
|
1999-09-30 00:11:07 +04:00
|
|
|
else {
|
2000-04-26 05:13:55 +04:00
|
|
|
inType.AssignWithConversion(argv[1]);
|
1999-09-30 00:11:07 +04:00
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'o':
|
|
|
|
if (argv[0][2] != '\0')
|
2000-04-26 05:13:55 +04:00
|
|
|
outType.AssignWithConversion(argv[0]+2);
|
1999-09-30 00:11:07 +04:00
|
|
|
else {
|
2000-04-26 05:13:55 +04:00
|
|
|
outType.AssignWithConversion(argv[1]);
|
1999-09-30 00:11:07 +04:00
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'w':
|
|
|
|
if (isdigit(argv[0][2]))
|
|
|
|
wrapCol = atoi(argv[0]+2);
|
|
|
|
else {
|
|
|
|
wrapCol = atoi(argv[1]);
|
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
1999-10-02 00:55:01 +04:00
|
|
|
case 'f':
|
|
|
|
if (isdigit(argv[0][2]))
|
|
|
|
flags = atoi(argv[0]+2);
|
|
|
|
else {
|
|
|
|
flags = atoi(argv[1]);
|
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
case 'c':
|
|
|
|
if (argv[0][2] != '\0')
|
2000-04-26 05:13:55 +04:00
|
|
|
compareAgainst.AssignWithConversion(argv[0]+2);
|
1999-09-30 00:11:07 +04:00
|
|
|
else {
|
2000-04-26 05:13:55 +04:00
|
|
|
compareAgainst.AssignWithConversion(argv[1]);
|
1999-09-30 00:11:07 +04:00
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
++argv;
|
|
|
|
--argc;
|
|
|
|
}
|
|
|
|
|
|
|
|
FILE* file = 0;
|
|
|
|
if (argc > 0) // read from a file
|
|
|
|
{
|
|
|
|
// Open the file in a Unix-centric way,
|
|
|
|
// until I find out how to use nsFileSpec:
|
|
|
|
file = fopen(argv[0], "r");
|
|
|
|
if (!file)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "Can't open file %s", argv[0]);
|
|
|
|
perror(" ");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else file = stdin;
|
|
|
|
|
1999-09-15 02:43:18 +04:00
|
|
|
nsComponentManager::AutoRegister(nsIComponentManager::NS_Startup, 0);
|
1999-08-28 00:58:57 +04:00
|
|
|
NS_SetupRegistry();
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
// Read in the string: very inefficient, but who cares?
|
|
|
|
nsString inString;
|
|
|
|
char c;
|
|
|
|
while ((c = getc(file)) != EOF)
|
2000-04-26 05:13:55 +04:00
|
|
|
inString.AppendWithConversion(c);
|
1999-09-30 00:11:07 +04:00
|
|
|
|
|
|
|
if (file != stdin)
|
|
|
|
fclose(file);
|
|
|
|
|
1999-10-02 00:55:01 +04:00
|
|
|
return HTML2text(inString, inType, outType, flags, wrapCol, compareAgainst);
|
1999-08-28 00:58:57 +04:00
|
|
|
}
|