1999-08-28 00:58:57 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
|
|
* The contents of this file are subject to the Netscape Public
|
|
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
|
|
* except in compliance with the License. You may obtain a copy of
|
|
|
|
* the License at http://www.mozilla.org/NPL/
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed on an "AS
|
|
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
|
|
* implied. See the License for the specific language governing
|
|
|
|
* rights and limitations under the License.
|
|
|
|
*
|
|
|
|
* The Original Code is Mozilla Communicator client code, released
|
|
|
|
* March 31, 1998.
|
|
|
|
*
|
|
|
|
* The Initial Developer of the Original Code is Netscape
|
|
|
|
* Communications Corporation. Portions created by Netscape are
|
|
|
|
* Copyright (C) 1998-1999 Netscape Communications Corporation. All
|
|
|
|
* Rights Reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s): Akkana Peck.
|
|
|
|
*/
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
#include <ctype.h> // for isdigit()
|
|
|
|
|
1999-08-28 00:58:57 +04:00
|
|
|
#include "nsParserCIID.h"
|
|
|
|
#include "nsIParser.h"
|
2000-08-31 21:23:22 +04:00
|
|
|
#include "nsIHTMLContentSink.h"
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviewed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
#include "nsIContentSerializer.h"
|
|
|
|
#include "nsLayoutCID.h"
|
|
|
|
#include "nsIHTMLToTextSink.h"
|
1999-08-28 00:58:57 +04:00
|
|
|
#include "nsIComponentManager.h"
|
2001-09-29 12:28:41 +04:00
|
|
|
#include "nsReadableUtils.h"
|
1999-08-28 00:58:57 +04:00
|
|
|
|
|
|
|
extern "C" void NS_SetupRegistry();
|
|
|
|
|
|
|
|
// Platform-specific file name of the parser library.
#ifdef XP_PC
#define PARSER_DLL "gkparser.dll"
#endif
#ifdef XP_MAC
#endif
#if defined(XP_UNIX) || defined(XP_BEOS)
// Keep a space between the literal and the macro: without it, C++11 and
// later lex the macro name as a user-defined-literal suffix and never
// expand it.
#define PARSER_DLL "libhtmlpars" MOZ_DLL_SUFFIX
#endif
|
|
|
|
|
|
|
|
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
|
2001-03-11 00:02:12 +03:00
|
|
|
static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);
|
1999-08-28 00:58:57 +04:00
|
|
|
|
1999-10-02 00:55:01 +04:00
|
|
|
int
|
|
|
|
Compare(nsString& str, nsString& aFileName)
|
1999-09-30 00:11:07 +04:00
|
|
|
{
|
1999-10-02 00:55:01 +04:00
|
|
|
// Open the file in a Unix-centric way,
|
|
|
|
// until I find out how to use nsFileSpec:
|
2001-09-29 12:28:41 +04:00
|
|
|
char* filename = ToNewCString(aFileName);
|
1999-10-02 00:55:01 +04:00
|
|
|
FILE* file = fopen(filename, "r");
|
|
|
|
if (!file)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "Can't open file %s", filename);
|
|
|
|
perror(" ");
|
|
|
|
delete[] filename;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
delete[] filename;
|
|
|
|
|
|
|
|
// Inefficiently read from the file:
|
|
|
|
nsString inString;
|
2001-02-14 04:23:23 +03:00
|
|
|
int c;
|
2000-02-25 04:09:37 +03:00
|
|
|
int index = 0;
|
|
|
|
int different = 0;
|
1999-10-02 00:55:01 +04:00
|
|
|
while ((c = getc(file)) != EOF)
|
2000-02-25 04:09:37 +03:00
|
|
|
{
|
2001-02-14 04:23:23 +03:00
|
|
|
inString.AppendWithConversion((char)c);
|
2000-02-25 04:09:37 +03:00
|
|
|
// CVS isn't doing newline comparisons on these files for some reason.
|
|
|
|
// So compensate for possible newline problems in the CVS file:
|
|
|
|
if (c == '\n' && str[index] == '\r')
|
|
|
|
++index;
|
|
|
|
if (c != str[index++])
|
|
|
|
{
|
|
|
|
//printf("Comparison failed at char %d: generated was %d, file had %d\n",
|
|
|
|
// index, (int)str[index-1], (int)c);
|
|
|
|
different = index;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
1999-10-02 00:55:01 +04:00
|
|
|
if (file != stdin)
|
|
|
|
fclose(file);
|
|
|
|
|
2000-02-25 04:09:37 +03:00
|
|
|
if (!different)
|
1999-10-02 00:55:01 +04:00
|
|
|
return 0;
|
|
|
|
else
|
2000-01-19 03:24:46 +03:00
|
|
|
{
|
2000-12-13 00:58:14 +03:00
|
|
|
nsAutoString left;
|
|
|
|
str.Left(left, different);
|
2001-09-29 12:28:41 +04:00
|
|
|
char* cstr = ToNewUTF8String(left);
|
2000-02-25 04:09:37 +03:00
|
|
|
printf("Comparison failed at char %d:\n-----\n%s\n-----\n",
|
|
|
|
different, cstr);
|
2000-01-19 03:24:46 +03:00
|
|
|
Recycle(cstr);
|
1999-10-02 00:55:01 +04:00
|
|
|
return 1;
|
2000-01-19 03:24:46 +03:00
|
|
|
}
|
1999-09-30 00:11:07 +04:00
|
|
|
}
|
|
|
|
|
1999-08-28 00:58:57 +04:00
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Convert html on stdin to either plaintext or (if toHTML) html
|
|
|
|
//----------------------------------------------------------------------
|
1999-09-30 00:11:07 +04:00
|
|
|
nsresult
|
1999-10-02 00:55:01 +04:00
|
|
|
HTML2text(nsString& inString, nsString& inType, nsString& outType,
|
|
|
|
int flags, int wrapCol, nsString& compareAgainst)
|
1999-08-28 00:58:57 +04:00
|
|
|
{
|
|
|
|
nsresult rv = NS_OK;
|
|
|
|
|
|
|
|
nsString outString;
|
|
|
|
|
1999-09-15 02:43:18 +04:00
|
|
|
// Create a parser
|
1999-08-28 00:58:57 +04:00
|
|
|
nsIParser* parser;
|
|
|
|
rv = nsComponentManager::CreateInstance(kParserCID, nsnull,
|
|
|
|
kIParserIID,(void**)&parser);
|
|
|
|
if (NS_FAILED(rv))
|
|
|
|
{
|
|
|
|
printf("Unable to create a parser : 0x%x\n", rv);
|
|
|
|
return NS_ERROR_FAILURE;
|
|
|
|
}
|
|
|
|
|
1999-09-15 02:43:18 +04:00
|
|
|
// Create the appropriate output sink
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
#ifdef USE_SERIALIZER
|
|
|
|
nsCAutoString progId(NS_CONTENTSERIALIZER_CONTRACTID_PREFIX);
|
|
|
|
progId.AppendWithConversion(outType);
|
1999-08-28 00:58:57 +04:00
|
|
|
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
// The syntax used here doesn't work
|
|
|
|
nsCOMPtr<nsIContentSerializer> mSerializer;
|
|
|
|
mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId));
|
|
|
|
NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED);
|
1999-08-28 00:58:57 +04:00
|
|
|
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
mSerializer->Init(flags, wrapCol);
|
|
|
|
|
|
|
|
nsCOMPtr<nsIHTMLContentSink> sink (do_QueryInterface(mSerializer));
|
|
|
|
if (!sink)
|
1999-08-28 00:58:57 +04:00
|
|
|
{
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
printf("Couldn't get content sink!\n");
|
|
|
|
return NS_ERROR_UNEXPECTED;
|
|
|
|
}
|
|
|
|
#else /* USE_SERIALIZER */
|
|
|
|
nsCOMPtr<nsIContentSink> sink;
|
|
|
|
if (inType != NS_LITERAL_STRING("text/html")
|
|
|
|
|| outType != NS_LITERAL_STRING("text/plain"))
|
|
|
|
{
|
2001-09-29 12:28:41 +04:00
|
|
|
char* in = ToNewCString(inType);
|
|
|
|
char* out = ToNewCString(outType);
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
printf("Don't know how to convert from %s to %s\n", in, out);
|
|
|
|
Recycle(in);
|
|
|
|
Recycle(out);
|
|
|
|
return NS_ERROR_FAILURE;
|
1999-08-28 00:58:57 +04:00
|
|
|
}
|
|
|
|
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
|
|
|
|
NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
|
|
|
|
|
|
|
|
nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
|
|
|
|
NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
|
|
|
|
|
|
|
|
textSink->Initialize(&outString, flags, wrapCol);
|
|
|
|
#endif /* USE_SERIALIZER */
|
|
|
|
|
1999-08-28 00:58:57 +04:00
|
|
|
parser->SetContentSink(sink);
|
2001-01-25 22:37:23 +03:00
|
|
|
nsCOMPtr<nsIDTD> dtd;
|
|
|
|
if (inType.EqualsWithConversion("text/html")) {
|
|
|
|
static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
|
|
|
|
rv=nsComponentManager::CreateInstance(kNavDTDCID,nsnull,NS_GET_IID(nsIDTD),getter_AddRefs(dtd));
|
|
|
|
}
|
Checking in for bug 50742, this change removes the use of XIF in mozilla and replaces the XIF converter with a HTML (and XML) serializer.
Contextual information added to HTML copy and intelligence added to HTML paste in the editor (fixes bugs 47014, 50568 and 46554, and partly (at least) fixes bug 53188).
Code written by vidur, jfrancis, jst, akkana. Tested by jfrancis, akkana, vidur, jst, kin. Reviwed (and super reviewed) by waterson, vidur, kin, jfrancis, jst
2000-10-07 14:57:30 +04:00
|
|
|
else
|
|
|
|
{
|
|
|
|
printf("Don't know how to deal with non-html input!\n");
|
|
|
|
return NS_ERROR_NOT_IMPLEMENTED;
|
|
|
|
}
|
1999-08-28 00:58:57 +04:00
|
|
|
if (NS_FAILED(rv))
|
|
|
|
{
|
1999-09-18 00:09:42 +04:00
|
|
|
printf("Couldn't create new HTML DTD: 0x%x\n", rv);
|
1999-08-28 00:58:57 +04:00
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
|
|
|
parser->RegisterDTD(dtd);
|
|
|
|
|
2001-09-29 12:28:41 +04:00
|
|
|
char* inTypeStr = ToNewCString(inType);
|
2000-04-26 05:13:55 +04:00
|
|
|
rv = parser->Parse(inString, 0, NS_ConvertASCIItoUCS2(inTypeStr), PR_FALSE, PR_TRUE);
|
1999-09-30 00:11:07 +04:00
|
|
|
delete[] inTypeStr;
|
1999-09-18 00:09:42 +04:00
|
|
|
if (NS_FAILED(rv))
|
|
|
|
{
|
|
|
|
printf("Parse() failed! 0x%x\n", rv);
|
|
|
|
return rv;
|
|
|
|
}
|
1999-08-28 00:58:57 +04:00
|
|
|
NS_RELEASE(parser);
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
if (compareAgainst.Length() > 0)
|
|
|
|
return Compare(outString, compareAgainst);
|
|
|
|
|
2001-09-29 12:28:41 +04:00
|
|
|
char* charstar = ToNewUTF8String(outString);
|
1999-10-02 00:55:01 +04:00
|
|
|
printf("Output string is:\n--------------------\n%s--------------------\n",
|
1999-09-30 00:11:07 +04:00
|
|
|
charstar);
|
|
|
|
delete[] charstar;
|
|
|
|
|
|
|
|
return NS_OK;
|
1999-08-28 00:58:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
int main(int argc, char** argv)
|
|
|
|
{
|
2000-04-26 05:13:55 +04:00
|
|
|
nsString inType; inType.AssignWithConversion("text/html");
|
|
|
|
nsString outType; outType.AssignWithConversion("text/plain");
|
1999-09-30 00:11:07 +04:00
|
|
|
int wrapCol = 72;
|
1999-10-02 00:55:01 +04:00
|
|
|
int flags = 0;
|
1999-09-30 00:11:07 +04:00
|
|
|
nsString compareAgainst;
|
|
|
|
|
|
|
|
|
|
|
|
// Skip over progname arg:
|
|
|
|
const char* progname = argv[0];
|
|
|
|
--argc; ++argv;
|
|
|
|
|
|
|
|
// Process flags
|
|
|
|
while (argc > 0 && argv[0][0] == '-')
|
|
|
|
{
|
|
|
|
switch (argv[0][1])
|
|
|
|
{
|
|
|
|
case 'h':
|
|
|
|
printf("\
|
1999-10-02 00:55:01 +04:00
|
|
|
Usage: %s [-i intype] [-o outtype] [-f flags] [-w wrapcol] [-c comparison_file] infile\n\
|
1999-09-30 00:11:07 +04:00
|
|
|
\tIn/out types are mime types (e.g. text/html)\n\
|
|
|
|
\tcomparison_file is a file against which to compare the output\n\
|
1999-11-03 05:44:44 +03:00
|
|
|
\n\
|
1999-10-02 00:55:01 +04:00
|
|
|
\tDefaults are -i text/html -o text/plain -f 0 -w 72 [stdin]\n",
|
1999-09-30 00:11:07 +04:00
|
|
|
progname);
|
|
|
|
exit(0);
|
|
|
|
|
|
|
|
case 'i':
|
|
|
|
if (argv[0][2] != '\0')
|
2000-04-26 05:13:55 +04:00
|
|
|
inType.AssignWithConversion(argv[0]+2);
|
1999-09-30 00:11:07 +04:00
|
|
|
else {
|
2000-04-26 05:13:55 +04:00
|
|
|
inType.AssignWithConversion(argv[1]);
|
1999-09-30 00:11:07 +04:00
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'o':
|
|
|
|
if (argv[0][2] != '\0')
|
2000-04-26 05:13:55 +04:00
|
|
|
outType.AssignWithConversion(argv[0]+2);
|
1999-09-30 00:11:07 +04:00
|
|
|
else {
|
2000-04-26 05:13:55 +04:00
|
|
|
outType.AssignWithConversion(argv[1]);
|
1999-09-30 00:11:07 +04:00
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'w':
|
|
|
|
if (isdigit(argv[0][2]))
|
|
|
|
wrapCol = atoi(argv[0]+2);
|
|
|
|
else {
|
|
|
|
wrapCol = atoi(argv[1]);
|
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
1999-10-02 00:55:01 +04:00
|
|
|
case 'f':
|
|
|
|
if (isdigit(argv[0][2]))
|
|
|
|
flags = atoi(argv[0]+2);
|
|
|
|
else {
|
|
|
|
flags = atoi(argv[1]);
|
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
case 'c':
|
|
|
|
if (argv[0][2] != '\0')
|
2000-04-26 05:13:55 +04:00
|
|
|
compareAgainst.AssignWithConversion(argv[0]+2);
|
1999-09-30 00:11:07 +04:00
|
|
|
else {
|
2000-04-26 05:13:55 +04:00
|
|
|
compareAgainst.AssignWithConversion(argv[1]);
|
1999-09-30 00:11:07 +04:00
|
|
|
--argc;
|
|
|
|
++argv;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
++argv;
|
|
|
|
--argc;
|
|
|
|
}
|
|
|
|
|
|
|
|
FILE* file = 0;
|
|
|
|
if (argc > 0) // read from a file
|
|
|
|
{
|
|
|
|
// Open the file in a Unix-centric way,
|
|
|
|
// until I find out how to use nsFileSpec:
|
|
|
|
file = fopen(argv[0], "r");
|
|
|
|
if (!file)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "Can't open file %s", argv[0]);
|
|
|
|
perror(" ");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else file = stdin;
|
|
|
|
|
1999-09-15 02:43:18 +04:00
|
|
|
nsComponentManager::AutoRegister(nsIComponentManager::NS_Startup, 0);
|
1999-08-28 00:58:57 +04:00
|
|
|
NS_SetupRegistry();
|
|
|
|
|
1999-09-30 00:11:07 +04:00
|
|
|
// Read in the string: very inefficient, but who cares?
|
|
|
|
nsString inString;
|
2001-02-14 04:23:23 +03:00
|
|
|
int c;
|
1999-09-30 00:11:07 +04:00
|
|
|
while ((c = getc(file)) != EOF)
|
2001-02-14 04:23:23 +03:00
|
|
|
inString.AppendWithConversion((char)c);
|
1999-09-30 00:11:07 +04:00
|
|
|
|
|
|
|
if (file != stdin)
|
|
|
|
fclose(file);
|
|
|
|
|
1999-10-02 00:55:01 +04:00
|
|
|
return HTML2text(inString, inType, outType, flags, wrapCol, compareAgainst);
|
1999-08-28 00:58:57 +04:00
|
|
|
}
|