/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Mozilla Communicator client code, released
 * March 31, 1998.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998-1999 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s): Akkana Peck.
 */

#include <ctype.h>      // for isdigit()
#include <stdio.h>      // for FILE, fopen(), getc(), printf(), perror()
#include <stdlib.h>     // for atoi(), exit()

#include "CNavDTD.h"
#include "nsParserCIID.h"
#include "nsIParser.h"
#include "nsIHTMLContentSink.h"
#include "nsIContentSerializer.h"
#include "nsLayoutCID.h"
#include "nsIHTMLToTextSink.h"
#include "nsIComponentManager.h"

extern "C" void NS_SetupRegistry();

#ifdef XP_PC
#define PARSER_DLL "gkparser.dll"
#endif
#ifdef XP_MAC
#endif
#if defined(XP_UNIX) || defined(XP_BEOS)
#define PARSER_DLL "libhtmlpars"MOZ_DLL_SUFFIX
#endif

static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
static NS_DEFINE_CID(kParserCID, NS_PARSER_IID); // don't panic. NS_PARSER_IID just has the wrong name.
int Compare(nsString& str, nsString& aFileName) { // Open the file in a Unix-centric way, // until I find out how to use nsFileSpec: char* filename = aFileName.ToNewCString(); FILE* file = fopen(filename, "r"); if (!file) { fprintf(stderr, "Can't open file %s", filename); perror(" "); delete[] filename; return 2; } delete[] filename; // Inefficiently read from the file: nsString inString; char c; int index = 0; int different = 0; while ((c = getc(file)) != EOF) { inString.AppendWithConversion(c); // CVS isn't doing newline comparisons on these files for some reason. // So compensate for possible newline problems in the CVS file: if (c == '\n' && str[index] == '\r') ++index; if (c != str[index++]) { //printf("Comparison failed at char %d: generated was %d, file had %d\n", // index, (int)str[index-1], (int)c); different = index; break; } } if (file != stdin) fclose(file); if (!different) return 0; else { char* cstr = str.ToNewUTF8String(); printf("Comparison failed at char %d:\n-----\n%s\n-----\n", different, cstr); Recycle(cstr); return 1; } } //---------------------------------------------------------------------- // Convert html on stdin to either plaintext or (if toHTML) html //---------------------------------------------------------------------- nsresult HTML2text(nsString& inString, nsString& inType, nsString& outType, int flags, int wrapCol, nsString& compareAgainst) { nsresult rv = NS_OK; nsString outString; // Create a parser nsIParser* parser; rv = nsComponentManager::CreateInstance(kParserCID, nsnull, kIParserIID,(void**)&parser); if (NS_FAILED(rv)) { printf("Unable to create a parser : 0x%x\n", rv); return NS_ERROR_FAILURE; } // Create the appropriate output sink #ifdef USE_SERIALIZER nsCAutoString progId(NS_CONTENTSERIALIZER_CONTRACTID_PREFIX); progId.AppendWithConversion(outType); // The syntax used here doesn't work nsCOMPtr mSerializer; mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId)); NS_ENSURE_TRUE(mSerializer, 
NS_ERROR_NOT_IMPLEMENTED); mSerializer->Init(flags, wrapCol); nsCOMPtr sink (do_QueryInterface(mSerializer)); if (!sink) { printf("Couldn't get content sink!\n"); return NS_ERROR_UNEXPECTED; } #else /* USE_SERIALIZER */ nsCOMPtr sink; if (inType != NS_LITERAL_STRING("text/html") || outType != NS_LITERAL_STRING("text/plain")) { char* in = inType.ToNewCString(); char* out = outType.ToNewCString(); printf("Don't know how to convert from %s to %s\n", in, out); Recycle(in); Recycle(out); return NS_ERROR_FAILURE; } sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID); NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE); nsCOMPtr textSink(do_QueryInterface(sink)); NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE); textSink->Initialize(&outString, flags, wrapCol); #endif /* USE_SERIALIZER */ parser->SetContentSink(sink); nsIDTD* dtd = nsnull; if (inType.EqualsWithConversion("text/html")) rv = NS_NewNavHTMLDTD(&dtd); else { printf("Don't know how to deal with non-html input!\n"); return NS_ERROR_NOT_IMPLEMENTED; } if (NS_FAILED(rv)) { printf("Couldn't create new HTML DTD: 0x%x\n", rv); return rv; } parser->RegisterDTD(dtd); char* inTypeStr = inType.ToNewCString(); rv = parser->Parse(inString, 0, NS_ConvertASCIItoUCS2(inTypeStr), PR_FALSE, PR_TRUE); delete[] inTypeStr; if (NS_FAILED(rv)) { printf("Parse() failed! 
0x%x\n", rv); return rv; } NS_IF_RELEASE(dtd); NS_RELEASE(parser); if (compareAgainst.Length() > 0) return Compare(outString, compareAgainst); char* charstar = outString.ToNewUTF8String(); printf("Output string is:\n--------------------\n%s--------------------\n", charstar); delete[] charstar; return NS_OK; } //---------------------------------------------------------------------- int main(int argc, char** argv) { nsString inType; inType.AssignWithConversion("text/html"); nsString outType; outType.AssignWithConversion("text/plain"); int wrapCol = 72; int flags = 0; nsString compareAgainst; // Skip over progname arg: const char* progname = argv[0]; --argc; ++argv; // Process flags while (argc > 0 && argv[0][0] == '-') { switch (argv[0][1]) { case 'h': printf("\ Usage: %s [-i intype] [-o outtype] [-f flags] [-w wrapcol] [-c comparison_file] infile\n\ \tIn/out types are mime types (e.g. text/html)\n\ \tcomparison_file is a file against which to compare the output\n\ \n\ \tDefaults are -i text/html -o text/plain -f 0 -w 72 [stdin]\n", progname); exit(0); case 'i': if (argv[0][2] != '\0') inType.AssignWithConversion(argv[0]+2); else { inType.AssignWithConversion(argv[1]); --argc; ++argv; } break; case 'o': if (argv[0][2] != '\0') outType.AssignWithConversion(argv[0]+2); else { outType.AssignWithConversion(argv[1]); --argc; ++argv; } break; case 'w': if (isdigit(argv[0][2])) wrapCol = atoi(argv[0]+2); else { wrapCol = atoi(argv[1]); --argc; ++argv; } break; case 'f': if (isdigit(argv[0][2])) flags = atoi(argv[0]+2); else { flags = atoi(argv[1]); --argc; ++argv; } break; case 'c': if (argv[0][2] != '\0') compareAgainst.AssignWithConversion(argv[0]+2); else { compareAgainst.AssignWithConversion(argv[1]); --argc; ++argv; } break; } ++argv; --argc; } FILE* file = 0; if (argc > 0) // read from a file { // Open the file in a Unix-centric way, // until I find out how to use nsFileSpec: file = fopen(argv[0], "r"); if (!file) { fprintf(stderr, "Can't open file %s", argv[0]); 
perror(" "); exit(1); } } else file = stdin; nsComponentManager::AutoRegister(nsIComponentManager::NS_Startup, 0); NS_SetupRegistry(); // Read in the string: very inefficient, but who cares? nsString inString; char c; while ((c = getc(file)) != EOF) inString.AppendWithConversion(c); if (file != stdin) fclose(file); return HTML2text(inString, inType, outType, flags, wrapCol, compareAgainst); }