Work around misconfiguration in default Apache installs that makes it claim all
sorts of stuff as text/plain. Bug 220807, r=biesi, sr=darin
2004-01-08 03:56:02 +00:00 · 2004-01-08 03:56:02 +00:00 · 6ccc20cdbe
--- a/netwerk/build/nsNetModule.cpp
+++ b/netwerk/build/nsNetModule.cpp
@ -56,6 +56,7 @@
 #include "nsSOCKSSocketProvider.h"
 #include "nsCacheService.h"
 #include "nsIOThreadPool.h"
+#include "nsMimeTypes.h"

 #include "nsNetCID.h"

@ -263,7 +264,8 @@ nsresult NS_NewStreamConv(nsStreamConverterService **aStreamConv);
 #define MULTI_MIXED_X                "?from=multipart/x-mixed-replace&to=*/*"
 #define MULTI_MIXED                  "?from=multipart/mixed&to=*/*"
 #define MULTI_BYTERANGES             "?from=multipart/byteranges&to=*/*"
-#define UNKNOWN_CONTENT              "?from=application/x-unknown-content-type&to=*/*"
+#define UNKNOWN_CONTENT              "?from=" UNKNOWN_CONTENT_TYPE "&to=*/*"
+#define MAYBE_TEXT                   "?from=" APPLICATION_MAYBE_TEXT "&to=*/*"
 #define GZIP_TO_UNCOMPRESSED         "?from=gzip&to=uncompressed"
 #define XGZIP_TO_UNCOMPRESSED        "?from=x-gzip&to=uncompressed"
 #define COMPRESS_TO_UNCOMPRESSED     "?from=compress&to=uncompressed"
@ -283,6 +285,7 @@ static const char *const g_StreamConverterArray[] = {
        MULTI_MIXED,
        MULTI_BYTERANGES,
        UNKNOWN_CONTENT,
+        MAYBE_TEXT,
        GZIP_TO_UNCOMPRESSED,
        XGZIP_TO_UNCOMPRESSED,
        COMPRESS_TO_UNCOMPRESSED,
@ -525,6 +528,31 @@ CreateNewUnknownDecoderFactory(nsISupports *aOuter, REFNSIID aIID, void **aResul
  return rv;
 }

+// Factory entry point for nsBinaryDetector; aggregation is not supported.
+static NS_IMETHODIMP
+CreateNewBinaryDetectorFactory(nsISupports *aOuter, REFNSIID aIID, void **aResult)
+{
+  if (!aResult) {
+    return NS_ERROR_NULL_POINTER;
+  }
+  // Clear the out-param up front so the early exits below leave it null.
+  *aResult = nsnull;
+
+  if (aOuter) {
+    return NS_ERROR_NO_AGGREGATION;
+  }
+
+  nsBinaryDetector* detector = new nsBinaryDetector();
+  if (!detector) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  // Hold a reference of our own for the duration of the QueryInterface call.
+  NS_ADDREF(detector);
+  nsresult status = detector->QueryInterface(aIID, aResult);
+  NS_RELEASE(detector);
+  return status;
+}
+
 static NS_IMETHODIMP
 CreateNewNSTXTToHTMLConvFactory(nsISupports *aOuter, REFNSIID aIID, void **aResult)
 {
@ -792,6 +820,12 @@ static const nsModuleComponentInfo gNetModuleInfo[] = {
      CreateNewUnknownDecoderFactory
    },

+    { "Binary Detector",
+      NS_BINARYDETECTOR_CID,
+      NS_ISTREAMCONVERTER_KEY MAYBE_TEXT,
+      CreateNewBinaryDetectorFactory
+    },
+
    { "HttpCompressConverter", 
      NS_HTTPCOMPRESSCONVERTER_CID,
      NS_ISTREAMCONVERTER_KEY GZIP_TO_UNCOMPRESSED,
--- a/netwerk/mime/public/nsMimeTypes.h
+++ b/netwerk/mime/public/nsMimeTypes.h
@ -189,7 +189,8 @@
 #define PARAM_FORMAT                        "format"

 #define UNKNOWN_CONTENT_TYPE                "application/x-unknown-content-type"
-
+#define APPLICATION_GUESS_FROM_EXT          "application/x-vnd.mozilla.guess-from-ext"
+#define APPLICATION_MAYBE_TEXT              "application/x-vnd.mozilla.maybe-text"
 #define VIEWSOURCE_CONTENT_TYPE             "application/x-view-source"

 #define APPLICATION_DIRECTORY				        "application/directory" /* text/x-vcard is synonym */
--- a/netwerk/streamconv/converters/nsUnknownDecoder.cpp
+++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp
@ -309,6 +309,8 @@ nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
  // text or whether it's data.
  SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),

+  // XXXbz should (and can) we also include the various ways that <?xml can
+  // appear as UTF-16 and such?  See http://www.w3.org/TR/REC-xml#sec-guessing
  SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)
 };

@ -492,18 +494,35 @@ PRBool nsUnknownDecoder::SniffURI(nsIRequest* aRequest)
 }

 // This macro is based on RFC 2046 Section 4.1.2.  Treat any char 0-31
-// except the 9-13 range (\t, \n, \v, \f, \r) as non-text
+// except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (ESC, used by
+// escape-sequence encodings like ISO-2022-JP) as non-text
 #define IS_TEXT_CHAR(ch)                                     \
-  ((((unsigned char)(ch)) & 31) != ((unsigned char)(ch)) ||    \
-   (9 <= ch && ch <= 13))
+  (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27)

 PRBool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest)
 {
  // All we can do now is try to guess whether this is text/plain or
  // application/octet-stream
-  //
-  // See if the buffer has any non-text chars.  If not, then lets just
-  // call it text/plain...
+
+  // First, check for a BOM.  If we see one, assume this is text/plain
+  // in whatever encoding.  If there is a BOM _and_ text we will
+  // always have at least 4 bytes in the buffer (since the 2-byte BOMs
+  // are for 2-byte encodings and the UTF-8 BOM is 3 bytes).
+  if (mBufferLen >= 4) {
+    const unsigned char* buf = (const unsigned char*)mBuffer;
+    if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16BE
+        (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16LE
+        (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8
+        (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF) || // UCS-4BE
+        (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFF && buf[3] == 0xFE)) { // UCS-4
+        
+      mContentType = TEXT_PLAIN;
+      return PR_TRUE;
+    }
+  }
+  
+  // Now see whether the buffer has any non-text chars.  If not, then let's
+  // just call it text/plain...
  //
  PRUint32 i;
  for (i=0; i<mBufferLen && IS_TEXT_CHAR(mBuffer[i]); i++);
@ -512,7 +531,7 @@ PRBool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest)
    mContentType = TEXT_PLAIN;
  }
  else {
-    mContentType = APPLICATION_OCTET_STREAM;
+    mContentType = APPLICATION_GUESS_FROM_EXT;
  }

  return PR_TRUE;    
@ -576,3 +595,9 @@ nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request,

  return rv;
 }
+
+void
+nsBinaryDetector::DetermineContentType(nsIRequest* aRequest)
+{
+  LastDitchSniff(aRequest);  // text-vs-binary check only; result ignored
+}
--- a/netwerk/streamconv/converters/nsUnknownDecoder.h
+++ b/netwerk/streamconv/converters/nsUnknownDecoder.h
@ -73,7 +73,7 @@ public:
 protected:
  virtual ~nsUnknownDecoder();

-  void DetermineContentType(nsIRequest* aRequest);
+  virtual void DetermineContentType(nsIRequest* aRequest);
  nsresult FireListenerNotifications(nsIRequest* request, nsISupports *aCtxt);

 protected:
@ -138,6 +138,25 @@ protected:

 };

+#define NS_BINARYDETECTOR_CID                        \
+{ /* a2027ec6-ba0d-4c72-805d-148233f5f33c */         \
+    0xa2027ec6,                                      \
+    0xba0d,                                          \
+    0x4c72,                                          \
+    {0x80, 0x5d, 0x14, 0x82, 0x33, 0xf5, 0xf3, 0x3c} \
+}
+
+/**
+ * Class that detects whether a data stream is text or binary.  It reuses
+ * nsUnknownDecoder except for the content-type determination: the overridden
+ * DetermineContentType only calls LastDitchSniff, which picks text/plain for
+ * text-looking data and APPLICATION_GUESS_FROM_EXT for binary-looking data.
+ */
+class nsBinaryDetector : public nsUnknownDecoder
+{
+protected:
+  virtual void DetermineContentType(nsIRequest* aRequest);
+};

 #endif /* nsUnknownDecoder_h__ */

--- a/uriloader/base/nsURILoader.cpp
+++ b/uriloader/base/nsURILoader.cpp
@ -75,6 +75,8 @@
 #include "nsIMIMEHeaderParam.h"
 #include "nsNetCID.h"

+#include "nsMimeTypes.h"
+
 static NS_DEFINE_CID(kStreamConverterServiceCID, NS_STREAMCONVERTERSERVICE_CID);
 #ifdef PR_LOGGING
 PRLogModuleInfo* nsURILoader::mLog = nsnull;
@ -284,6 +286,31 @@ NS_IMETHODIMP nsDocumentOpenInfo::OnStartRequest(nsIRequest *request, nsISupport
    return NS_OK;
  }

+  if (httpChannel && mContentType.IsEmpty()) {
+    // This is our initial dispatch, and this is an HTTP channel.  Check for
+    // the text/plain mess.
+    nsCAutoString contentType;
+    httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"),
+                                   contentType);
+    // Make sure to do a case-sensitive exact match comparison here.  Apache
+    // 1.x just sends text/plain for "unknown", while Apache 2.x sends
+    // text/plain with a ISO-8859-1 charset.  Debian's Apache version, just to
+    // be different, sends text/plain with iso-8859-1 charset.  Don't do
+    // general case-insensitive comparison, since we really want to apply this
+    // crap as rarely as we can.
+    if (contentType.Equals(NS_LITERAL_CSTRING("text/plain")) ||
+        contentType.Equals(
+             NS_LITERAL_CSTRING("text/plain; charset=ISO-8859-1")) ||
+        contentType.Equals(
+             NS_LITERAL_CSTRING("text/plain; charset=iso-8859-1"))) {
+      // OK, this is initial dispatch of an HTTP response and its Content-Type
+      // header is exactly one of the suspect text/plain forms checked above.
+      // We need to check whether this is really text....
+      LOG(("  Possibly bogus text/plain; resetting type to " APPLICATION_MAYBE_TEXT));
+      httpChannel->SetContentType(NS_LITERAL_CSTRING(APPLICATION_MAYBE_TEXT));
+    }
+  }
+  
  rv = DispatchContent(request, aCtxt);

  LOG(("  After dispatch, m_targetStreamListener: 0x%p", m_targetStreamListener.get()));
@ -346,7 +373,8 @@ nsresult nsDocumentOpenInfo::DispatchContent(nsIRequest *request, nsISupports *
    return NS_ERROR_FAILURE;
  }

-  if (mContentType.IsEmpty()) {
+  NS_NAMED_LITERAL_CSTRING(anyType, "*/*");
+  if (mContentType.IsEmpty() || mContentType == anyType) {
    rv = aChannel->GetContentType(mContentType);
    if (NS_FAILED(rv)) return rv;
    LOG(("  Got type from channel: '%s'", mContentType.get()));
@ -512,7 +540,6 @@ nsresult nsDocumentOpenInfo::DispatchContent(nsIRequest *request, nsISupports *
  // it in our Accept header and got confused.
  // XXXbz have to be careful here; may end up in some sort of bizarre infinite
  // decoding loop.
-  NS_NAMED_LITERAL_CSTRING(anyType, "*/*");
  if (mContentType != anyType) {
    rv = ConvertData(request, m_contentListener, mContentType, anyType);
    if (NS_FAILED(rv)) {
@ -591,13 +618,12 @@ nsDocumentOpenInfo::ConvertData(nsIRequest *request,
  // Also make sure it has to look for a stream listener to pump data into.
  nextLink->m_targetStreamListener = nsnull;

-  if (aOutContentType != NS_LITERAL_CSTRING("*/*")) {
-    // Make sure that nextLink treats the data as aOutContentType when
-    // dispatching; that way even if the stream converters don't
-    // change the type on the channel we will still do the right
-    // thing.
-    nextLink->mContentType = aOutContentType;
-  }
+  // Make sure that nextLink treats the data as aOutContentType when
+  // dispatching; that way even if the stream converters don't change the type
+  // on the channel we will still do the right thing.  If aOutContentType is
+  // */*, that's OK -- that will just indicate to nextLink that it should get
+  // the type off the channel.
+  nextLink->mContentType = aOutContentType;

  // The following call sets m_targetStreamListener to the input end of the
  // stream converter and sets the output end of the stream converter to
--- a/uriloader/exthandler/nsExternalHelperAppService.cpp
+++ b/uriloader/exthandler/nsExternalHelperAppService.cpp
@ -544,7 +544,28 @@ NS_IMETHODIMP nsExternalHelperAppService::DoContent(const char *aMimeContentType

  // Try to find a mime object by looking at the mime type/extension
  nsCOMPtr<nsIMIMEInfo> mimeInfo;
-  GetFromTypeAndExtension(aMimeContentType, fileExtension.get(), getter_AddRefs(mimeInfo));
+  if (!nsCRT::strcasecmp(aMimeContentType, APPLICATION_GUESS_FROM_EXT)) {
+    nsXPIDLCString mimeType;
+    if (!fileExtension.IsEmpty()) {
+      GetFromTypeAndExtension(nsnull, fileExtension.get(), getter_AddRefs(mimeInfo));
+      if (mimeInfo) {
+        mimeInfo->GetMIMEType(getter_Copies(mimeType));
+
+        LOG(("OS-Provided mime type '%s' for extension '%s'\n", 
+             mimeType.get(), fileExtension.get()));
+      }
+    }
+
+    if (fileExtension.IsEmpty() || mimeType.IsEmpty()) {
+      // Extension lookup gave us no useful match
+      GetFromTypeAndExtension(APPLICATION_OCTET_STREAM, fileExtension.get(),
+                              getter_AddRefs(mimeInfo));
+    }
+  } 
+  else {
+    GetFromTypeAndExtension(aMimeContentType, fileExtension.get(),
+                            getter_AddRefs(mimeInfo));
+  } 
  LOG(("Type/Ext lookup found 0x%p\n", mimeInfo.get()));

  // No mimeinfo -> we can't continue. probably OOM.