From 5837cb3f3505fc7efca057f2d2a876c72050ff73 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Wed, 15 Aug 2012 09:06:54 +0200 Subject: [PATCH] Bug 730247 - Use byteslice coding for SafeBrowsing data. r=dcamp --- .../components/url-classifier/HashStore.cpp | 153 +++++++++++++----- 1 file changed, 117 insertions(+), 36 deletions(-) diff --git a/toolkit/components/url-classifier/HashStore.cpp b/toolkit/components/url-classifier/HashStore.cpp index 8f5cdf3411f2..eb31847059b4 100644 --- a/toolkit/components/url-classifier/HashStore.cpp +++ b/toolkit/components/url-classifier/HashStore.cpp @@ -40,13 +40,36 @@ #include "zlib.h" // Main store for SafeBrowsing protocol data. We store -// known add/sub chunks, prefixe and completions s in memory +// known add/sub chunks, prefixes and completions in memory // during an update, and serialize to disk. // We do not store the add prefixes, those are retrieved by // decompressing the PrefixSet cache whenever we need to apply // an update. - -// Data format: +// +// byte slicing: Many of the 4-byte values stored here are strongly +// correlated in the upper bytes, and uncorrelated in the lower +// bytes. Because zlib/DEFLATE requires match lengths of at least +// 3 to achieve good compression, and we don't get those if only +// the upper 16-bits are correlated, it is worthwhile to slice 32-bit +// values into 4 1-byte slices and compress the slices individually. +// The slices corresponding to MSBs will compress very well, and the +// slice corresponding to LSB almost nothing. Because of this, we +// only apply DEFLATE to the 3 most significant bytes, and store the +// LSB uncompressed. +// +// byte sliced (numValues) data format: +// uint32 compressed-size +// compressed-size bytes zlib DEFLATE data +// 0...numValues byte MSB of 4-byte numValues data +// uint32 compressed-size +// compressed-size bytes zlib DEFLATE data +// 0...numValues byte 2nd byte of 4-byte numValues data +// uint32 compressed-size +// compressed-size bytes zlib DEFLATE data +// 0...numValues byte 3rd byte of 4-byte numValues data +// 0...numValues byte LSB of 4-byte numValues data +// +// Store data format: // uint32 magic // uint32 version // uint32 numAddChunks @@ -55,20 +78,14 @@ // uint32 numSubPrefixes // uint32 numAddCompletes // uint32 numSubCompletes -// 0...numAddChunks uint32 addChunk -// 0...numSubChunks uint32 subChunk -// uint32 compressed-size -// compressed-size bytes zlib inflate data -// 0...numAddPrefixes uint32 addChunk -// uint32 compressed-size -// compressed-size bytes zlib inflate data -// 0...numSubPrefixes uint32 addChunk -// uint32 compressed-size -// compressed-size bytes zlib inflate data -// 0...numSubPrefixes uint32 subChunk -// 0...numSubPrefixes uint32 subPrefix -// 0...numAddCompletes 32-byte Completions -// 0...numSubCompletes 32-byte Completions +// 0...numAddChunks uint32 addChunk +// 0...numSubChunks uint32 subChunk +// byte sliced (numAddPrefixes) uint32 add chunk of AddPrefixes +// byte sliced (numSubPrefixes) uint32 sub chunk of SubPrefixes +// byte sliced (numSubPrefixes) uint32 add chunk of SubPrefixes +// byte sliced (numSubPrefixes) uint32 SubPrefixes +// 0...numAddCompletes 32-byte Completions +// 0...numSubCompletes 32-byte Completions // 16-byte MD5 of all preceding data // NSPR_LOG_MODULES=UrlClassifierDbService:5 @@ -85,7 +102,7 @@ namespace mozilla { namespace safebrowsing { const uint32 STORE_MAGIC = 0x1231af3b; -const uint32 CURRENT_VERSION = 1; +const uint32 CURRENT_VERSION = 2; void TableUpdate::NewAddPrefix(PRUint32 aAddChunk, const Prefix& aHash) @@ -642,17 +659,82 @@ nsresult InflateReadTArray(nsIInputStream* aStream, nsTArray* aOut, return NS_OK; } +static nsresult +ByteSliceWrite(nsIOutputStream* aOut, nsTArray& aData) +{ + nsTArray slice1; + nsTArray slice2; + nsTArray slice3; + nsTArray slice4; + PRUint32 count = aData.Length(); + + slice1.SetCapacity(count); + slice2.SetCapacity(count); + slice3.SetCapacity(count); + slice4.SetCapacity(count); + + for (PRUint32 i = 0; i < count; i++) { + slice1.AppendElement( aData[i] >> 24); + slice2.AppendElement((aData[i] >> 16) & 0xFF); + slice3.AppendElement((aData[i] >> 8) & 0xFF); + slice4.AppendElement( aData[i] & 0xFF); + } + + nsresult rv = DeflateWriteTArray(aOut, slice1); + NS_ENSURE_SUCCESS(rv, rv); + rv = DeflateWriteTArray(aOut, slice2); + NS_ENSURE_SUCCESS(rv, rv); + rv = DeflateWriteTArray(aOut, slice3); + NS_ENSURE_SUCCESS(rv, rv); + // The LSB slice is generally uncompressible, don't bother + // compressing it. + rv = WriteTArray(aOut, slice4); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +static nsresult +ByteSliceRead(nsIInputStream* aInStream, nsTArray* aData, PRUint32 count) +{ + nsTArray slice1; + nsTArray slice2; + nsTArray slice3; + nsTArray slice4; + + nsresult rv = InflateReadTArray(aInStream, &slice1, count); + NS_ENSURE_SUCCESS(rv, rv); + + rv = InflateReadTArray(aInStream, &slice2, count); + NS_ENSURE_SUCCESS(rv, rv); + + rv = InflateReadTArray(aInStream, &slice3, count); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadTArray(aInStream, &slice4, count); + NS_ENSURE_SUCCESS(rv, rv); + + aData->SetCapacity(count); + + for (uint32 i = 0; i < count; i++) { + aData->AppendElement((slice1[i] << 24) | (slice2[i] << 16) + | (slice3[i] << 8) | (slice4[i])); + } + + return NS_OK; +} + nsresult HashStore::ReadAddPrefixes() { - nsTArray chunks; + nsTArray chunks; PRUint32 count = mHeader.numAddPrefixes; - nsresult rv = InflateReadTArray(mInputStream, &chunks, count); + nsresult rv = ByteSliceRead(mInputStream, &chunks, count); NS_ENSURE_SUCCESS(rv, rv); mAddPrefixes.SetCapacity(count); - for (uint32 i = 0; i < count; i++) { + for (PRUint32 i = 0; i < count; i++) { AddPrefix *add = mAddPrefixes.AppendElement(); add->prefix.FromUint32(0); add->addChunk = chunks[i]; @@ -666,23 +748,23 @@ HashStore::ReadSubPrefixes() { nsTArray addchunks; nsTArray subchunks; - nsTArray prefixes; + nsTArray prefixes; PRUint32 count = mHeader.numSubPrefixes; - nsresult rv = InflateReadTArray(mInputStream, &addchunks, count); + nsresult rv = ByteSliceRead(mInputStream, &addchunks, count); NS_ENSURE_SUCCESS(rv, rv); - rv = InflateReadTArray(mInputStream, &subchunks, count); + rv = ByteSliceRead(mInputStream, &subchunks, count); NS_ENSURE_SUCCESS(rv, rv); - rv = ReadTArray(mInputStream, &prefixes, count); + rv = ByteSliceRead(mInputStream, &prefixes, count); NS_ENSURE_SUCCESS(rv, rv); mSubPrefixes.SetCapacity(count); for (uint32 i = 0; i < count; i++) { SubPrefix *sub = mSubPrefixes.AppendElement(); sub->addChunk = addchunks[i]; - sub->prefix = prefixes[i]; + sub->prefix.FromUint32(prefixes[i]); sub->subChunk = subchunks[i]; } @@ -693,7 +775,7 @@ HashStore::ReadSubPrefixes() nsresult HashStore::WriteAddPrefixes(nsIOutputStream* aOut) { - nsTArray chunks; + nsTArray chunks; PRUint32 count = mAddPrefixes.Length(); chunks.SetCapacity(count); @@ -701,7 +783,7 @@ HashStore::WriteAddPrefixes(nsIOutputStream* aOut) chunks.AppendElement(mAddPrefixes[i].Chunk()); } - nsresult rv = DeflateWriteTArray(aOut, chunks); + nsresult rv = ByteSliceWrite(aOut, chunks); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; @@ -710,9 +792,9 @@ HashStore::WriteAddPrefixes(nsIOutputStream* aOut) nsresult HashStore::WriteSubPrefixes(nsIOutputStream* aOut) { - nsTArray addchunks; - nsTArray subchunks; - nsTArray prefixes; + nsTArray addchunks; + nsTArray subchunks; + nsTArray prefixes; PRUint32 count = mSubPrefixes.Length(); addchunks.SetCapacity(count); subchunks.SetCapacity(count); @@ -720,18 +802,17 @@ HashStore::WriteSubPrefixes(nsIOutputStream* aOut) for (uint32 i = 0; i < count; i++) { addchunks.AppendElement(mSubPrefixes[i].AddChunk()); - prefixes.AppendElement(mSubPrefixes[i].PrefixHash()); + prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32()); subchunks.AppendElement(mSubPrefixes[i].Chunk()); } - nsresult rv = DeflateWriteTArray(aOut, addchunks); + nsresult rv = ByteSliceWrite(aOut, addchunks); NS_ENSURE_SUCCESS(rv, rv); - rv = DeflateWriteTArray(aOut, subchunks); + rv = ByteSliceWrite(aOut, subchunks); NS_ENSURE_SUCCESS(rv, rv); - // chunk-ordered prefixes are not compressible - rv = WriteTArray(aOut, prefixes); + rv = ByteSliceWrite(aOut, prefixes); NS_ENSURE_SUCCESS(rv, rv); return NS_OK;