diff --git a/bench/ChecksumBench.cpp b/bench/ChecksumBench.cpp index fe3fd4778..903e584c5 100644 --- a/bench/ChecksumBench.cpp +++ b/bench/ChecksumBench.cpp @@ -7,41 +7,97 @@ #include "SkBenchmark.h" #include "SkCanvas.h" #include "SkChecksum.h" -#include "SkRandom.h" +#include "SkString.h" class ComputeChecksumBench : public SkBenchmark { - enum { - U32COUNT = 256, - SIZE = U32COUNT * 4, - N = SkBENCHLOOP(100000), - }; - uint32_t fData[U32COUNT]; - public: - ComputeChecksumBench(void* param) : INHERITED(param) { - SkRandom rand; - for (int i = 0; i < U32COUNT; ++i) { - fData[i] = rand.nextU(); - } + ComputeChecksumBench(void* param, const char name[]) : INHERITED(param) { + fName.printf("compute_checksum_%s", name); } + enum { + DATA_SIZE = 1024, + N = SkBENCHLOOP(100000), + }; protected: virtual const char* onGetName() { - return "compute_checksum"; + return fName.c_str(); } virtual void onDraw(SkCanvas* canvas) { - for (int i = 0; i < N; i++) { - volatile uint32_t result = SkChecksum::Compute(fData, sizeof(fData)); - } + uint64_t data[DATA_SIZE / sizeof(uint64_t)] = { 0 }; /* zero-filled: never checksum indeterminate stack bytes */ + computeChecksum(data, DATA_SIZE); } + virtual void computeChecksum(const uint64_t*, size_t) = 0; + + SkString fName; private: typedef SkBenchmark INHERITED; }; +/* + * Use SkComputeChecksum32 to compute a checksum on a datablock + */ +class ComputeChecksum32Bench : public ComputeChecksumBench { +public: + ComputeChecksum32Bench(void* param) + : INHERITED(param, "32") { } + +protected: + virtual void computeChecksum(const uint64_t* data, size_t len) { + for (int i = 0; i < N; i++) { + volatile uint32_t result = SkComputeChecksum32(reinterpret_cast<const uint32_t*>(data), len); + } + } + +private: + typedef ComputeChecksumBench INHERITED; +}; + +/* + * Use SkComputeChecksum64 to compute a checksum on a datablock + */ +class ComputeChecksum64Bench : public ComputeChecksumBench { +public: + ComputeChecksum64Bench(void* param) + : INHERITED(param, "64") { } + +protected: + virtual void computeChecksum(const uint64_t* 
data, size_t len) { + for (int i = 0; i < N; i++) { + volatile uint64_t result = SkComputeChecksum64(data, len); + } + } + +private: + typedef ComputeChecksumBench INHERITED; +}; + +/* + * Use SkChecksum::Compute to compute a checksum on a datablock + */ +class ComputeChecksumXXBench : public ComputeChecksumBench { +public: + ComputeChecksumXXBench(void* param) : INHERITED(param, "XX") { } + +protected: + virtual void computeChecksum(const uint64_t* data, size_t len) { + for (int i = 0; i < N; i++) { + volatile uint32_t result = SkChecksum::Compute(reinterpret_cast<const uint32_t*>(data), len); + } + } + +private: + typedef ComputeChecksumBench INHERITED; +}; + /////////////////////////////////////////////////////////////////////////////// -static SkBenchmark* Fact0(void* p) { return new ComputeChecksumBench(p); } +static SkBenchmark* Fact0(void* p) { return new ComputeChecksum32Bench(p); } +static SkBenchmark* Fact1(void* p) { return new ComputeChecksum64Bench(p); } +static SkBenchmark* Fact2(void* p) { return new ComputeChecksumXXBench(p); } static BenchRegistry gReg0(Fact0); +static BenchRegistry gReg1(Fact1); +static BenchRegistry gReg2(Fact2); diff --git a/include/core/SkChecksum.h b/include/core/SkChecksum.h index e66df54aa..e76767019 100644 --- a/include/core/SkChecksum.h +++ b/include/core/SkChecksum.h @@ -10,6 +10,64 @@ #include "SkTypes.h" +#if !defined(SK_PREFER_32BIT_CHECKSUM) +#define SK_PREFER_32BIT_CHECKSUM 0 +#endif + +enum { + ChecksumRotateBits = 17 +}; + +#define SkCHECKSUM_MASH(CHECKSUM, NEW_CHUNK) \ + CHECKSUM = (((CHECKSUM) >> (sizeof(CHECKSUM)*8 - ChecksumRotateBits)) + \ + ((CHECKSUM) << ChecksumRotateBits)) ^ (NEW_CHUNK); + + +/** + * Compute a 64-bit checksum for a given data block + * + * @param ptr Memory address of the data block to be processed. Must be + * 32-bit aligned. + * @param size Size of the data block in bytes. Must be a multiple of 8. 
+ * @return checksum result + */ +inline uint64_t SkComputeChecksum64(const uint64_t* ptr, size_t size) { + SkASSERT(SkIsAlign8(size)); + // Strict 8-byte alignment is not required on ptr. On current + // CPUs there is no measurable performance difference between 32-bit + // and 64-bit aligned access to uint64_t data. + SkASSERT(SkIsAlign4((intptr_t)ptr)); + + const uint64_t* stop = ptr + (size >> 3); + uint64_t result = 0; + while (ptr < stop) { + SkCHECKSUM_MASH(result, *ptr); + ptr++; + } + return result; +} + +/** + * Compute a 32-bit checksum for a given data block + * + * @param ptr Memory address of the data block to be processed. Must be + * 32-bit aligned. + * @param size Size of the data block in bytes. Must be a multiple of 4. + * @return checksum result + */ +inline uint32_t SkComputeChecksum32(const uint32_t* ptr, size_t size) { + SkASSERT(SkIsAlign4(size)); + SkASSERT(SkIsAlign4((intptr_t)ptr)); + + const uint32_t* stop = ptr + (size >> 2); + uint32_t result = 0; + while (ptr < stop) { + SkCHECKSUM_MASH(result, *ptr); + ptr++; + } + return result; +} + class SkChecksum : SkNoncopyable { private: /* diff --git a/include/core/SkDescriptor.h b/include/core/SkDescriptor.h index 8675fa1e2..00bc9aa92 100644 --- a/include/core/SkDescriptor.h +++ b/include/core/SkDescriptor.h @@ -15,29 +15,34 @@ class SkDescriptor : SkNoncopyable { public: - static size_t ComputeOverhead(int entryCount) { + static size_t ComputeOverhead(int entryCount) + { SkASSERT(entryCount >= 0); return sizeof(SkDescriptor) + entryCount * sizeof(Entry); } - static SkDescriptor* Alloc(size_t length) { + static SkDescriptor* Alloc(size_t length) + { SkASSERT(SkAlign4(length) == length); SkDescriptor* desc = (SkDescriptor*)sk_malloc_throw(length); return desc; } - static void Free(SkDescriptor* desc) { + static void Free(SkDescriptor* desc) + { sk_free(desc); } - void init() { + void init() + { fLength = sizeof(SkDescriptor); fCount = 0; } uint32_t getLength() const { return fLength; } - 
void* addEntry(uint32_t tag, uint32_t length, const void* data = NULL) { + void* addEntry(uint32_t tag, uint32_t length, const void* data = NULL) + { SkASSERT(tag); SkASSERT(SkAlign4(length) == length); SkASSERT(this->findEntry(tag, NULL) == NULL); @@ -45,34 +50,37 @@ public: Entry* entry = (Entry*)((char*)this + fLength); entry->fTag = tag; entry->fLen = length; - if (data) { + if (data) /* data may be NULL (the default): then nothing is copied in */ memcpy(entry + 1, data, length); - } fCount += 1; fLength += sizeof(Entry) + length; return (entry + 1); // return its data } - void computeChecksum() { + void computeChecksum() + { fChecksum = SkDescriptor::ComputeChecksum(this); } #ifdef SK_DEBUG - void assertChecksum() const { - SkASSERT(SkDescriptor::ComputeChecksum(this) == fChecksum); + void assertChecksum() const + { + SkASSERT(fChecksum == SkDescriptor::ComputeChecksum(this)); } #endif - const void* findEntry(uint32_t tag, uint32_t* length) const { + const void* findEntry(uint32_t tag, uint32_t* length) const + { const Entry* entry = (const Entry*)(this + 1); int count = fCount; - while (--count >= 0) { - if (entry->fTag == tag) { - if (length) { + while (--count >= 0) /* walk the fCount entries stored after the header */ + { + if (entry->fTag == tag) + { + if (length) /* length is optional; only written when non-NULL */ *length = entry->fLen; - } return entry + 1; } entry = (const Entry*)((const char*)(entry + 1) + entry->fLen); @@ -80,13 +88,15 @@ public: return NULL; } - SkDescriptor* copy() const { + SkDescriptor* copy() const + { SkDescriptor* desc = SkDescriptor::Alloc(fLength); memcpy(desc, this, fLength); return desc; } - bool equals(const SkDescriptor& other) const { + bool equals(const SkDescriptor& other) const + { // probe to see if we have a good checksum algo // SkASSERT(a.fChecksum != b.fChecksum || memcmp(&a, &b, a.fLength) == 0); @@ -120,10 +130,11 @@ private: uint32_t fLength; // must be second uint32_t fCount; - static uint32_t ComputeChecksum(const SkDescriptor* desc) { + static uint32_t ComputeChecksum(const SkDescriptor* desc) + { const uint32_t* ptr = (const uint32_t*)desc + 1; // skip the checksum field 
- size_t len = desc->fLength - sizeof(uint32_t); - return SkChecksum::Compute(ptr, len); + const size_t len = desc->fLength-sizeof(uint32_t); + return SkComputeChecksum32(ptr, len); } // private so no one can create one except our factories @@ -134,20 +145,18 @@ private: class SkAutoDescriptor : SkNoncopyable { public: - SkAutoDescriptor(size_t size) { - if (size <= sizeof(fStorage)) { + SkAutoDescriptor(size_t size) + { + if (size <= sizeof(fStorage)) fDesc = (SkDescriptor*)(void*)fStorage; - } else { + else fDesc = SkDescriptor::Alloc(size); - } } - - ~SkAutoDescriptor() { - if (fDesc != (SkDescriptor*)(void*)fStorage) { + ~SkAutoDescriptor() + { + if (fDesc != (SkDescriptor*)(void*)fStorage) SkDescriptor::Free(fDesc); - } } - SkDescriptor* getDesc() const { return fDesc; } private: enum { diff --git a/src/core/SkPictureFlat.cpp b/src/core/SkPictureFlat.cpp index 2f8d1e198..ec04495db 100644 --- a/src/core/SkPictureFlat.cpp +++ b/src/core/SkPictureFlat.cpp @@ -80,6 +80,12 @@ SkFlatData* SkFlatData::Create(SkChunkAlloc* heap, const void* obj, flattenProc(buffer, obj); uint32_t size = buffer.size(); + +#if !SK_PREFER_32BIT_CHECKSUM + uint32_t unpaddedSize = size; + size = SkAlign8(size); +#endif + // allocate enough memory to hold both SkFlatData and the serialized // contents SkFlatData* result = (SkFlatData*) heap->allocThrow(size + sizeof(SkFlatData)); @@ -88,7 +94,18 @@ SkFlatData* SkFlatData::Create(SkChunkAlloc* heap, const void* obj, // put the serialized contents into the data section of the new allocation buffer.flatten(result->data()); - result->fChecksum = SkChecksum::Compute(result->data32(), size); +#if SK_PREFER_32BIT_CHECKSUM + result->fChecksum = + SkComputeChecksum32(reinterpret_cast<uint32_t*>(result->data()), size); +#else + if (size != unpaddedSize) { + // Flat data is padded: put zeros in the last 32 bits. 
+ SkASSERT(size - 4 == unpaddedSize); + *((uint32_t*)((char*)result->data() + unpaddedSize)) = 0; + } + result->fChecksum = + SkComputeChecksum64(reinterpret_cast<uint64_t*>(result->data()), size); +#endif return result; } diff --git a/src/core/SkPictureFlat.h b/src/core/SkPictureFlat.h index 7888b2e56..591826120 100644 --- a/src/core/SkPictureFlat.h +++ b/src/core/SkPictureFlat.h @@ -156,11 +156,16 @@ public: static int Compare(const SkFlatData* a, const SkFlatData* b) { size_t bytesToCompare = sizeof(a->fChecksum) + a->fAllocSize; +#if SK_PREFER_32BIT_CHECKSUM + typedef uint32_t CompareType; SkASSERT(SkIsAlign4(bytesToCompare)); - - const uint32_t* a_ptr = &(a->fChecksum); - const uint32_t* b_ptr = &(b->fChecksum); - const uint32_t* stop = a_ptr + bytesToCompare / sizeof(uint32_t); +#else + typedef uint64_t CompareType; + SkASSERT(SkIsAlign8(bytesToCompare)); +#endif + const CompareType* a_ptr = &(a->fChecksum); + const CompareType* b_ptr = &(b->fChecksum); + const CompareType* stop = a_ptr + bytesToCompare / sizeof(CompareType); while(a_ptr < stop) { if (*a_ptr != *b_ptr) { return (*a_ptr < *b_ptr) ? -1 : 1; @@ -173,8 +178,6 @@ public: int index() const { return fIndex; } void* data() const { return (char*)this + sizeof(*this); } - // We guarantee that our data is 32bit aligned - uint32_t* data32() const { return (uint32_t*)this->data(); } #ifdef SK_DEBUG_SIZE size_t size() const { return sizeof(SkFlatData) + fAllocSize; } @@ -196,7 +199,11 @@ private: int fIndex; int32_t fAllocSize; // fChecksum must be defined last in order to be contiguous with data() +#if SK_PREFER_32BIT_CHECKSUM uint32_t fChecksum; +#else + uint64_t fChecksum; +#endif }; template