Bug 1750599 - Apply valgrind upstream patch that increases performance with --track-origins=yes. r=firefox-build-system-reviewers,mhentges

Differential Revision: https://phabricator.services.mozilla.com/D136307
This commit is contained in:
Mike Hommey 2022-01-19 22:27:47 +00:00
Родитель cce437063b
Коммит 77e267d014
2 изменённых файлов: 790 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,789 @@
diff -Nru valgrind-3.18.1/debian/changelog valgrind-3.18.1/debian/changelog
--- valgrind-3.18.1/debian/changelog 2021-10-22 19:18:53.000000000 +0900
+++ valgrind-3.18.1/debian/changelog 2022-01-19 06:20:18.000000000 +0900
@@ -1,3 +1,9 @@
+valgrind (1:3.18.1-1.deb11moz1) bullseye; urgency=medium
+
+ * Apply patch for https://bugs.kde.org/show_bug.cgi?id=446103
+
+ -- Mike Hommey <mhommey@mozilla.com> Wed, 19 Jan 2022 06:20:18 +0900
+
valgrind (1:3.18.1-1) unstable; urgency=medium
* New upstream release
diff -Nru valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch
--- valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch 1970-01-01 09:00:00.000000000 +0900
+++ valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch 2022-01-19 06:19:56.000000000 +0900
@@ -0,0 +1,764 @@
+Index: valgrind-3.18.1/coregrind/m_oset.c
+===================================================================
+--- valgrind-3.18.1.orig/coregrind/m_oset.c
++++ valgrind-3.18.1/coregrind/m_oset.c
+@@ -570,7 +570,7 @@ void VG_(OSetWord_Insert)(AvlTree* t, UW
+ /*--------------------------------------------------------------------*/
+
+ // Find the *node* in t matching k, or NULL if not found.
+-static AvlNode* avl_lookup(const AvlTree* t, const void* k)
++static inline AvlNode* avl_lookup(const AvlTree* t, const void* k)
+ {
+ Word cmpres;
+ AvlNode* curr = t->root;
+@@ -604,9 +604,10 @@ static AvlNode* avl_lookup(const AvlTree
+ // Find the *element* in t matching k, or NULL if not found.
+ void* VG_(OSetGen_Lookup)(const AvlTree* t, const void* k)
+ {
+- AvlNode* n;
+ vg_assert(t);
+- n = avl_lookup(t, k);
++ if (LIKELY(t->root == NULL))
++ return NULL;
++ AvlNode* n = avl_lookup(t, k);
+ return ( n ? elem_of_node(n) : NULL );
+ }
+
+@@ -767,6 +768,8 @@ static Bool avl_removeroot(AvlTree* t)
+ // if not present.
+ void* VG_(OSetGen_Remove)(AvlTree* t, const void* k)
+ {
++ if (LIKELY(t->root == NULL))
++ return NULL;
+ // Have to find the node first, then remove it.
+ AvlNode* n = avl_lookup(t, k);
+ if (n) {
+Index: valgrind-3.18.1/memcheck/mc_main.c
+===================================================================
+--- valgrind-3.18.1.orig/memcheck/mc_main.c
++++ valgrind-3.18.1/memcheck/mc_main.c
+@@ -2402,7 +2402,9 @@ static UWord stats_ocacheL1_misses
+ static UWord stats_ocacheL1_lossage = 0;
+ static UWord stats_ocacheL1_movefwds = 0;
+
+-static UWord stats__ocacheL2_refs = 0;
++static UWord stats__ocacheL2_finds = 0;
++static UWord stats__ocacheL2_adds = 0;
++static UWord stats__ocacheL2_dels = 0;
+ static UWord stats__ocacheL2_misses = 0;
+ static UWord stats__ocacheL2_n_nodes_max = 0;
+
+@@ -2431,26 +2433,86 @@ static INLINE Bool is_valid_oc_tag ( Add
+ #define OC_MOVE_FORWARDS_EVERY_BITS 7
+
+
++/* Originally (pre Dec 2021) it was the case that this code had a
++ parameterizable cache line size, set by changing OC_BITS_PER_LINE.
++ However, as a result of the speedup fixes necessitated by bug 446103, that
++ is no longer really the case, and much of the L1 and L2 cache code has been
++ tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line
++ size is 32 bytes). Changing that would require a bunch of re-tuning
++ effort. So let's set it in stone for now. */
++STATIC_ASSERT(OC_BITS_PER_LINE == 5);
++STATIC_ASSERT(OC_LINES_PER_SET == 2);
++
++/* Fundamentally we want an OCacheLine structure (see below) as follows:
++ struct {
++ Addr tag;
++ UInt w32 [OC_W32S_PER_LINE];
++ UChar descr[OC_W32S_PER_LINE];
++ }
++ However, in various places, we want to set the w32[] and descr[] arrays to
++ zero, or check if they are zero. This can be a very hot path (per bug
++ 446103). So, instead, we have a union which is either those two arrays
++ (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s). For the
++ set-zero/test-zero operations, the OCacheLine_W64s are used.
++*/
++
++// To ensure that OCacheLine.descr[] will fit in an integral number of ULongs.
++STATIC_ASSERT(0 == (OC_W32S_PER_LINE % 8));
++
++#define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \
++ (OC_W32S_PER_LINE / 2 /* covers OCacheLine_Main.w32[] */ \
++ + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */
++STATIC_ASSERT(OC_W64S_PER_MAIN == 5);
++
++typedef
++ ULong OCacheLine_W64s[OC_W64S_PER_MAIN];
++
+ typedef
+ struct {
+- Addr tag;
+- UInt w32[OC_W32S_PER_LINE];
++ UInt w32 [OC_W32S_PER_LINE];
+ UChar descr[OC_W32S_PER_LINE];
+ }
++ OCacheLine_Main;
++
++STATIC_ASSERT(sizeof(OCacheLine_W64s) == sizeof(OCacheLine_Main));
++
++typedef
++ struct {
++ Addr tag;
++ union {
++ OCacheLine_W64s w64s;
++ OCacheLine_Main main;
++ } u;
++ }
+ OCacheLine;
+
+ /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
+ in use, 'n' (nonzero) if it contains at least one valid origin tag,
+ and 'z' if all the represented tags are zero. */
+-static UChar classify_OCacheLine ( OCacheLine* line )
++static inline UChar classify_OCacheLine ( OCacheLine* line )
+ {
+ UWord i;
+ if (line->tag == 1/*invalid*/)
+ return 'e'; /* EMPTY */
+ tl_assert(is_valid_oc_tag(line->tag));
++
++ // BEGIN fast special-case of the test loop below. This will detect
++ // zero-ness (case 'z') for a subset of cases that the loop below will,
++ // hence is safe.
++ if (OC_W64S_PER_MAIN == 5) {
++ if (line->u.w64s[0] == 0
++ && line->u.w64s[1] == 0 && line->u.w64s[2] == 0
++ && line->u.w64s[3] == 0 && line->u.w64s[4] == 0) {
++ return 'z';
++ }
++ } else {
++ tl_assert2(0, "unsupported line size (classify_OCacheLine)");
++ }
++ // END fast special-case of the test loop below.
++
+ for (i = 0; i < OC_W32S_PER_LINE; i++) {
+- tl_assert(0 == ((~0xF) & line->descr[i]));
+- if (line->w32[i] > 0 && line->descr[i] > 0)
++ tl_assert(0 == ((~0xF) & line->u.main.descr[i]));
++ if (line->u.main.w32[i] > 0 && line->u.main.descr[i] > 0)
+ return 'n'; /* NONZERO - contains useful info */
+ }
+ return 'z'; /* ZERO - no useful info */
+@@ -2491,7 +2553,7 @@ static void init_OCache ( void )
+ init_ocacheL2();
+ }
+
+-static void moveLineForwards ( OCacheSet* set, UWord lineno )
++static inline void moveLineForwards ( OCacheSet* set, UWord lineno )
+ {
+ OCacheLine tmp;
+ stats_ocacheL1_movefwds++;
+@@ -2501,11 +2563,23 @@ static void moveLineForwards ( OCacheSet
+ set->line[lineno] = tmp;
+ }
+
+-static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
++static inline void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
+ UWord i;
+- for (i = 0; i < OC_W32S_PER_LINE; i++) {
+- line->w32[i] = 0; /* NO ORIGIN */
+- line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
++ if (OC_W32S_PER_LINE == 8) {
++ // BEGIN fast special-case of the loop below
++ tl_assert(OC_W64S_PER_MAIN == 5);
++ line->u.w64s[0] = 0;
++ line->u.w64s[1] = 0;
++ line->u.w64s[2] = 0;
++ line->u.w64s[3] = 0;
++ line->u.w64s[4] = 0;
++ // END fast special-case of the loop below
++ } else {
++ tl_assert2(0, "unsupported line size (zeroise_OCacheLine)");
++ for (i = 0; i < OC_W32S_PER_LINE; i++) {
++ line->u.main.w32[i] = 0; /* NO ORIGIN */
++ line->u.main.descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
++ }
+ }
+ line->tag = tag;
+ }
+@@ -2513,7 +2587,37 @@ static void zeroise_OCacheLine ( OCacheL
+ //////////////////////////////////////////////////////////////
+ //// OCache backing store
+
+-static OSet* ocacheL2 = NULL;
++// The backing store for ocacheL1 is, conceptually, an AVL tree of lines that
++// got ejected from the L1 (a "victim cache"), and which actually contain
++// useful info -- that is, for which classify_OCacheLine would return 'n' and
++// no other value. However, the tree can grow large, and searching/updating
++// it can be hot paths. Hence we "take out" 12 significant bits of the key by
++// having 4096 trees, and select one using HASH_OCACHE_TAG.
++//
++// What that hash function returns isn't important so long as it is a pure
++// function of the tag values, and is < 4096. However, it is critical for
++// performance of long SARPs. Hence the extra shift of 11 bits. This means
++// each tree conceptually is assigned to contiguous sequences of 2048 lines in
++// the "line address space", giving some locality of reference when scanning
++// linearly through address space, as is done by a SARP. Changing that 11 to
++// 0 gives terrible performance on long SARPs, presumably because each new
++// line is in a different tree, hence we wind up thrashing the (CPU's) caches.
++//
++// On 32-bit targets, we have to be a bit careful not to shift out so many
++// bits that not all 2^12 trees get used. That leads to the constraint
++// (OC_BITS_PER_LINE + 11 + 12) < 32. Note that the 11 is the only thing we
++// can change here. In this case we have OC_BITS_PER_LINE == 5, hence the
++// inequality is (28 < 32) and so we're good.
++//
++// The value 11 was determined empirically from various Firefox runs. 10 or
++// 12 also work pretty well.
++
++static OSet* ocachesL2[4096];
++
++STATIC_ASSERT((OC_BITS_PER_LINE + 11 + 12) < 32);
++static inline UInt HASH_OCACHE_TAG ( Addr tag ) {
++ return (UInt)((tag >> (OC_BITS_PER_LINE + 11)) & 0xFFF);
++}
+
+ static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
+ return VG_(malloc)(cc, szB);
+@@ -2527,23 +2631,26 @@ static UWord stats__ocacheL2_n_nodes = 0
+
+ static void init_ocacheL2 ( void )
+ {
+- tl_assert(!ocacheL2);
+ tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
+ tl_assert(0 == offsetof(OCacheLine,tag));
+- ocacheL2
+- = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
+- NULL, /* fast cmp */
+- ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
++ for (UInt i = 0; i < 4096; i++) {
++ tl_assert(!ocachesL2[i]);
++ ocachesL2[i]
++ = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
++ NULL, /* fast cmp */
++ ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
++ }
+ stats__ocacheL2_n_nodes = 0;
+ }
+
+ /* Find line with the given tag in the tree, or NULL if not found. */
+-static OCacheLine* ocacheL2_find_tag ( Addr tag )
++static inline OCacheLine* ocacheL2_find_tag ( Addr tag )
+ {
+ OCacheLine* line;
+ tl_assert(is_valid_oc_tag(tag));
+- stats__ocacheL2_refs++;
+- line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
++ stats__ocacheL2_finds++;
++ OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
++ line = VG_(OSetGen_Lookup)( oset, &tag );
+ return line;
+ }
+
+@@ -2553,10 +2660,11 @@ static void ocacheL2_del_tag ( Addr tag
+ {
+ OCacheLine* line;
+ tl_assert(is_valid_oc_tag(tag));
+- stats__ocacheL2_refs++;
+- line = VG_(OSetGen_Remove)( ocacheL2, &tag );
++ stats__ocacheL2_dels++;
++ OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
++ line = VG_(OSetGen_Remove)( oset, &tag );
+ if (line) {
+- VG_(OSetGen_FreeNode)(ocacheL2, line);
++ VG_(OSetGen_FreeNode)(oset, line);
+ tl_assert(stats__ocacheL2_n_nodes > 0);
+ stats__ocacheL2_n_nodes--;
+ }
+@@ -2568,10 +2676,11 @@ static void ocacheL2_add_line ( OCacheLi
+ {
+ OCacheLine* copy;
+ tl_assert(is_valid_oc_tag(line->tag));
+- copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
++ OSet* oset = ocachesL2[HASH_OCACHE_TAG(line->tag)];
++ copy = VG_(OSetGen_AllocNode)( oset, sizeof(OCacheLine) );
+ *copy = *line;
+- stats__ocacheL2_refs++;
+- VG_(OSetGen_Insert)( ocacheL2, copy );
++ stats__ocacheL2_adds++;
++ VG_(OSetGen_Insert)( oset, copy );
+ stats__ocacheL2_n_nodes++;
+ if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
+ stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
+@@ -2626,7 +2735,7 @@ static OCacheLine* find_OCacheLine_SLOW
+ /* line contains zeroes. We must ensure the backing store is
+ updated accordingly, either by copying the line there
+ verbatim, or by ensuring it isn't present there. We
+- chosse the latter on the basis that it reduces the size of
++ choose the latter on the basis that it reduces the size of
+ the backing store. */
+ ocacheL2_del_tag( victim->tag );
+ break;
+@@ -2697,10 +2806,10 @@ static INLINE void set_aligned_word64_Or
+ && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
+ }
+ line = find_OCacheLine( a );
+- line->descr[lineoff+0] = 0xF;
+- line->descr[lineoff+1] = 0xF;
+- line->w32[lineoff+0] = otag;
+- line->w32[lineoff+1] = otag;
++ line->u.main.descr[lineoff+0] = 0xF;
++ line->u.main.descr[lineoff+1] = 0xF;
++ line->u.main.w32[lineoff+0] = otag;
++ line->u.main.w32[lineoff+1] = otag;
+ }
+ //// END inlined, specialised version of MC_(helperc_b_store8)
+ }
+@@ -2751,8 +2860,8 @@ void make_aligned_word32_undefined_w_ota
+ tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
+ }
+ line = find_OCacheLine( a );
+- line->descr[lineoff] = 0xF;
+- line->w32[lineoff] = otag;
++ line->u.main.descr[lineoff] = 0xF;
++ line->u.main.w32[lineoff] = otag;
+ }
+ //// END inlined, specialised version of MC_(helperc_b_store4)
+ }
+@@ -2788,7 +2897,7 @@ void make_aligned_word32_noaccess ( Addr
+ tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
+ }
+ line = find_OCacheLine( a );
+- line->descr[lineoff] = 0;
++ line->u.main.descr[lineoff] = 0;
+ }
+ //// END inlined, specialised version of MC_(helperc_b_store4)
+ }
+@@ -2834,10 +2943,10 @@ void make_aligned_word64_undefined_w_ota
+ tl_assert(lineoff >= 0
+ && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
+ line = find_OCacheLine( a );
+- line->descr[lineoff+0] = 0xF;
+- line->descr[lineoff+1] = 0xF;
+- line->w32[lineoff+0] = otag;
+- line->w32[lineoff+1] = otag;
++ line->u.main.descr[lineoff+0] = 0xF;
++ line->u.main.descr[lineoff+1] = 0xF;
++ line->u.main.w32[lineoff+0] = otag;
++ line->u.main.w32[lineoff+1] = otag;
+ }
+ //// END inlined, specialised version of MC_(helperc_b_store8)
+ }
+@@ -2872,8 +2981,8 @@ void make_aligned_word64_noaccess ( Addr
+ tl_assert(lineoff >= 0
+ && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
+ line = find_OCacheLine( a );
+- line->descr[lineoff+0] = 0;
+- line->descr[lineoff+1] = 0;
++ line->u.main.descr[lineoff+0] = 0;
++ line->u.main.descr[lineoff+1] = 0;
+ }
+ //// END inlined, specialised version of MC_(helperc_b_store8)
+ }
+@@ -7399,7 +7508,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load1)
+
+ line = find_OCacheLine( a );
+
+- descr = line->descr[lineoff];
++ descr = line->u.main.descr[lineoff];
+ if (OC_ENABLE_ASSERTIONS) {
+ tl_assert(descr < 0x10);
+ }
+@@ -7407,7 +7516,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load1)
+ if (LIKELY(0 == (descr & (1 << byteoff)))) {
+ return 0;
+ } else {
+- return line->w32[lineoff];
++ return line->u.main.w32[lineoff];
+ }
+ }
+
+@@ -7431,7 +7540,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load2)
+ }
+ line = find_OCacheLine( a );
+
+- descr = line->descr[lineoff];
++ descr = line->u.main.descr[lineoff];
+ if (OC_ENABLE_ASSERTIONS) {
+ tl_assert(descr < 0x10);
+ }
+@@ -7439,7 +7548,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load2)
+ if (LIKELY(0 == (descr & (3 << byteoff)))) {
+ return 0;
+ } else {
+- return line->w32[lineoff];
++ return line->u.main.w32[lineoff];
+ }
+ }
+
+@@ -7462,7 +7571,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load4)
+
+ line = find_OCacheLine( a );
+
+- descr = line->descr[lineoff];
++ descr = line->u.main.descr[lineoff];
+ if (OC_ENABLE_ASSERTIONS) {
+ tl_assert(descr < 0x10);
+ }
+@@ -7470,7 +7579,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load4)
+ if (LIKELY(0 == descr)) {
+ return 0;
+ } else {
+- return line->w32[lineoff];
++ return line->u.main.w32[lineoff];
+ }
+ }
+
+@@ -7493,8 +7602,8 @@ UWord VG_REGPARM(1) MC_(helperc_b_load8)
+
+ line = find_OCacheLine( a );
+
+- descrLo = line->descr[lineoff + 0];
+- descrHi = line->descr[lineoff + 1];
++ descrLo = line->u.main.descr[lineoff + 0];
++ descrHi = line->u.main.descr[lineoff + 1];
+ descr = descrLo | descrHi;
+ if (OC_ENABLE_ASSERTIONS) {
+ tl_assert(descr < 0x10);
+@@ -7503,8 +7612,8 @@ UWord VG_REGPARM(1) MC_(helperc_b_load8)
+ if (LIKELY(0 == descr)) {
+ return 0; /* both 32-bit chunks are defined */
+ } else {
+- UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
+- UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
++ UInt oLo = descrLo == 0 ? 0 : line->u.main.w32[lineoff + 0];
++ UInt oHi = descrHi == 0 ? 0 : line->u.main.w32[lineoff + 1];
+ return merge_origins(oLo, oHi);
+ }
+ }
+@@ -7543,10 +7652,10 @@ void VG_REGPARM(2) MC_(helperc_b_store1)
+ line = find_OCacheLine( a );
+
+ if (d32 == 0) {
+- line->descr[lineoff] &= ~(1 << byteoff);
++ line->u.main.descr[lineoff] &= ~(1 << byteoff);
+ } else {
+- line->descr[lineoff] |= (1 << byteoff);
+- line->w32[lineoff] = d32;
++ line->u.main.descr[lineoff] |= (1 << byteoff);
++ line->u.main.w32[lineoff] = d32;
+ }
+ }
+
+@@ -7571,10 +7680,10 @@ void VG_REGPARM(2) MC_(helperc_b_store2)
+ line = find_OCacheLine( a );
+
+ if (d32 == 0) {
+- line->descr[lineoff] &= ~(3 << byteoff);
++ line->u.main.descr[lineoff] &= ~(3 << byteoff);
+ } else {
+- line->descr[lineoff] |= (3 << byteoff);
+- line->w32[lineoff] = d32;
++ line->u.main.descr[lineoff] |= (3 << byteoff);
++ line->u.main.w32[lineoff] = d32;
+ }
+ }
+
+@@ -7597,14 +7706,15 @@ void VG_REGPARM(2) MC_(helperc_b_store4)
+ line = find_OCacheLine( a );
+
+ if (d32 == 0) {
+- line->descr[lineoff] = 0;
++ line->u.main.descr[lineoff] = 0;
+ } else {
+- line->descr[lineoff] = 0xF;
+- line->w32[lineoff] = d32;
++ line->u.main.descr[lineoff] = 0xF;
++ line->u.main.w32[lineoff] = d32;
+ }
+ }
+
+ void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
++ STATIC_ASSERT(OC_W32S_PER_LINE == 8);
+ OCacheLine* line;
+ UWord lineoff;
+
+@@ -7623,26 +7733,98 @@ void VG_REGPARM(2) MC_(helperc_b_store8)
+ line = find_OCacheLine( a );
+
+ if (d32 == 0) {
+- line->descr[lineoff + 0] = 0;
+- line->descr[lineoff + 1] = 0;
++ line->u.main.descr[lineoff + 0] = 0;
++ line->u.main.descr[lineoff + 1] = 0;
+ } else {
+- line->descr[lineoff + 0] = 0xF;
+- line->descr[lineoff + 1] = 0xF;
+- line->w32[lineoff + 0] = d32;
+- line->w32[lineoff + 1] = d32;
++ line->u.main.descr[lineoff + 0] = 0xF;
++ line->u.main.descr[lineoff + 1] = 0xF;
++ line->u.main.w32[lineoff + 0] = d32;
++ line->u.main.w32[lineoff + 1] = d32;
+ }
+ }
+
+ void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
+- MC_(helperc_b_store8)( a + 0, d32 );
+- MC_(helperc_b_store8)( a + 8, d32 );
++ STATIC_ASSERT(OC_W32S_PER_LINE == 8);
++ OCacheLine* line;
++ UWord lineoff;
++
++ if (UNLIKELY(a & 15)) {
++ /* Handle misaligned case, slowly. */
++ MC_(helperc_b_store8)( a + 0, d32 );
++ MC_(helperc_b_store8)( a + 8, d32 );
++ return;
++ }
++
++ lineoff = oc_line_offset(a);
++ if (OC_ENABLE_ASSERTIONS) {
++ tl_assert(lineoff == (lineoff & 4)); /*0,4*//*since 16-aligned*/
++ }
++
++ line = find_OCacheLine( a );
++
++ if (d32 == 0) {
++ line->u.main.descr[lineoff + 0] = 0;
++ line->u.main.descr[lineoff + 1] = 0;
++ line->u.main.descr[lineoff + 2] = 0;
++ line->u.main.descr[lineoff + 3] = 0;
++ } else {
++ line->u.main.descr[lineoff + 0] = 0xF;
++ line->u.main.descr[lineoff + 1] = 0xF;
++ line->u.main.descr[lineoff + 2] = 0xF;
++ line->u.main.descr[lineoff + 3] = 0xF;
++ line->u.main.w32[lineoff + 0] = d32;
++ line->u.main.w32[lineoff + 1] = d32;
++ line->u.main.w32[lineoff + 2] = d32;
++ line->u.main.w32[lineoff + 3] = d32;
++ }
+ }
+
+ void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
+- MC_(helperc_b_store8)( a + 0, d32 );
+- MC_(helperc_b_store8)( a + 8, d32 );
+- MC_(helperc_b_store8)( a + 16, d32 );
+- MC_(helperc_b_store8)( a + 24, d32 );
++ STATIC_ASSERT(OC_W32S_PER_LINE == 8);
++ OCacheLine* line;
++ UWord lineoff;
++
++ if (UNLIKELY(a & 31)) {
++ /* Handle misaligned case, slowly. */
++ MC_(helperc_b_store16)( a + 0, d32 );
++ MC_(helperc_b_store16)( a + 16, d32 );
++ return;
++ }
++
++ lineoff = oc_line_offset(a);
++ if (OC_ENABLE_ASSERTIONS) {
++ tl_assert(lineoff == 0);
++ }
++
++ line = find_OCacheLine( a );
++
++ if (d32 == 0) {
++ line->u.main.descr[0] = 0;
++ line->u.main.descr[1] = 0;
++ line->u.main.descr[2] = 0;
++ line->u.main.descr[3] = 0;
++ line->u.main.descr[4] = 0;
++ line->u.main.descr[5] = 0;
++ line->u.main.descr[6] = 0;
++ line->u.main.descr[7] = 0;
++ } else {
++ line->u.main.descr[0] = 0xF;
++ line->u.main.descr[1] = 0xF;
++ line->u.main.descr[2] = 0xF;
++ line->u.main.descr[3] = 0xF;
++ line->u.main.descr[4] = 0xF;
++ line->u.main.descr[5] = 0xF;
++ line->u.main.descr[6] = 0xF;
++ line->u.main.descr[7] = 0xF;
++ line->u.main.w32[0] = d32;
++ line->u.main.w32[1] = d32;
++ line->u.main.w32[2] = d32;
++ line->u.main.w32[3] = d32;
++ line->u.main.w32[4] = d32;
++ line->u.main.w32[5] = d32;
++ line->u.main.w32[6] = d32;
++ line->u.main.w32[7] = d32;
++ }
+ }
+
+
+@@ -7650,6 +7832,9 @@ void VG_REGPARM(2) MC_(helperc_b_store32
+ /*--- Origin tracking: sarp handlers ---*/
+ /*--------------------------------------------*/
+
++// We may get asked to do very large SARPs (bug 446103), hence it is important
++// to process 32-byte chunks at a time when possible.
++
+ __attribute__((noinline))
+ static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
+ if ((a & 1) && len >= 1) {
+@@ -7662,9 +7847,40 @@ static void ocache_sarp_Set_Origins ( Ad
+ a += 2;
+ len -= 2;
+ }
+- if (len >= 4)
+- tl_assert(0 == (a & 3));
+- while (len >= 4) {
++ if ((a & 4) && len >= 4) {
++ MC_(helperc_b_store4)( a, otag );
++ a += 4;
++ len -= 4;
++ }
++ if ((a & 8) && len >= 8) {
++ MC_(helperc_b_store8)( a, otag );
++ a += 8;
++ len -= 8;
++ }
++ if ((a & 16) && len >= 16) {
++ MC_(helperc_b_store16)( a, otag );
++ a += 16;
++ len -= 16;
++ }
++ if (len >= 32) {
++ tl_assert(0 == (a & 31));
++ while (len >= 32) {
++ MC_(helperc_b_store32)( a, otag );
++ a += 32;
++ len -= 32;
++ }
++ }
++ if (len >= 16) {
++ MC_(helperc_b_store16)( a, otag );
++ a += 16;
++ len -= 16;
++ }
++ if (len >= 8) {
++ MC_(helperc_b_store8)( a, otag );
++ a += 8;
++ len -= 8;
++ }
++ if (len >= 4) {
+ MC_(helperc_b_store4)( a, otag );
+ a += 4;
+ len -= 4;
+@@ -7694,9 +7910,40 @@ static void ocache_sarp_Clear_Origins (
+ a += 2;
+ len -= 2;
+ }
+- if (len >= 4)
+- tl_assert(0 == (a & 3));
+- while (len >= 4) {
++ if ((a & 4) && len >= 4) {
++ MC_(helperc_b_store4)( a, 0 );
++ a += 4;
++ len -= 4;
++ }
++ if ((a & 8) && len >= 8) {
++ MC_(helperc_b_store8)( a, 0 );
++ a += 8;
++ len -= 8;
++ }
++ if ((a & 16) && len >= 16) {
++ MC_(helperc_b_store16)( a, 0 );
++ a += 16;
++ len -= 16;
++ }
++ if (len >= 32) {
++ tl_assert(0 == (a & 31));
++ while (len >= 32) {
++ MC_(helperc_b_store32)( a, 0 );
++ a += 32;
++ len -= 32;
++ }
++ }
++ if (len >= 16) {
++ MC_(helperc_b_store16)( a, 0 );
++ a += 16;
++ len -= 16;
++ }
++ if (len >= 8) {
++ MC_(helperc_b_store8)( a, 0 );
++ a += 8;
++ len -= 8;
++ }
++ if (len >= 4) {
+ MC_(helperc_b_store4)( a, 0 );
+ a += 4;
+ len -= 4;
+@@ -7846,10 +8093,14 @@ static void mc_post_clo_init ( void )
+ if (MC_(clo_mc_level) >= 3) {
+ init_OCache();
+ tl_assert(ocacheL1 != NULL);
+- tl_assert(ocacheL2 != NULL);
++ for (UInt i = 0; i < 4096; i++ ) {
++ tl_assert(ocachesL2[i] != NULL);
++ }
+ } else {
+ tl_assert(ocacheL1 == NULL);
+- tl_assert(ocacheL2 == NULL);
++ for (UInt i = 0; i < 4096; i++ ) {
++ tl_assert(ocachesL2[i] == NULL);
++ }
+ }
+
+ MC_(chunk_poolalloc) = VG_(newPA)
+@@ -7943,29 +8194,33 @@ static void mc_print_stats (void)
+
+ if (MC_(clo_mc_level) >= 3) {
+ VG_(message)(Vg_DebugMsg,
+- " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
++ " ocacheL1: %'14lu refs %'14lu misses (%'lu lossage)\n",
+ stats_ocacheL1_find,
+ stats_ocacheL1_misses,
+ stats_ocacheL1_lossage );
+ VG_(message)(Vg_DebugMsg,
+- " ocacheL1: %'12lu at 0 %'12lu at 1\n",
++ " ocacheL1: %'14lu at 0 %'14lu at 1\n",
+ stats_ocacheL1_find - stats_ocacheL1_misses
+ - stats_ocacheL1_found_at_1
+ - stats_ocacheL1_found_at_N,
+ stats_ocacheL1_found_at_1 );
+ VG_(message)(Vg_DebugMsg,
+- " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
++ " ocacheL1: %'14lu at 2+ %'14lu move-fwds\n",
+ stats_ocacheL1_found_at_N,
+ stats_ocacheL1_movefwds );
+ VG_(message)(Vg_DebugMsg,
+- " ocacheL1: %'12lu sizeB %'12d useful\n",
++ " ocacheL1: %'14lu sizeB %'14d useful\n",
+ (SizeT)sizeof(OCache),
+ 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
+ VG_(message)(Vg_DebugMsg,
+- " ocacheL2: %'12lu refs %'12lu misses\n",
+- stats__ocacheL2_refs,
++ " ocacheL2: %'14lu finds %'14lu misses\n",
++ stats__ocacheL2_finds,
+ stats__ocacheL2_misses );
+ VG_(message)(Vg_DebugMsg,
++ " ocacheL2: %'14lu adds %'14lu dels\n",
++ stats__ocacheL2_adds,
++ stats__ocacheL2_dels );
++ VG_(message)(Vg_DebugMsg,
+ " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
+ stats__ocacheL2_n_nodes_max,
+ stats__ocacheL2_n_nodes );
+@@ -7974,7 +8229,9 @@ static void mc_print_stats (void)
+ stats__nia_cache_queries, stats__nia_cache_misses);
+ } else {
+ tl_assert(ocacheL1 == NULL);
+- tl_assert(ocacheL2 == NULL);
++ for (UInt i = 0; i < 4096; i++ ) {
++ tl_assert(ocachesL2[1] == NULL);
++ }
+ }
+ }
+
+@@ -8216,7 +8473,9 @@ static void mc_pre_clo_init(void)
+ if we need to, since the command line args haven't been
+ processed yet. Hence defer it to mc_post_clo_init. */
+ tl_assert(ocacheL1 == NULL);
+- tl_assert(ocacheL2 == NULL);
++ for (UInt i = 0; i < 4096; i++ ) {
++ tl_assert(ocachesL2[i] == NULL);
++ }
+
+ /* Check some important stuff. See extensive comments above
+ re UNALIGNED_OR_HIGH for background. */
diff -Nru valgrind-3.18.1/debian/patches/series valgrind-3.18.1/debian/patches/series
--- valgrind-3.18.1/debian/patches/series 2021-10-22 19:18:53.000000000 +0900
+++ valgrind-3.18.1/debian/patches/series 2022-01-19 06:18:55.000000000 +0900
@@ -6,3 +6,4 @@
10_mpi-pkg-config.patch
11_arm64-cache-flush.patch
13_fix-path-to-vgdb.patch
+99_VALGRIND_3_18_1-58-g8ee165616.patch

Просмотреть файл

@ -47,6 +47,7 @@ deb11-valgrind:
dsc:
url: http://snapshot.debian.org/archive/debian/20211022T144903Z/pool/main/v/valgrind/valgrind_3.18.1-1.dsc
sha256: 6f2606862134b6387cdbe070920728fdb84c6ffa85308ca6f36222717a87aadf
patch: valgrind.diff
deb10-python-zstandard:
description: "python-zstandard for Debian buster"