From 77e267d0149201a0b8739a0fccd378b3a98ed06d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 19 Jan 2022 22:27:47 +0000 Subject: [PATCH] Bug 1750599 - Apply valgrind upstream patch that increases performance with --track-origins=yes. r=firefox-build-system-reviewers,mhentges Differential Revision: https://phabricator.services.mozilla.com/D136307 --- build/debian-packages/valgrind.diff | 789 ++++++++++++++++++++++++++++ taskcluster/ci/packages/debian.yml | 1 + 2 files changed, 790 insertions(+) create mode 100644 build/debian-packages/valgrind.diff diff --git a/build/debian-packages/valgrind.diff b/build/debian-packages/valgrind.diff new file mode 100644 index 000000000000..2ef1788f92f9 --- /dev/null +++ b/build/debian-packages/valgrind.diff @@ -0,0 +1,789 @@ +diff -Nru valgrind-3.18.1/debian/changelog valgrind-3.18.1/debian/changelog +--- valgrind-3.18.1/debian/changelog 2021-10-22 19:18:53.000000000 +0900 ++++ valgrind-3.18.1/debian/changelog 2022-01-19 06:20:18.000000000 +0900 +@@ -1,3 +1,9 @@ ++valgrind (1:3.18.1-1.deb11moz1) bullseye; urgency=medium ++ ++ * Apply patch for https://bugs.kde.org/show_bug.cgi?id=446103 ++ ++ -- Mike Hommey Wed, 19 Jan 2022 06:20:18 +0900 ++ + valgrind (1:3.18.1-1) unstable; urgency=medium + + * New upstream release +diff -Nru valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch +--- valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch 1970-01-01 09:00:00.000000000 +0900 ++++ valgrind-3.18.1/debian/patches/99_VALGRIND_3_18_1-58-g8ee165616.patch 2022-01-19 06:19:56.000000000 +0900 +@@ -0,0 +1,764 @@ ++Index: valgrind-3.18.1/coregrind/m_oset.c ++=================================================================== ++--- valgrind-3.18.1.orig/coregrind/m_oset.c +++++ valgrind-3.18.1/coregrind/m_oset.c ++@@ -570,7 +570,7 @@ void VG_(OSetWord_Insert)(AvlTree* t, UW ++ /*--------------------------------------------------------------------*/ ++ ++ // Find the *node* in t matching k, or NULL if not found. ++-static AvlNode* avl_lookup(const AvlTree* t, const void* k) +++static inline AvlNode* avl_lookup(const AvlTree* t, const void* k) ++ { ++ Word cmpres; ++ AvlNode* curr = t->root; ++@@ -604,9 +604,10 @@ static AvlNode* avl_lookup(const AvlTree ++ // Find the *element* in t matching k, or NULL if not found. ++ void* VG_(OSetGen_Lookup)(const AvlTree* t, const void* k) ++ { ++- AvlNode* n; ++ vg_assert(t); ++- n = avl_lookup(t, k); +++ if (LIKELY(t->root == NULL)) +++ return NULL; +++ AvlNode* n = avl_lookup(t, k); ++ return ( n ? elem_of_node(n) : NULL ); ++ } ++ ++@@ -767,6 +768,8 @@ static Bool avl_removeroot(AvlTree* t) ++ // if not present. ++ void* VG_(OSetGen_Remove)(AvlTree* t, const void* k) ++ { +++ if (LIKELY(t->root == NULL)) +++ return NULL; ++ // Have to find the node first, then remove it. ++ AvlNode* n = avl_lookup(t, k); ++ if (n) { ++Index: valgrind-3.18.1/memcheck/mc_main.c ++=================================================================== ++--- valgrind-3.18.1.orig/memcheck/mc_main.c +++++ valgrind-3.18.1/memcheck/mc_main.c ++@@ -2402,7 +2402,9 @@ static UWord stats_ocacheL1_misses ++ static UWord stats_ocacheL1_lossage = 0; ++ static UWord stats_ocacheL1_movefwds = 0; ++ ++-static UWord stats__ocacheL2_refs = 0; +++static UWord stats__ocacheL2_finds = 0; +++static UWord stats__ocacheL2_adds = 0; +++static UWord stats__ocacheL2_dels = 0; ++ static UWord stats__ocacheL2_misses = 0; ++ static UWord stats__ocacheL2_n_nodes_max = 0; ++ ++@@ -2431,26 +2433,86 @@ static INLINE Bool is_valid_oc_tag ( Add ++ #define OC_MOVE_FORWARDS_EVERY_BITS 7 ++ ++ +++/* Originally (pre Dec 2021) it was the case that this code had a +++ parameterizable cache line size, set by changing OC_BITS_PER_LINE. +++ However, as a result of the speedup fixes necessitated by bug 446103, that +++ is no longer really the case, and much of the L1 and L2 cache code has been +++ tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line +++ size is 32 bytes). Changing that would require a bunch of re-tuning +++ effort. So let's set it in stone for now. */ +++STATIC_ASSERT(OC_BITS_PER_LINE == 5); +++STATIC_ASSERT(OC_LINES_PER_SET == 2); +++ +++/* Fundamentally we want an OCacheLine structure (see below) as follows: +++ struct { +++ Addr tag; +++ UInt w32 [OC_W32S_PER_LINE]; +++ UChar descr[OC_W32S_PER_LINE]; +++ } +++ However, in various places, we want to set the w32[] and descr[] arrays to +++ zero, or check if they are zero. This can be a very hot path (per bug +++ 446103). So, instead, we have a union which is either those two arrays +++ (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s). For the +++ set-zero/test-zero operations, the OCacheLine_W64s are used. +++*/ +++ +++// To ensure that OCacheLine.descr[] will fit in an integral number of ULongs. +++STATIC_ASSERT(0 == (OC_W32S_PER_LINE % 8)); +++ +++#define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \ +++ (OC_W32S_PER_LINE / 2 /* covers OCacheLine_Main.w32[] */ \ +++ + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */ +++STATIC_ASSERT(OC_W64S_PER_MAIN == 5); +++ +++typedef +++ ULong OCacheLine_W64s[OC_W64S_PER_MAIN]; +++ ++ typedef ++ struct { ++- Addr tag; ++- UInt w32[OC_W32S_PER_LINE]; +++ UInt w32 [OC_W32S_PER_LINE]; ++ UChar descr[OC_W32S_PER_LINE]; ++ } +++ OCacheLine_Main; +++ +++STATIC_ASSERT(sizeof(OCacheLine_W64s) == sizeof(OCacheLine_Main)); +++ +++typedef +++ struct { +++ Addr tag; +++ union { +++ OCacheLine_W64s w64s; +++ OCacheLine_Main main; +++ } u; +++ } ++ OCacheLine; ++ ++ /* Classify and also sanity-check 'line'. Return 'e' (empty) if not ++ in use, 'n' (nonzero) if it contains at least one valid origin tag, ++ and 'z' if all the represented tags are zero. */ ++-static UChar classify_OCacheLine ( OCacheLine* line ) +++static inline UChar classify_OCacheLine ( OCacheLine* line ) ++ { ++ UWord i; ++ if (line->tag == 1/*invalid*/) ++ return 'e'; /* EMPTY */ ++ tl_assert(is_valid_oc_tag(line->tag)); +++ +++ // BEGIN fast special-case of the test loop below. This will detect +++ // zero-ness (case 'z') for a subset of cases that the loop below will, +++ // hence is safe. +++ if (OC_W64S_PER_MAIN == 5) { +++ if (line->u.w64s[0] == 0 +++ && line->u.w64s[1] == 0 && line->u.w64s[2] == 0 +++ && line->u.w64s[3] == 0 && line->u.w64s[4] == 0) { +++ return 'z'; +++ } +++ } else { +++ tl_assert2(0, "unsupported line size (classify_OCacheLine)"); +++ } +++ // END fast special-case of the test loop below. +++ ++ for (i = 0; i < OC_W32S_PER_LINE; i++) { ++- tl_assert(0 == ((~0xF) & line->descr[i])); ++- if (line->w32[i] > 0 && line->descr[i] > 0) +++ tl_assert(0 == ((~0xF) & line->u.main.descr[i])); +++ if (line->u.main.w32[i] > 0 && line->u.main.descr[i] > 0) ++ return 'n'; /* NONZERO - contains useful info */ ++ } ++ return 'z'; /* ZERO - no useful info */ ++@@ -2491,7 +2553,7 @@ static void init_OCache ( void ) ++ init_ocacheL2(); ++ } ++ ++-static void moveLineForwards ( OCacheSet* set, UWord lineno ) +++static inline void moveLineForwards ( OCacheSet* set, UWord lineno ) ++ { ++ OCacheLine tmp; ++ stats_ocacheL1_movefwds++; ++@@ -2501,11 +2563,23 @@ static void moveLineForwards ( OCacheSet ++ set->line[lineno] = tmp; ++ } ++ ++-static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) { +++static inline void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) { ++ UWord i; ++- for (i = 0; i < OC_W32S_PER_LINE; i++) { ++- line->w32[i] = 0; /* NO ORIGIN */ ++- line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */ +++ if (OC_W32S_PER_LINE == 8) { +++ // BEGIN fast special-case of the loop below +++ tl_assert(OC_W64S_PER_MAIN == 5); +++ line->u.w64s[0] = 0; +++ line->u.w64s[1] = 0; +++ line->u.w64s[2] = 0; +++ line->u.w64s[3] = 0; +++ line->u.w64s[4] = 0; +++ // END fast special-case of the loop below +++ } else { +++ tl_assert2(0, "unsupported line size (zeroise_OCacheLine)"); +++ for (i = 0; i < OC_W32S_PER_LINE; i++) { +++ line->u.main.w32[i] = 0; /* NO ORIGIN */ +++ line->u.main.descr[i] = 0; /* REALLY REALLY NO ORIGIN! */ +++ } ++ } ++ line->tag = tag; ++ } ++@@ -2513,7 +2587,37 @@ static void zeroise_OCacheLine ( OCacheL ++ ////////////////////////////////////////////////////////////// ++ //// OCache backing store ++ ++-static OSet* ocacheL2 = NULL; +++// The backing store for ocacheL1 is, conceptually, an AVL tree of lines that +++// got ejected from the L1 (a "victim cache"), and which actually contain +++// useful info -- that is, for which classify_OCacheLine would return 'n' and +++// no other value. However, the tree can grow large, and searching/updating +++// it can be hot paths. Hence we "take out" 12 significant bits of the key by +++// having 4096 trees, and select one using HASH_OCACHE_TAG. +++// +++// What that hash function returns isn't important so long as it is a pure +++// function of the tag values, and is < 4096. However, it is critical for +++// performance of long SARPs. Hence the extra shift of 11 bits. This means +++// each tree conceptually is assigned to contiguous sequences of 2048 lines in +++// the "line address space", giving some locality of reference when scanning +++// linearly through address space, as is done by a SARP. Changing that 11 to +++// 0 gives terrible performance on long SARPs, presumably because each new +++// line is in a different tree, hence we wind up thrashing the (CPU's) caches. +++// +++// On 32-bit targets, we have to be a bit careful not to shift out so many +++// bits that not all 2^12 trees get used. That leads to the constraint +++// (OC_BITS_PER_LINE + 11 + 12) < 32. Note that the 11 is the only thing we +++// can change here. In this case we have OC_BITS_PER_LINE == 5, hence the +++// inequality is (28 < 32) and so we're good. +++// +++// The value 11 was determined empirically from various Firefox runs. 10 or +++// 12 also work pretty well. +++ +++static OSet* ocachesL2[4096]; +++ +++STATIC_ASSERT((OC_BITS_PER_LINE + 11 + 12) < 32); +++static inline UInt HASH_OCACHE_TAG ( Addr tag ) { +++ return (UInt)((tag >> (OC_BITS_PER_LINE + 11)) & 0xFFF); +++} ++ ++ static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) { ++ return VG_(malloc)(cc, szB); ++@@ -2527,23 +2631,26 @@ static UWord stats__ocacheL2_n_nodes = 0 ++ ++ static void init_ocacheL2 ( void ) ++ { ++- tl_assert(!ocacheL2); ++ tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */ ++ tl_assert(0 == offsetof(OCacheLine,tag)); ++- ocacheL2 ++- = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), ++- NULL, /* fast cmp */ ++- ocacheL2_malloc, "mc.ioL2", ocacheL2_free); +++ for (UInt i = 0; i < 4096; i++) { +++ tl_assert(!ocachesL2[i]); +++ ocachesL2[i] +++ = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), +++ NULL, /* fast cmp */ +++ ocacheL2_malloc, "mc.ioL2", ocacheL2_free); +++ } ++ stats__ocacheL2_n_nodes = 0; ++ } ++ ++ /* Find line with the given tag in the tree, or NULL if not found. */ ++-static OCacheLine* ocacheL2_find_tag ( Addr tag ) +++static inline OCacheLine* ocacheL2_find_tag ( Addr tag ) ++ { ++ OCacheLine* line; ++ tl_assert(is_valid_oc_tag(tag)); ++- stats__ocacheL2_refs++; ++- line = VG_(OSetGen_Lookup)( ocacheL2, &tag ); +++ stats__ocacheL2_finds++; +++ OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)]; +++ line = VG_(OSetGen_Lookup)( oset, &tag ); ++ return line; ++ } ++ ++@@ -2553,10 +2660,11 @@ static void ocacheL2_del_tag ( Addr tag ++ { ++ OCacheLine* line; ++ tl_assert(is_valid_oc_tag(tag)); ++- stats__ocacheL2_refs++; ++- line = VG_(OSetGen_Remove)( ocacheL2, &tag ); +++ stats__ocacheL2_dels++; +++ OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)]; +++ line = VG_(OSetGen_Remove)( oset, &tag ); ++ if (line) { ++- VG_(OSetGen_FreeNode)(ocacheL2, line); +++ VG_(OSetGen_FreeNode)(oset, line); ++ tl_assert(stats__ocacheL2_n_nodes > 0); ++ stats__ocacheL2_n_nodes--; ++ } ++@@ -2568,10 +2676,11 @@ static void ocacheL2_add_line ( OCacheLi ++ { ++ OCacheLine* copy; ++ tl_assert(is_valid_oc_tag(line->tag)); ++- copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) ); +++ OSet* oset = ocachesL2[HASH_OCACHE_TAG(line->tag)]; +++ copy = VG_(OSetGen_AllocNode)( oset, sizeof(OCacheLine) ); ++ *copy = *line; ++- stats__ocacheL2_refs++; ++- VG_(OSetGen_Insert)( ocacheL2, copy ); +++ stats__ocacheL2_adds++; +++ VG_(OSetGen_Insert)( oset, copy ); ++ stats__ocacheL2_n_nodes++; ++ if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max) ++ stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes; ++@@ -2626,7 +2735,7 @@ static OCacheLine* find_OCacheLine_SLOW ++ /* line contains zeroes. We must ensure the backing store is ++ updated accordingly, either by copying the line there ++ verbatim, or by ensuring it isn't present there. We ++- chosse the latter on the basis that it reduces the size of +++ choose the latter on the basis that it reduces the size of ++ the backing store. */ ++ ocacheL2_del_tag( victim->tag ); ++ break; ++@@ -2697,10 +2806,10 @@ static INLINE void set_aligned_word64_Or ++ && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); ++ } ++ line = find_OCacheLine( a ); ++- line->descr[lineoff+0] = 0xF; ++- line->descr[lineoff+1] = 0xF; ++- line->w32[lineoff+0] = otag; ++- line->w32[lineoff+1] = otag; +++ line->u.main.descr[lineoff+0] = 0xF; +++ line->u.main.descr[lineoff+1] = 0xF; +++ line->u.main.w32[lineoff+0] = otag; +++ line->u.main.w32[lineoff+1] = otag; ++ } ++ //// END inlined, specialised version of MC_(helperc_b_store8) ++ } ++@@ -2751,8 +2860,8 @@ void make_aligned_word32_undefined_w_ota ++ tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); ++ } ++ line = find_OCacheLine( a ); ++- line->descr[lineoff] = 0xF; ++- line->w32[lineoff] = otag; +++ line->u.main.descr[lineoff] = 0xF; +++ line->u.main.w32[lineoff] = otag; ++ } ++ //// END inlined, specialised version of MC_(helperc_b_store4) ++ } ++@@ -2788,7 +2897,7 @@ void make_aligned_word32_noaccess ( Addr ++ tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE); ++ } ++ line = find_OCacheLine( a ); ++- line->descr[lineoff] = 0; +++ line->u.main.descr[lineoff] = 0; ++ } ++ //// END inlined, specialised version of MC_(helperc_b_store4) ++ } ++@@ -2834,10 +2943,10 @@ void make_aligned_word64_undefined_w_ota ++ tl_assert(lineoff >= 0 ++ && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); ++ line = find_OCacheLine( a ); ++- line->descr[lineoff+0] = 0xF; ++- line->descr[lineoff+1] = 0xF; ++- line->w32[lineoff+0] = otag; ++- line->w32[lineoff+1] = otag; +++ line->u.main.descr[lineoff+0] = 0xF; +++ line->u.main.descr[lineoff+1] = 0xF; +++ line->u.main.w32[lineoff+0] = otag; +++ line->u.main.w32[lineoff+1] = otag; ++ } ++ //// END inlined, specialised version of MC_(helperc_b_store8) ++ } ++@@ -2872,8 +2981,8 @@ void make_aligned_word64_noaccess ( Addr ++ tl_assert(lineoff >= 0 ++ && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/); ++ line = find_OCacheLine( a ); ++- line->descr[lineoff+0] = 0; ++- line->descr[lineoff+1] = 0; +++ line->u.main.descr[lineoff+0] = 0; +++ line->u.main.descr[lineoff+1] = 0; ++ } ++ //// END inlined, specialised version of MC_(helperc_b_store8) ++ } ++@@ -7399,7 +7508,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load1) ++ ++ line = find_OCacheLine( a ); ++ ++- descr = line->descr[lineoff]; +++ descr = line->u.main.descr[lineoff]; ++ if (OC_ENABLE_ASSERTIONS) { ++ tl_assert(descr < 0x10); ++ } ++@@ -7407,7 +7516,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load1) ++ if (LIKELY(0 == (descr & (1 << byteoff)))) { ++ return 0; ++ } else { ++- return line->w32[lineoff]; +++ return line->u.main.w32[lineoff]; ++ } ++ } ++ ++@@ -7431,7 +7540,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load2) ++ } ++ line = find_OCacheLine( a ); ++ ++- descr = line->descr[lineoff]; +++ descr = line->u.main.descr[lineoff]; ++ if (OC_ENABLE_ASSERTIONS) { ++ tl_assert(descr < 0x10); ++ } ++@@ -7439,7 +7548,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load2) ++ if (LIKELY(0 == (descr & (3 << byteoff)))) { ++ return 0; ++ } else { ++- return line->w32[lineoff]; +++ return line->u.main.w32[lineoff]; ++ } ++ } ++ ++@@ -7462,7 +7571,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load4) ++ ++ line = find_OCacheLine( a ); ++ ++- descr = line->descr[lineoff]; +++ descr = line->u.main.descr[lineoff]; ++ if (OC_ENABLE_ASSERTIONS) { ++ tl_assert(descr < 0x10); ++ } ++@@ -7470,7 +7579,7 @@ UWord VG_REGPARM(1) MC_(helperc_b_load4) ++ if (LIKELY(0 == descr)) { ++ return 0; ++ } else { ++- return line->w32[lineoff]; +++ return line->u.main.w32[lineoff]; ++ } ++ } ++ ++@@ -7493,8 +7602,8 @@ UWord VG_REGPARM(1) MC_(helperc_b_load8) ++ ++ line = find_OCacheLine( a ); ++ ++- descrLo = line->descr[lineoff + 0]; ++- descrHi = line->descr[lineoff + 1]; +++ descrLo = line->u.main.descr[lineoff + 0]; +++ descrHi = line->u.main.descr[lineoff + 1]; ++ descr = descrLo | descrHi; ++ if (OC_ENABLE_ASSERTIONS) { ++ tl_assert(descr < 0x10); ++@@ -7503,8 +7612,8 @@ UWord VG_REGPARM(1) MC_(helperc_b_load8) ++ if (LIKELY(0 == descr)) { ++ return 0; /* both 32-bit chunks are defined */ ++ } else { ++- UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0]; ++- UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1]; +++ UInt oLo = descrLo == 0 ? 0 : line->u.main.w32[lineoff + 0]; +++ UInt oHi = descrHi == 0 ? 0 : line->u.main.w32[lineoff + 1]; ++ return merge_origins(oLo, oHi); ++ } ++ } ++@@ -7543,10 +7652,10 @@ void VG_REGPARM(2) MC_(helperc_b_store1) ++ line = find_OCacheLine( a ); ++ ++ if (d32 == 0) { ++- line->descr[lineoff] &= ~(1 << byteoff); +++ line->u.main.descr[lineoff] &= ~(1 << byteoff); ++ } else { ++- line->descr[lineoff] |= (1 << byteoff); ++- line->w32[lineoff] = d32; +++ line->u.main.descr[lineoff] |= (1 << byteoff); +++ line->u.main.w32[lineoff] = d32; ++ } ++ } ++ ++@@ -7571,10 +7680,10 @@ void VG_REGPARM(2) MC_(helperc_b_store2) ++ line = find_OCacheLine( a ); ++ ++ if (d32 == 0) { ++- line->descr[lineoff] &= ~(3 << byteoff); +++ line->u.main.descr[lineoff] &= ~(3 << byteoff); ++ } else { ++- line->descr[lineoff] |= (3 << byteoff); ++- line->w32[lineoff] = d32; +++ line->u.main.descr[lineoff] |= (3 << byteoff); +++ line->u.main.w32[lineoff] = d32; ++ } ++ } ++ ++@@ -7597,14 +7706,15 @@ void VG_REGPARM(2) MC_(helperc_b_store4) ++ line = find_OCacheLine( a ); ++ ++ if (d32 == 0) { ++- line->descr[lineoff] = 0; +++ line->u.main.descr[lineoff] = 0; ++ } else { ++- line->descr[lineoff] = 0xF; ++- line->w32[lineoff] = d32; +++ line->u.main.descr[lineoff] = 0xF; +++ line->u.main.w32[lineoff] = d32; ++ } ++ } ++ ++ void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) { +++ STATIC_ASSERT(OC_W32S_PER_LINE == 8); ++ OCacheLine* line; ++ UWord lineoff; ++ ++@@ -7623,26 +7733,98 @@ void VG_REGPARM(2) MC_(helperc_b_store8) ++ line = find_OCacheLine( a ); ++ ++ if (d32 == 0) { ++- line->descr[lineoff + 0] = 0; ++- line->descr[lineoff + 1] = 0; +++ line->u.main.descr[lineoff + 0] = 0; +++ line->u.main.descr[lineoff + 1] = 0; ++ } else { ++- line->descr[lineoff + 0] = 0xF; ++- line->descr[lineoff + 1] = 0xF; ++- line->w32[lineoff + 0] = d32; ++- line->w32[lineoff + 1] = d32; +++ line->u.main.descr[lineoff + 0] = 0xF; +++ line->u.main.descr[lineoff + 1] = 0xF; +++ line->u.main.w32[lineoff + 0] = d32; +++ line->u.main.w32[lineoff + 1] = d32; ++ } ++ } ++ ++ void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) { ++- MC_(helperc_b_store8)( a + 0, d32 ); ++- MC_(helperc_b_store8)( a + 8, d32 ); +++ STATIC_ASSERT(OC_W32S_PER_LINE == 8); +++ OCacheLine* line; +++ UWord lineoff; +++ +++ if (UNLIKELY(a & 15)) { +++ /* Handle misaligned case, slowly. */ +++ MC_(helperc_b_store8)( a + 0, d32 ); +++ MC_(helperc_b_store8)( a + 8, d32 ); +++ return; +++ } +++ +++ lineoff = oc_line_offset(a); +++ if (OC_ENABLE_ASSERTIONS) { +++ tl_assert(lineoff == (lineoff & 4)); /*0,4*//*since 16-aligned*/ +++ } +++ +++ line = find_OCacheLine( a ); +++ +++ if (d32 == 0) { +++ line->u.main.descr[lineoff + 0] = 0; +++ line->u.main.descr[lineoff + 1] = 0; +++ line->u.main.descr[lineoff + 2] = 0; +++ line->u.main.descr[lineoff + 3] = 0; +++ } else { +++ line->u.main.descr[lineoff + 0] = 0xF; +++ line->u.main.descr[lineoff + 1] = 0xF; +++ line->u.main.descr[lineoff + 2] = 0xF; +++ line->u.main.descr[lineoff + 3] = 0xF; +++ line->u.main.w32[lineoff + 0] = d32; +++ line->u.main.w32[lineoff + 1] = d32; +++ line->u.main.w32[lineoff + 2] = d32; +++ line->u.main.w32[lineoff + 3] = d32; +++ } ++ } ++ ++ void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) { ++- MC_(helperc_b_store8)( a + 0, d32 ); ++- MC_(helperc_b_store8)( a + 8, d32 ); ++- MC_(helperc_b_store8)( a + 16, d32 ); ++- MC_(helperc_b_store8)( a + 24, d32 ); +++ STATIC_ASSERT(OC_W32S_PER_LINE == 8); +++ OCacheLine* line; +++ UWord lineoff; +++ +++ if (UNLIKELY(a & 31)) { +++ /* Handle misaligned case, slowly. */ +++ MC_(helperc_b_store16)( a + 0, d32 ); +++ MC_(helperc_b_store16)( a + 16, d32 ); +++ return; +++ } +++ +++ lineoff = oc_line_offset(a); +++ if (OC_ENABLE_ASSERTIONS) { +++ tl_assert(lineoff == 0); +++ } +++ +++ line = find_OCacheLine( a ); +++ +++ if (d32 == 0) { +++ line->u.main.descr[0] = 0; +++ line->u.main.descr[1] = 0; +++ line->u.main.descr[2] = 0; +++ line->u.main.descr[3] = 0; +++ line->u.main.descr[4] = 0; +++ line->u.main.descr[5] = 0; +++ line->u.main.descr[6] = 0; +++ line->u.main.descr[7] = 0; +++ } else { +++ line->u.main.descr[0] = 0xF; +++ line->u.main.descr[1] = 0xF; +++ line->u.main.descr[2] = 0xF; +++ line->u.main.descr[3] = 0xF; +++ line->u.main.descr[4] = 0xF; +++ line->u.main.descr[5] = 0xF; +++ line->u.main.descr[6] = 0xF; +++ line->u.main.descr[7] = 0xF; +++ line->u.main.w32[0] = d32; +++ line->u.main.w32[1] = d32; +++ line->u.main.w32[2] = d32; +++ line->u.main.w32[3] = d32; +++ line->u.main.w32[4] = d32; +++ line->u.main.w32[5] = d32; +++ line->u.main.w32[6] = d32; +++ line->u.main.w32[7] = d32; +++ } ++ } ++ ++ ++@@ -7650,6 +7832,9 @@ void VG_REGPARM(2) MC_(helperc_b_store32 ++ /*--- Origin tracking: sarp handlers ---*/ ++ /*--------------------------------------------*/ ++ +++// We may get asked to do very large SARPs (bug 446103), hence it is important +++// to process 32-byte chunks at a time when possible. +++ ++ __attribute__((noinline)) ++ static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) { ++ if ((a & 1) && len >= 1) { ++@@ -7662,9 +7847,40 @@ static void ocache_sarp_Set_Origins ( Ad ++ a += 2; ++ len -= 2; ++ } ++- if (len >= 4) ++- tl_assert(0 == (a & 3)); ++- while (len >= 4) { +++ if ((a & 4) && len >= 4) { +++ MC_(helperc_b_store4)( a, otag ); +++ a += 4; +++ len -= 4; +++ } +++ if ((a & 8) && len >= 8) { +++ MC_(helperc_b_store8)( a, otag ); +++ a += 8; +++ len -= 8; +++ } +++ if ((a & 16) && len >= 16) { +++ MC_(helperc_b_store16)( a, otag ); +++ a += 16; +++ len -= 16; +++ } +++ if (len >= 32) { +++ tl_assert(0 == (a & 31)); +++ while (len >= 32) { +++ MC_(helperc_b_store32)( a, otag ); +++ a += 32; +++ len -= 32; +++ } +++ } +++ if (len >= 16) { +++ MC_(helperc_b_store16)( a, otag ); +++ a += 16; +++ len -= 16; +++ } +++ if (len >= 8) { +++ MC_(helperc_b_store8)( a, otag ); +++ a += 8; +++ len -= 8; +++ } +++ if (len >= 4) { ++ MC_(helperc_b_store4)( a, otag ); ++ a += 4; ++ len -= 4; ++@@ -7694,9 +7910,40 @@ static void ocache_sarp_Clear_Origins ( ++ a += 2; ++ len -= 2; ++ } ++- if (len >= 4) ++- tl_assert(0 == (a & 3)); ++- while (len >= 4) { +++ if ((a & 4) && len >= 4) { +++ MC_(helperc_b_store4)( a, 0 ); +++ a += 4; +++ len -= 4; +++ } +++ if ((a & 8) && len >= 8) { +++ MC_(helperc_b_store8)( a, 0 ); +++ a += 8; +++ len -= 8; +++ } +++ if ((a & 16) && len >= 16) { +++ MC_(helperc_b_store16)( a, 0 ); +++ a += 16; +++ len -= 16; +++ } +++ if (len >= 32) { +++ tl_assert(0 == (a & 31)); +++ while (len >= 32) { +++ MC_(helperc_b_store32)( a, 0 ); +++ a += 32; +++ len -= 32; +++ } +++ } +++ if (len >= 16) { +++ MC_(helperc_b_store16)( a, 0 ); +++ a += 16; +++ len -= 16; +++ } +++ if (len >= 8) { +++ MC_(helperc_b_store8)( a, 0 ); +++ a += 8; +++ len -= 8; +++ } +++ if (len >= 4) { ++ MC_(helperc_b_store4)( a, 0 ); ++ a += 4; ++ len -= 4; ++@@ -7846,10 +8093,14 @@ static void mc_post_clo_init ( void ) ++ if (MC_(clo_mc_level) >= 3) { ++ init_OCache(); ++ tl_assert(ocacheL1 != NULL); ++- tl_assert(ocacheL2 != NULL); +++ for (UInt i = 0; i < 4096; i++ ) { +++ tl_assert(ocachesL2[i] != NULL); +++ } ++ } else { ++ tl_assert(ocacheL1 == NULL); ++- tl_assert(ocacheL2 == NULL); +++ for (UInt i = 0; i < 4096; i++ ) { +++ tl_assert(ocachesL2[i] == NULL); +++ } ++ } ++ ++ MC_(chunk_poolalloc) = VG_(newPA) ++@@ -7943,29 +8194,33 @@ static void mc_print_stats (void) ++ ++ if (MC_(clo_mc_level) >= 3) { ++ VG_(message)(Vg_DebugMsg, ++- " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n", +++ " ocacheL1: %'14lu refs %'14lu misses (%'lu lossage)\n", ++ stats_ocacheL1_find, ++ stats_ocacheL1_misses, ++ stats_ocacheL1_lossage ); ++ VG_(message)(Vg_DebugMsg, ++- " ocacheL1: %'12lu at 0 %'12lu at 1\n", +++ " ocacheL1: %'14lu at 0 %'14lu at 1\n", ++ stats_ocacheL1_find - stats_ocacheL1_misses ++ - stats_ocacheL1_found_at_1 ++ - stats_ocacheL1_found_at_N, ++ stats_ocacheL1_found_at_1 ); ++ VG_(message)(Vg_DebugMsg, ++- " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n", +++ " ocacheL1: %'14lu at 2+ %'14lu move-fwds\n", ++ stats_ocacheL1_found_at_N, ++ stats_ocacheL1_movefwds ); ++ VG_(message)(Vg_DebugMsg, ++- " ocacheL1: %'12lu sizeB %'12d useful\n", +++ " ocacheL1: %'14lu sizeB %'14d useful\n", ++ (SizeT)sizeof(OCache), ++ 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS ); ++ VG_(message)(Vg_DebugMsg, ++- " ocacheL2: %'12lu refs %'12lu misses\n", ++- stats__ocacheL2_refs, +++ " ocacheL2: %'14lu finds %'14lu misses\n", +++ stats__ocacheL2_finds, ++ stats__ocacheL2_misses ); ++ VG_(message)(Vg_DebugMsg, +++ " ocacheL2: %'14lu adds %'14lu dels\n", +++ stats__ocacheL2_adds, +++ stats__ocacheL2_dels ); +++ VG_(message)(Vg_DebugMsg, ++ " ocacheL2: %'9lu max nodes %'9lu curr nodes\n", ++ stats__ocacheL2_n_nodes_max, ++ stats__ocacheL2_n_nodes ); ++@@ -7974,7 +8229,9 @@ static void mc_print_stats (void) ++ stats__nia_cache_queries, stats__nia_cache_misses); ++ } else { ++ tl_assert(ocacheL1 == NULL); ++- tl_assert(ocacheL2 == NULL); +++ for (UInt i = 0; i < 4096; i++ ) { +++ tl_assert(ocachesL2[1] == NULL); +++ } ++ } ++ } ++ ++@@ -8216,7 +8473,9 @@ static void mc_pre_clo_init(void) ++ if we need to, since the command line args haven't been ++ processed yet. Hence defer it to mc_post_clo_init. */ ++ tl_assert(ocacheL1 == NULL); ++- tl_assert(ocacheL2 == NULL); +++ for (UInt i = 0; i < 4096; i++ ) { +++ tl_assert(ocachesL2[i] == NULL); +++ } ++ ++ /* Check some important stuff. See extensive comments above ++ re UNALIGNED_OR_HIGH for background. */ +diff -Nru valgrind-3.18.1/debian/patches/series valgrind-3.18.1/debian/patches/series +--- valgrind-3.18.1/debian/patches/series 2021-10-22 19:18:53.000000000 +0900 ++++ valgrind-3.18.1/debian/patches/series 2022-01-19 06:18:55.000000000 +0900 +@@ -6,3 +6,4 @@ + 10_mpi-pkg-config.patch + 11_arm64-cache-flush.patch + 13_fix-path-to-vgdb.patch ++99_VALGRIND_3_18_1-58-g8ee165616.patch diff --git a/taskcluster/ci/packages/debian.yml b/taskcluster/ci/packages/debian.yml index 7436ea2f6b3b..394274e3b082 100644 --- a/taskcluster/ci/packages/debian.yml +++ b/taskcluster/ci/packages/debian.yml @@ -47,6 +47,7 @@ deb11-valgrind: dsc: url: http://snapshot.debian.org/archive/debian/20211022T144903Z/pool/main/v/valgrind/valgrind_3.18.1-1.dsc sha256: 6f2606862134b6387cdbe070920728fdb84c6ffa85308ca6f36222717a87aadf + patch: valgrind.diff deb10-python-zstandard: description: "python-zstandard for Debian buster"