From 96ffe11d0f09f647883d732f9456b08f4807c6c0 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Thu, 28 Jan 2021 13:49:07 -0800
Subject: [PATCH] Remove complexity in caching of llvm-nm results. NFC (#13360)

This extra argument (include_internal) and the extra complexity
it entailed had only a single call site in the test suite.

Calling llvm-nm directly in the test suite is much simpler.
---
 tests/test_other.py |  4 ++--
 tools/building.py   | 44 ++++++++++++++++++--------------------------
 2 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/tests/test_other.py b/tests/test_other.py
index 90eaf83e3..3b68e6609 100644
--- a/tests/test_other.py
+++ b/tests/test_other.py
@@ -6502,8 +6502,8 @@ int main() {
 
     # To this test to be successful, foo() shouldn't have been inlined above and
     # foo() should be in the function list
-    syms = building.llvm_nm('test2.o', include_internal=True)
-    assert 'foo' in syms.defs, 'foo() should not be inlined'
+    output = self.run_process([shared.LLVM_NM, 'test2.o'], stdout=PIPE).stdout
+    self.assertContained('foo', output)
 
   def test_output_eol(self):
     for params in [[], ['--proxy-to-worker'], ['--proxy-to-worker', '-s', 'WASM=0']]:
diff --git a/tools/building.py b/tools/building.py
index c47b6c968..8b0b436b0 100644
--- a/tools/building.py
+++ b/tools/building.py
@@ -39,10 +39,8 @@ logger = logging.getLogger('building')
 multiprocessing_pool = None
 binaryen_checked = False
 
-# internal caches
-internal_nm_cache = {}
 # cache results of nm - it can be slow to run
-uninternal_nm_cache = {}
+nm_cache = {}
 # Stores the object files contained in different archive files passed as input
 ar_contents = {}
 _is_ar_cache = {}
@@ -150,12 +148,11 @@ def unique_ordered(values):
   return list(filter(check, values))
 
 
-# clear internal caches. this is not normally needed, except if the clang/LLVM
+# clear caches. this is not normally needed, except if the clang/LLVM
 # used changes inside this invocation of Building, which can happen in the benchmarker
 # when it compares different builds.
 def clear():
-  internal_nm_cache.clear()
-  uninternal_nm_cache.clear()
+  nm_cache.clear()
   ar_contents.clear()
   _is_ar_cache.clear()
 
@@ -372,7 +369,7 @@ def make_paths_absolute(f):
 
 
 # Runs llvm-nm in parallel for the given list of files.
-# The results are populated in uninternal_nm_cache
+# The results are populated in nm_cache
 # multiprocessing_pool: An existing multiprocessing pool to reuse for the operation, or None
 # to have the function allocate its own.
 def parallel_llvm_nm(files):
@@ -383,7 +380,7 @@ def parallel_llvm_nm(files):
     for i, file in enumerate(files):
       if object_contents[i].returncode != 0:
         logger.debug('llvm-nm failed on file ' + file + ': return code ' + str(object_contents[i].returncode) + ', error: ' + object_contents[i].output)
-      uninternal_nm_cache[file] = object_contents[i]
+      nm_cache[file] = object_contents[i]
     return object_contents
 
 
@@ -398,7 +395,7 @@ def read_link_inputs(files):
 
       if absolute_path_f not in ar_contents and is_ar(absolute_path_f):
         archive_names.append(absolute_path_f)
-      elif absolute_path_f not in uninternal_nm_cache and is_bitcode(absolute_path_f):
+      elif absolute_path_f not in nm_cache and is_bitcode(absolute_path_f):
         object_names.append(absolute_path_f)
 
     # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
@@ -419,7 +416,7 @@ def read_link_inputs(files):
 
     for o in object_names_in_archives:
       for f in o['files']:
-        if f not in uninternal_nm_cache:
+        if f not in nm_cache:
           object_names.append(f)
 
     # Next, extract symbols from all object files (either standalone or inside archives we just extracted)
@@ -730,7 +727,7 @@ def get_command_with_possible_response_file(cmd):
   return new_cmd
 
 
-def parse_symbols(output, include_internal=False):
+def parse_symbols(output):
   defs = []
   undefs = []
   commons = []
@@ -753,42 +750,37 @@ def parse_symbols(output, include_internal=False):
         undefs.append(symbol)
       elif status == 'C':
         commons.append(symbol)
-      elif (not include_internal and status == status.upper()) or \
-           (include_internal and status in ['W', 't', 'T', 'd', 'D']):
+      elif status == status.upper():
         # FIXME: using WTD in the previous line fails due to llvm-nm behavior on macOS,
         #        so for now we assume all uppercase are normally defined external symbols
         defs.append(symbol)
   return ObjectFileInfo(0, None, set(defs), set(undefs), set(commons))
 
 
-def llvm_nm_uncached(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
+def llvm_nm_uncached(filename, stdout=PIPE, stderr=PIPE):
   # LLVM binary ==> list of symbols
   proc = run_process([LLVM_NM, filename], stdout=stdout, stderr=stderr, check=False)
   if proc.returncode == 0:
-    return parse_symbols(proc.stdout, include_internal)
+    return parse_symbols(proc.stdout)
   else:
     return ObjectFileInfo(proc.returncode, str(proc.stdout) + str(proc.stderr))
 
 
-def llvm_nm(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
+def llvm_nm(filename, stdout=PIPE, stderr=PIPE):
   # Always use absolute paths to maximize cache usage
   filename = os.path.abspath(filename)
 
-  if include_internal and filename in internal_nm_cache:
-    return internal_nm_cache[filename]
-  elif not include_internal and filename in uninternal_nm_cache:
-    return uninternal_nm_cache[filename]
+  if filename in nm_cache:
+    return nm_cache[filename]
 
-  ret = llvm_nm_uncached(filename, stdout, stderr, include_internal)
+  ret = llvm_nm_uncached(filename, stdout, stderr)
 
   if ret.returncode != 0:
     logger.debug('llvm-nm failed on file ' + filename + ': return code ' + str(ret.returncode) + ', error: ' + ret.output)
 
-  # Even if we fail, write the results to the NM cache so that we don't keep trying to llvm-nm the failing file again later.
-  if include_internal:
-    internal_nm_cache[filename] = ret
-  else:
-    uninternal_nm_cache[filename] = ret
+  # Even if we fail, write the results to the NM cache so that we don't keep trying to llvm-nm the
+  # failing file again later.
+  nm_cache[filename] = ret
 
   return ret