[Build] gn_helpers.py: ToGNString(): Add option for pretty print.

ToGNString() in gn_helpers.py prints (nested) lists as one-liners. This can create very long lines, which are difficult to diff across versions. This CL reimplements ToGNString(), and adds a "pretty" option to print each leaf element of a list on a separate line, with indent. Details: * Keep pretty print off by default, and preserve existing output. * Reason: ToGNString() outputs may be committed into the code base. We want each caller to make its own decision regarding pretty print. * Allow newlines to appear in strings (GN expresses this as "$0x0A"). * Fix Unicode representation (after UTF-8 encoding, each byte needs "$0xXX"). * Add stricter checks for GN identifiers for {'identifier': 'value'}. * gn_helpers_unittest.py: * Add many more tests, for outputs without and with pretty print. * Make script compatible with Python 2 and Python 3. Bug: 1074446 Change-Id: I7a5e0174bdebf8bfb03cdcf42aed7c26ae56f120 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2231505 Commit-Queue: Samuel Huang <huangs@chromium.org> Reviewed-by: Andrew Grieve <agrieve@chromium.org> Cr-Original-Commit-Position: refs/heads/master@{#775776} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 007ab8b8721f26b184da0ef20e71b0cc28813761
2020-06-05 21:44:34 +00:00 · 2020-06-05 21:44:34 +00:00 · e8b256842f
--- a/gn_helpers.py
+++ b/gn_helpers.py
@ -34,51 +34,121 @@ class GNError(Exception):
  pass


-def ToGNString(value, allow_dicts=True):
+# Computes ASCII code of an element of encoded Python 2 str / Python 3 bytes.
+_Ord = ord if sys.version_info.major < 3 else lambda c: c
+
+
+def _TranslateToGnChars(s):
+  for decoded_ch in s.encode('utf-8'):  # str in Python 2, bytes in Python 3.
+    code = _Ord(decoded_ch)  # int
+    if code in (34, 36, 92):  # For '"', '$', or '\\'.
+      yield '\\' + chr(code)
+    elif 32 <= code < 127:
+      yield chr(code)
+    else:
+      yield '$0x%02X' % code
+
+
+def ToGNString(value, pretty=False):
  """Returns a stringified GN equivalent of a Python value.

  Args:
    value: The Python value to convert.
-    allow_dicts: Whether to enable converting dictionaries to GN scopes. This is
-      only possible at the top level (GN scope cannot be nested in a list), so
-      recursive calls should set this to False.
-
+    pretty: Whether to pretty print. If true, then non-empty lists are rendered
+        recursively with one item per line, with indents. Otherwise lists are
+        rendered without new line.
  Returns:
    The stringified GN equivalent to |value|.

  Raises:
    GNError: |value| cannot be printed to GN.
  """
-  if isinstance(value, str):
-    if value.find('\n') >= 0:
-      raise GNError('Trying to print a string with a newline in it.')
-    return '"' + \
-        value.replace('\\', '\\\\').replace('"', '\\"').replace('$', '\\$') + \
-        '"'

-  if sys.version_info.major < 3 and isinstance(value, unicode):
-    return ToGNString(value.encode('utf-8'))
+  if sys.version_info.major < 3:
+    basestring_compat = basestring
+  else:
+    basestring_compat = str

-  if isinstance(value, bool):
-    return 'true' if value else 'false'
+  # Emits all output tokens without intervening whitespaces.
+  def GenerateTokens(v, level):
+    if isinstance(v, basestring_compat):
+      yield '"' + ''.join(_TranslateToGnChars(v)) + '"'

-  if isinstance(value, list):
-    return '[ %s ]' % ', '.join(ToGNString(v) for v in value)
+    elif isinstance(v, bool):
+      yield 'true' if v else 'false'

-  if isinstance(value, dict):
-    if not allow_dicts:
-      raise GNError('Attempting to recursively print a dictionary.')
-    result = ''
-    for key in sorted(value):
-      if not isinstance(key, str) and not isinstance(key, unicode):
-        raise GNError('Dictionary key is not a string.')
-      result += '%s = %s\n' % (key, ToGNString(value[key], False))
-    return result
+    elif isinstance(v, int):
+      yield str(v)

-  if isinstance(value, int):
-    return str(value)
+    elif isinstance(v, list):
+      yield '['
+      for i, item in enumerate(v):
+        if i > 0:
+          yield ','
+        for tok in GenerateTokens(item, level + 1):
+          yield tok
+      yield ']'

-  raise GNError('Unsupported type when printing to GN.')
+    elif isinstance(v, dict):
+      if level > 0:
+        raise GNError('Attempting to recursively print a dictionary.')
+      for key in sorted(v):
+        if not isinstance(key, basestring_compat):
+          raise GNError('Dictionary key is not a string.')
+        if not key or key[0].isdigit() or not key.replace('_', '').isalnum():
+          raise GNError('Dictionary key is not a valid GN identifier.')
+        yield key  # No quotations.
+        yield '='
+        for tok in GenerateTokens(value[key], level + 1):
+          yield tok
+
+    else:  # Not supporting float: Add only when needed.
+      raise GNError('Unsupported type when printing to GN.')
+
+  can_start = lambda tok: tok and tok not in ',]='
+  can_end = lambda tok: tok and tok not in ',[='
+
+  # Adds whitespaces, trying to keep everything (except dicts) in 1 line.
+  def PlainGlue(gen):
+    prev_tok = None
+    for i, tok in enumerate(gen):
+      if i > 0:
+        if can_end(prev_tok) and can_start(tok):
+          yield '\n'  # New dict item.
+        elif prev_tok == '[' and tok == ']':
+          yield '  '  # Special case for [].
+        elif tok != ',':
+          yield ' '
+      yield tok
+      prev_tok = tok
+
+  # Adds whitespaces so non-empty lists can span multiple lines, with indent.
+  def PrettyGlue(gen):
+    prev_tok = None
+    level = 0
+    for i, tok in enumerate(gen):
+      if i > 0:
+        if can_end(prev_tok) and can_start(tok):
+          yield '\n' + '  ' * level  # New dict item.
+        elif tok == '=' or prev_tok in '=':
+          yield ' '  # Separator before and after '=', on same line.
+      if tok == ']':
+        level -= 1
+      if int(prev_tok == '[') + int(tok == ']') == 1:  # Exclude '[]' case.
+        yield '\n' + '  ' * level
+      yield tok
+      if tok == '[':
+        level += 1
+      if tok == ',':
+        yield '\n' + '  ' * level
+      prev_tok = tok
+
+  token_gen = GenerateTokens(value, 0)
+  ret = ''.join((PrettyGlue if pretty else PlainGlue)(token_gen))
+  # Add terminating '\n' for dict |value| or multi-line output.
+  if isinstance(value, dict) or '\n' in ret:
+    return ret + '\n'
+  return ret


 def FromGNString(input_string):
--- a/gn_helpers_unittest.py
+++ b/gn_helpers_unittest.py
@ -3,6 +3,7 @@
 # found in the LICENSE file.

 import mock
+import sys
 import textwrap
 import unittest

@ -11,9 +12,56 @@ import gn_helpers

 class UnitTest(unittest.TestCase):
  def test_ToGNString(self):
-    self.assertEqual(
-        gn_helpers.ToGNString([1, 'two', [ '"thr$\\', True, False, [] ]]),
-        '[ 1, "two", [ "\\"thr\\$\\\\", true, false, [  ] ] ]')
+    test_cases = [
+        (42, '42', '42'), ('foo', '"foo"', '"foo"'), (True, 'true', 'true'),
+        (False, 'false', 'false'), ('', '""', '""'),
+        ('\\$"$\\', '"\\\\\\$\\"\\$\\\\"', '"\\\\\\$\\"\\$\\\\"'),
+        (' \t\r\n', '" $0x09$0x0D$0x0A"', '" $0x09$0x0D$0x0A"'),
+        (u'\u2713', '"$0xE2$0x9C$0x93"', '"$0xE2$0x9C$0x93"'),
+        ([], '[  ]', '[]'), ([1], '[ 1 ]', '[\n  1\n]\n'),
+        ([3, 1, 4, 1], '[ 3, 1, 4, 1 ]', '[\n  3,\n  1,\n  4,\n  1\n]\n'),
+        (['a', True, 2], '[ "a", true, 2 ]', '[\n  "a",\n  true,\n  2\n]\n'),
+        ({
+            'single': 'item'
+        }, 'single = "item"\n', 'single = "item"\n'),
+        ({
+            'kEy': 137,
+            '_42A_Zaz_': [False, True]
+        }, '_42A_Zaz_ = [ false, true ]\nkEy = 137\n',
+         '_42A_Zaz_ = [\n  false,\n  true\n]\nkEy = 137\n'),
+        ([1, 'two',
+          ['"thr,.$\\', True, False, [],
+           u'(\u2713)']], '[ 1, "two", [ "\\"thr,.\\$\\\\", true, false, ' +
+         '[  ], "($0xE2$0x9C$0x93)" ] ]', '''[
+  1,
+  "two",
+  [
+    "\\"thr,.\\$\\\\",
+    true,
+    false,
+    [],
+    "($0xE2$0x9C$0x93)"
+  ]
+]
+'''),
+        ({
+            's': 'foo',
+            'n': 42,
+            'b': True,
+            'a': [3, 'x']
+        }, 'a = [ 3, "x" ]\nb = true\nn = 42\ns = "foo"\n',
+         'a = [\n  3,\n  "x"\n]\nb = true\nn = 42\ns = "foo"\n'),
+        (
+            [[[], [[]]], []],
+            '[ [ [  ], [ [  ] ] ], [  ] ]',
+            '[\n  [\n    [],\n    [\n      []\n    ]\n  ],\n  []\n]\n',
+        )
+    ]
+    for obj, exp_ugly, exp_pretty in test_cases:
+      out_ugly = gn_helpers.ToGNString(obj)
+      self.assertEqual(exp_ugly, out_ugly)
+      out_pretty = gn_helpers.ToGNString(obj, pretty=True)
+      self.assertEqual(exp_pretty, out_pretty)

  def test_UnescapeGNString(self):
    # Backslash followed by a \, $, or " means the folling character without
@ -139,7 +187,7 @@ class UnitTest(unittest.TestCase):
        some_arg2 = "val2"
    """))
    parser.ReplaceImports()
-    self.assertEquals(
+    self.assertEqual(
        parser.input,
        textwrap.dedent("""
        some_arg1 = "val1"
@ -155,9 +203,11 @@ class UnitTest(unittest.TestCase):
        some_arg2 = "val2"
    """))
    fake_import = 'some_imported_arg = "imported_val"'
-    with mock.patch('__builtin__.open', mock.mock_open(read_data=fake_import)):
+    builtin_var = '__builtin__' if sys.version_info.major < 3 else 'builtins'
+    open_fun = '{}.open'.format(builtin_var)
+    with mock.patch(open_fun, mock.mock_open(read_data=fake_import)):
      parser.ReplaceImports()
-    self.assertEquals(
+    self.assertEqual(
        parser.input,
        textwrap.dedent("""
        some_arg1 = "val1"