chromium-src-build/escape_unicode.py

#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Convert any unicode characters found in the input file to C literals."""

import codecs
import optparse
import os
import sys


def main(argv):
  """Parse the command line and write the escaped copy of the input file.

  The output file is placed in the directory given by -o and is named after
  the input file with its '.utf8' extension removed.

  Args:
    argv: The full argument vector, including the program name at index 0
        (as in sys.argv): prog -o <output_dir> <input_file>.

  Returns:
    0 on success, 1 if the arguments are missing or invalid.
  """
  parser = optparse.OptionParser()
  usage = 'Usage: %prog -o <output_dir> <input_file>'
  parser.set_usage(usage)
  parser.add_option('-o', dest='output_dir')

  # argv still contains the program name, so it comes back as arglist[0] and
  # the input file is expected at arglist[1].
  options, arglist = parser.parse_args(argv)

  # Diagnostics are written with sys.stderr.write() rather than the print
  # statement so this function is valid on both Python 2 and Python 3.
  if not options.output_dir:
    sys.stderr.write('output_dir required\n')
    return 1

  if len(arglist) != 2:
    sys.stderr.write('input_file required\n')
    return 1

  in_filename = arglist[1]

  if not in_filename.endswith('.utf8'):
    sys.stderr.write('input_file should end in .utf8\n')
    return 1

  # Drop the '.utf8' extension and any directory part of the input path.
  out_basename = os.path.basename(os.path.splitext(in_filename)[0])
  out_filename = os.path.join(options.output_dir, out_basename)

  WriteEscapedFile(in_filename, out_filename)
  return 0


def WriteEscapedFile(in_filename, out_filename):
  """Copy a UTF-8 file to an ASCII file, escaping non-ASCII characters.

  Every character above U+007F is replaced by the C hex escapes (\\xNN) of
  its UTF-8 bytes. ASCII characters are copied through unchanged.

  Args:
    in_filename: Path of the UTF-8 encoded input file.
    out_filename: Path of the ASCII output file; it is overwritten.
  """
  # Read inside a with-block so the input handle is closed promptly.
  with codecs.open(in_filename, 'r', 'utf8') as in_file:
    input_data = in_file.read()

  hex_digits = '0123456789abcdefABCDEF'
  pieces = []
  for i, char in enumerate(input_data):
    if ord(char) > 127:
      # Format each byte explicitly instead of slicing repr(); repr() of a
      # byte string differs between Python 2 and Python 3 (b'...' prefix).
      utf8_bytes = bytearray(char.encode('utf8'))
      pieces.append(''.join('\\x%02x' % byte for byte in utf8_bytes))
      # A C hex escape absorbs every hex digit that follows it, so an empty
      # string literal is inserted to terminate the escape. The explicit
      # emptiness check matters at end of input: '' is "in" every string,
      # which would otherwise append a spurious '""'.
      next_char = input_data[i + 1:i + 2]
      if next_char and next_char in hex_digits:
        pieces.append('""')
    else:
      pieces.append(char)

  with codecs.open(out_filename, 'w', 'ascii') as out_file:
    out_file.write(''.join(pieces))


# Script entry point: run main() on the full command line and use its return
# value as the process exit status.
if __name__ == '__main__':
  exit_code = main(sys.argv)
  sys.exit(exit_code)
Fix python scripts in src/build/ Make sure that: - shebang is only present for executable files - shebang is #!/usr/bin/env python - __main__ is only present for executable files - file's executable bit is coherent Also fix EOF LF to be only one. TBR=michaelbai@chromium.org BUG=105108 TEST= Review URL: http://codereview.chromium.org/8667008 git-svn-id: http://src.chromium.org/svn/trunk/src/build@111385 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-11-23 23:13:44 +04:00			`#!/usr/bin/env python`
Make autofill regular expressions unicode again. Instead of compiling the source file with UTF8 directly, run it through a python script that rewrites the UTF8 into C literals. BUG=95858 Review URL: http://codereview.chromium.org/7891020 git-svn-id: http://src.chromium.org/svn/trunk/src/build@101236 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-09-15 07:35:53 +04:00			`# Copyright (c) 2011 The Chromium Authors. All rights reserved.`
			`# Use of this source code is governed by a BSD-style license that can be`
			`# found in the LICENSE file.`

			`"""Convert any unicode characters found in the input file to C literals."""`

			`import codecs`
			`import optparse`
			`import os`
			`import sys`

Fix python scripts in src/build/ Make sure that: - shebang is only present for executable files - shebang is #!/usr/bin/env python - __main__ is only present for executable files - file's executable bit is coherent Also fix EOF LF to be only one. TBR=michaelbai@chromium.org BUG=105108 TEST= Review URL: http://codereview.chromium.org/8667008 git-svn-id: http://src.chromium.org/svn/trunk/src/build@111385 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-11-23 23:13:44 +04:00
Make autofill regular expressions unicode again. Instead of compiling the source file with UTF8 directly, run it through a python script that rewrites the UTF8 into C literals. BUG=95858 Review URL: http://codereview.chromium.org/7891020 git-svn-id: http://src.chromium.org/svn/trunk/src/build@101236 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-09-15 07:35:53 +04:00			`def main(argv):`
			`parser = optparse.OptionParser()`
			`usage = 'Usage: %prog -o <output_dir> <input_file>'`
			`parser.set_usage(usage)`
			`parser.add_option('-o', dest='output_dir')`

			`options, arglist = parser.parse_args(argv)`

			`if not options.output_dir:`
			`print "output_dir required"`
			`return 1`

			`if len(arglist) != 2:`
			`print "input_file required"`
			`return 1`

			`in_filename = arglist[1]`

			`if not in_filename.endswith('.utf8'):`
			`print "input_file should end in .utf8"`
			`return 1`

			`out_filename = os.path.join(options.output_dir, os.path.basename(`
			`os.path.splitext(in_filename)[0]))`

			`WriteEscapedFile(in_filename, out_filename)`
Fix python scripts in src/build/ Make sure that: - shebang is only present for executable files - shebang is #!/usr/bin/env python - __main__ is only present for executable files - file's executable bit is coherent Also fix EOF LF to be only one. TBR=michaelbai@chromium.org BUG=105108 TEST= Review URL: http://codereview.chromium.org/8667008 git-svn-id: http://src.chromium.org/svn/trunk/src/build@111385 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-11-23 23:13:44 +04:00			`return 0`
Make autofill regular expressions unicode again. Instead of compiling the source file with UTF8 directly, run it through a python script that rewrites the UTF8 into C literals. BUG=95858 Review URL: http://codereview.chromium.org/7891020 git-svn-id: http://src.chromium.org/svn/trunk/src/build@101236 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-09-15 07:35:53 +04:00

			`def WriteEscapedFile(in_filename, out_filename):`
			`input_data = codecs.open(in_filename, 'r', 'utf8').read()`
			`with codecs.open(out_filename, 'w', 'ascii') as out_file:`
			`for i, char in enumerate(input_data):`
			`if ord(char) > 127:`
			`out_file.write(repr(char.encode('utf8'))[1:-1])`
			`if input_data[i + 1:i + 2] in '0123456789abcdefABCDEF':`
			`out_file.write('""')`
			`else:`
			`out_file.write(char.encode('ascii'))`


			`if __name__ == '__main__':`
Fix python scripts in src/build/ Make sure that: - shebang is only present for executable files - shebang is #!/usr/bin/env python - __main__ is only present for executable files - file's executable bit is coherent Also fix EOF LF to be only one. TBR=michaelbai@chromium.org BUG=105108 TEST= Review URL: http://codereview.chromium.org/8667008 git-svn-id: http://src.chromium.org/svn/trunk/src/build@111385 4ff67af0-8c30-449e-8e8b-ad334ec8d88c 2011-11-23 23:13:44 +04:00			`sys.exit(main(sys.argv))`