зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1413254 - Write a JSON file with normalized data; r=jmaher
The new file contains changes that optimize for final file size: * The components are stored as an integer index/key into a map instead of strings. * Paths are stored in hierarchical dicts. * No indentation is used. These changes significantly redude redundancy in string data. For every file in the repo, the raw JSON size is decreased from 27,074,607 to 7,643,529 bytes. MozReview-Commit-ID: 58FWSct8W39 --HG-- extra : rebase_source : 3ccc17d58fadaaac428b2eb2d3ad357fec9afafd
This commit is contained in:
Родитель
65ea8f0256
Коммит
f8e8ee5d2e
|
@ -186,6 +186,7 @@ class MozbuildFileCommands(MachCommandBase):
|
|||
import gzip
|
||||
|
||||
missing_component = set()
|
||||
seen_components = set()
|
||||
component_by_path = {}
|
||||
|
||||
# TODO operate in VCS space. This requires teaching the VCS reader
|
||||
|
@ -198,10 +199,29 @@ class MozbuildFileCommands(MachCommandBase):
|
|||
continue
|
||||
|
||||
c = m['BUG_COMPONENT']
|
||||
seen_components.add(c)
|
||||
component_by_path[p] = [c.product, c.component]
|
||||
|
||||
print('Examined %d files' % len(component_by_path))
|
||||
|
||||
# We also have a normalized versions of the file to components mapping
|
||||
# that requires far less storage space by eliminating redundant strings.
|
||||
indexed_components = {i: [c.product, c.component]
|
||||
for i, c in enumerate(sorted(seen_components))}
|
||||
components_index = {tuple(v): k for k, v in indexed_components.items()}
|
||||
normalized_component = {
|
||||
'components': indexed_components,
|
||||
'paths': {}
|
||||
}
|
||||
|
||||
for p, c in component_by_path.items():
|
||||
d = normalized_component['paths']
|
||||
while '/' in p:
|
||||
base, p = p.split('/', 1)
|
||||
d = d.setdefault(base, {})
|
||||
|
||||
d[p] = components_index[tuple(c)]
|
||||
|
||||
if not os.path.exists(out_dir):
|
||||
os.makedirs(out_dir)
|
||||
|
||||
|
@ -215,8 +235,15 @@ class MozbuildFileCommands(MachCommandBase):
|
|||
with open(missing_json, 'wb') as fh:
|
||||
json.dump({'missing': sorted(missing_component)}, fh, indent=2)
|
||||
|
||||
indexed_components_json = os.path.join(out_dir,
|
||||
'components-normalized.json')
|
||||
print('Writing %s' % indexed_components_json)
|
||||
with open(indexed_components_json, 'wb') as fh:
|
||||
# Don't indent so file is as small as possible.
|
||||
json.dump(normalized_component, fh, sort_keys=True)
|
||||
|
||||
# Write compressed versions of JSON files.
|
||||
for p in (components_json, missing_json):
|
||||
for p in (components_json, indexed_components_json, missing_json):
|
||||
gzip_path = '%s.gz' % p
|
||||
print('Writing %s' % gzip_path)
|
||||
with open(p, 'rb') as ifh, gzip.open(gzip_path, 'wb') as ofh:
|
||||
|
|
Загрузка…
Ссылка в новой задаче