From 34f67c9619b56ab4718427c1a31071641f0dccdb Mon Sep 17 00:00:00 2001 From: Kilian Kilger Date: Fri, 8 Jul 2022 08:01:00 +0000 Subject: [PATCH 1/2] git-p4: fix bug with encoding of p4 client name The Perforce client name can contain arbitrary characters which do not decode to UTF-8. Use the fallback strategy implemented in metadata_stream_to_writable_bytes() also for the client name. Signed-off-by: Kilian Kilger Reviewed-by: Tao Klerks Signed-off-by: Junio C Hamano --- git-p4.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/git-p4.py b/git-p4.py index 8fbf6eb1fe..e65d6a2b0e 100755 --- a/git-p4.py +++ b/git-p4.py @@ -854,12 +854,12 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, if bytes is not str: # Decode unmarshalled dict to use str keys and values, except for: # - `data` which may contain arbitrary binary data - # - `desc` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes + # - `desc` or `client` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text, handled by decode_path() decoded_entry = {} for key, value in entry.items(): key = key.decode() - if isinstance(value, bytes) and not (key in ('data', 'desc', 'FullName', 'path', 'clientFile') or key.startswith('depotFile')): + if isinstance(value, bytes) and not (key in ('data', 'desc', 'FullName', 'path', 'clientFile', 'client') or key.startswith('depotFile')): value = value.decode() decoded_entry[key] = value # Parse out data if it's an error response @@ -871,6 +871,8 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, continue if 'desc' in entry: entry['desc'] = metadata_stream_to_writable_bytes(entry['desc']) + if 'client' in entry: + entry['client'] = metadata_stream_to_writable_bytes(entry['client']) if 'FullName' in entry: entry['FullName'] = metadata_stream_to_writable_bytes(entry['FullName']) if cb is not None: From d205483695366803e912da6ae24380c33c293467 Mon Sep 17 00:00:00 2001 From: Kilian Kilger Date: Thu, 21 Jul 2022 09:07:20 +0000 Subject: [PATCH 2/2] git-p4: refactoring of p4CmdList() The function p4CmdList executes a Perforce command and decodes the marshalled python dictionary. Special care has to be taken for certain dictionary values which contain non-unicode characters. The old handling contained separate hacks for each of the corresponding dictionary keys. This commit tries to refactor the coding to handle the special cases uniformely. Signed-off-by: Kilian Kilger Signed-off-by: Junio C Hamano --- git-p4.py | 53 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/git-p4.py b/git-p4.py index e65d6a2b0e..9323b943c6 100755 --- a/git-p4.py +++ b/git-p4.py @@ -822,6 +822,42 @@ def isModeExecChanged(src_mode, dst_mode): return isModeExec(src_mode) != isModeExec(dst_mode) +def p4KeysContainingNonUtf8Chars(): + """Returns all keys which may contain non UTF-8 encoded strings + for which a fallback strategy has to be applied. + """ + return ['desc', 'client', 'FullName'] + + +def p4KeysContainingBinaryData(): + """Returns all keys which may contain arbitrary binary data + """ + return ['data'] + + +def p4KeyContainsFilePaths(key): + """Returns True if the key contains file paths. These are handled by decode_path(). + Otherwise False. + """ + return key.startswith('depotFile') or key in ['path', 'clientFile'] + + +def p4KeyWhichCanBeDirectlyDecoded(key): + """Returns True if the key can be directly decoded as UTF-8 string + Otherwise False. + + Keys which can not be encoded directly: + - `data` which may contain arbitrary binary data + - `desc` or `client` or `FullName` which may contain non-UTF8 encoded text + - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text, handled by decode_path() + """ + if key in p4KeysContainingNonUtf8Chars() or \ + key in p4KeysContainingBinaryData() or \ + p4KeyContainsFilePaths(key): + return False + return True + + def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, errors_as_exceptions=False, *k, **kw): @@ -851,15 +887,13 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, try: while True: entry = marshal.load(p4.stdout) + if bytes is not str: - # Decode unmarshalled dict to use str keys and values, except for: - # - `data` which may contain arbitrary binary data - # - `desc` or `client` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes - # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text, handled by decode_path() + # Decode unmarshalled dict to use str keys and values. Special cases are handled below. decoded_entry = {} for key, value in entry.items(): key = key.decode() - if isinstance(value, bytes) and not (key in ('data', 'desc', 'FullName', 'path', 'clientFile', 'client') or key.startswith('depotFile')): + if isinstance(value, bytes) and p4KeyWhichCanBeDirectlyDecoded(key): value = value.decode() decoded_entry[key] = value # Parse out data if it's an error response @@ -869,12 +903,9 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, if skip_info: if 'code' in entry and entry['code'] == 'info': continue - if 'desc' in entry: - entry['desc'] = metadata_stream_to_writable_bytes(entry['desc']) - if 'client' in entry: - entry['client'] = metadata_stream_to_writable_bytes(entry['client']) - if 'FullName' in entry: - entry['FullName'] = metadata_stream_to_writable_bytes(entry['FullName']) + for key in p4KeysContainingNonUtf8Chars(): + if key in entry: + entry[key] = metadata_stream_to_writable_bytes(entry[key]) if cb is not None: cb(entry) else: