From 88124ab263670b4252be7c13d03754a127cee90e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 27 Mar 2020 04:03:00 -0400
Subject: [PATCH 1/3] test-lib-functions: make packetize() more efficient

The packetize() function takes its input on stdin, and requires 4
separate sub-processes to format a simple string. We can do much better
by getting the length via the shell's "${#packet}" construct. The one
caveat is that the shell can't put a NUL into a variable, so we'll have
to continue to provide the stdin form for a few calls.

There are a few other cleanups here in the touched code:

 - the stdin form of packetize() had an extra stray "%s" when printing
   the packet

 - the converted calls in t5562 can be made simpler by redirecting
   output as a block, rather than repeated appending

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5562-http-backend-content-length.sh | 19 ++++++++++++-------
 t/test-lib-functions.sh                | 23 ++++++++++++++++-------
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/t/t5562-http-backend-content-length.sh b/t/t5562-http-backend-content-length.sh
index 4a110b307e..3f4ac71f83 100755
--- a/t/t5562-http-backend-content-length.sh
+++ b/t/t5562-http-backend-content-length.sh
@@ -53,15 +53,20 @@ test_expect_success 'setup' '
 	test_commit c1 &&
 	hash_head=$(git rev-parse HEAD) &&
 	hash_prev=$(git rev-parse HEAD~1) &&
-	printf "want %s" "$hash_head" | packetize >fetch_body &&
-	printf 0000 >>fetch_body &&
-	printf "have %s" "$hash_prev" | packetize >>fetch_body &&
-	printf done | packetize >>fetch_body &&
+	{
+		packetize "want $hash_head" &&
+		printf 0000 &&
+		packetize "have $hash_prev" &&
+		packetize "done"
+	} >fetch_body &&
 	test_copy_bytes 10 <fetch_body >fetch_body.trunc &&
 	hash_next=$(git commit-tree -p HEAD -m next HEAD^{tree}) &&
-	printf "%s %s refs/heads/newbranch\\0report-status\\n" "$ZERO_OID" "$hash_next" | packetize >push_body &&
-	printf 0000 >>push_body &&
-	echo "$hash_next" | git pack-objects --stdout >>push_body &&
+	{
+		printf "%s %s refs/heads/newbranch\\0report-status\\n" \
+			"$ZERO_OID" "$hash_next" | packetize &&
+		printf 0000 &&
+		echo "$hash_next" | git pack-objects --stdout
+	} >push_body &&
 	test_copy_bytes 10 <push_body >push_body.trunc &&
 	: >empty_body
 '
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 352c213d52..216918a58c 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1362,14 +1362,23 @@ nongit () {
 	)
 } 7>&2 2>&4
 
-# convert stdin to pktline representation; note that empty input becomes an
-# empty packet, not a flush packet (for that you can just print 0000 yourself).
+# convert function arguments or stdin (if no arguments given) to pktline
+# representation. If multiple arguments are given, they are separated by
+# whitespace and put in a single packet. Note that data containing NULs must be
+# given on stdin, and that empty input becomes an empty packet, not a flush
+# packet (for that you can just print 0000 yourself).
 packetize() {
-	cat >packetize.tmp &&
-	len=$(wc -c <packetize.tmp) &&
-	printf '%04x%s' "$(($len + 4))" &&
-	cat packetize.tmp &&
-	rm -f packetize.tmp
+	if test $# -gt 0
+	then
+		packet="$*"
+		printf '%04x%s' "$((4 + ${#packet}))" "$packet"
+	else
+		cat >packetize.tmp &&
+		len=$(wc -c <packetize.tmp) &&
+		printf '%04x' "$(($len + 4))" &&
+		cat packetize.tmp &&
+		rm -f packetize.tmp
+	fi
 }
 
 # Parse the input as a series of pktlines, writing the result to stdout.

From 4845b7724582a315eb4eb13d5058f85d21798e94 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 27 Mar 2020 04:03:38 -0400
Subject: [PATCH 2/3] upload-pack: handle unexpected delim packets

When processing the arguments list for a v2 ls-refs or fetch command, we
loop like this:

  while (packet_reader_read(request) != PACKET_READ_FLUSH) {
          const char *arg = request->line;
          ...handle arg...
  }

to read and handle packets until we see a flush. The hidden assumption
here is that anything except PACKET_READ_FLUSH will give us valid packet
data to read. But that's not true; PACKET_READ_DELIM or PACKET_READ_EOF
will leave request->line as NULL, and we'll segfault trying to look at
it.

Instead, we should follow the more careful model demonstrated on the
client side (e.g., in process_capabilities_v2): keep looping as long
as we get normal packets, and then make sure that we broke out of the
loop due to a real flush. That fixes the segfault and correctly
diagnoses any unexpected input from the client.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 ls-refs.c                      |  5 ++++-
 t/t5704-protocol-violations.sh | 33 +++++++++++++++++++++++++++++++++
 upload-pack.c                  |  5 ++++-
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100755 t/t5704-protocol-violations.sh

diff --git a/ls-refs.c b/ls-refs.c
index 818aef70a0..50d86866c6 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -93,7 +93,7 @@ int ls_refs(struct repository *r, struct argv_array *keys,
 
 	git_config(ls_refs_config, NULL);
 
-	while (packet_reader_read(request) != PACKET_READ_FLUSH) {
+	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
 		const char *arg = request->line;
 		const char *out;
 
@@ -105,6 +105,9 @@ int ls_refs(struct repository *r, struct argv_array *keys,
 			argv_array_push(&data.prefixes, out);
 	}
 
+	if (request->status != PACKET_READ_FLUSH)
+		die(_("expected flush after ls-refs arguments"));
+
 	head_ref_namespaced(send_ref, &data);
 	for_each_namespaced_ref(send_ref, &data);
 	packet_flush(1);
diff --git a/t/t5704-protocol-violations.sh b/t/t5704-protocol-violations.sh
new file mode 100755
index 0000000000..950cfb21fe
--- /dev/null
+++ b/t/t5704-protocol-violations.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description='Test responses to violations of the network protocol. In most
+of these cases it will generally be acceptable for one side to break off
+communications if the other side says something unexpected. We are mostly
+making sure that we do not segfault or otherwise behave badly.'
+. ./test-lib.sh
+
+test_expect_success 'extra delim packet in v2 ls-refs args' '
+	{
+		packetize command=ls-refs &&
+		printf 0001 &&
+		# protocol expects 0000 flush here
+		printf 0001
+	} >input &&
+	test_must_fail env GIT_PROTOCOL=version=2 \
+		git upload-pack . <input 2>err &&
+	test_i18ngrep "expected flush after ls-refs arguments" err
+'
+
+test_expect_success 'extra delim packet in v2 fetch args' '
+	{
+		packetize command=fetch &&
+		printf 0001 &&
+		# protocol expects 0000 flush here
+		printf 0001
+	} >input &&
+	test_must_fail env GIT_PROTOCOL=version=2 \
+		git upload-pack . <input 2>err &&
+	test_i18ngrep "expected flush after fetch arguments" err
+'
+
+test_done
diff --git a/upload-pack.c b/upload-pack.c
index c53249cac1..902d0ad5e1 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1252,7 +1252,7 @@ static void process_args(struct packet_reader *request,
 			 struct upload_pack_data *data,
 			 struct object_array *want_obj)
 {
-	while (packet_reader_read(request) != PACKET_READ_FLUSH) {
+	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
 		const char *arg = request->line;
 		const char *p;
 
@@ -1321,6 +1321,9 @@ static void process_args(struct packet_reader *request,
 		/* ignore unknown lines maybe? */
 		die("unexpected line: '%s'", arg);
 	}
+
+	if (request->status != PACKET_READ_FLUSH)
+		die(_("expected flush after fetch arguments"));
 }
 
 static int process_haves(struct oid_array *haves, struct oid_array *common,

From cacae4329fa4779127f4944e7807512e7b9e8cac Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 29 Mar 2020 11:02:26 -0400
Subject: [PATCH 3/3] test-lib-functions: simplify packetize() stdin code

The code path in packetize() for reading stdin needs to handle NUL
bytes, so we can't rely on shell variables. However, the current code
takes a whopping 4 processes and uses a temporary file. We can do this
much more simply and efficiently by using a single perl invocation (and
we already rely on perl in the matching depacketize() function).

We'll keep the non-stdin code path as it is, since that uses zero extra
processes.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib-functions.sh | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 216918a58c..88b7dbd69a 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1373,11 +1373,10 @@ packetize() {
 		packet="$*"
 		printf '%04x%s' "$((4 + ${#packet}))" "$packet"
 	else
-		cat >packetize.tmp &&
-		len=$(wc -c <packetize.tmp) &&
-		printf '%04x' "$(($len + 4))" &&
-		cat packetize.tmp &&
-		rm -f packetize.tmp
+		perl -e '
+			my $packet = do { local $/; <STDIN> };
+			printf "%04x%s", 4 + length($packet), $packet;
+		'
 	fi
 }