#!/usr/bin/env python
"""Count how many times each JVM bytecode mnemonic is used in a JAR.

Usage:
    opcodeCoverage.py dejar <jar> <destPath>
        Unzip <jar> into <destPath>, disassemble every ``.class`` file with
        ``javap`` into a sibling ``.jbc`` file, then delete every file under
        <destPath> that is not a ``.jbc`` file.

    opcodeCoverage.py <path>
        Read every ``.jbc`` file under <path> and print, for each mnemonic in
        ``table``, how many whitespace-separated tokens match it exactly,
        most frequent first.

Provenance: this is the script produced by a three-commit patch series by
Marco Castelluccio dated 2014-10-17: "Implement Python tool that counts how
many times a bytecode is used in a JAR. Fixes #468." (PATCH 1/3),
"Order by frequency" (PATCH 2/3) and "Count exact matches" (PATCH 3/3).
"""

import io
import os
import re
import subprocess
import sys

from collections import Counter

# Disassembly text loaded by readAll(), keyed by the .jbc file's path relative
# to the scanned root with the ".jbc" suffix removed.
cache = {}

# Mnemonics whose usage is reported by countOpcodeUsage().
table = [
    "nop", "aconst_null", "iconst_m1", "iconst_0", "iconst_1", "iconst_2",
    "iconst_3", "iconst_4", "iconst_5", "lconst_0", "lconst_1", "fconst_0",
    "fconst_1", "fconst_2", "dconst_0", "dconst_1", "bipush", "sipush",
    "ldc", "ldc_w", "ldc2_w", "iload", "lload", "fload", "dload", "aload",
    "iload_0", "iload_1", "iload_2", "iload_3", "lload_0", "lload_1",
    "lload_2", "lload_3", "fload_0", "fload_1", "fload_2", "fload_3",
    "dload_0", "dload_1", "dload_2", "dload_3", "aload_0", "aload_1",
    "aload_2", "aload_3", "iaload", "laload", "faload", "daload", "aaload",
    "baload", "caload", "saload", "istore", "lstore", "fstore", "dstore",
    "astore", "istore_0", "istore_1", "istore_2", "istore_3", "lstore_0",
    "lstore_1", "lstore_2", "lstore_3", "fstore_0", "fstore_1", "fstore_2",
    "fstore_3", "dstore_0", "dstore_1", "dstore_2", "dstore_3", "astore_0",
    "astore_1", "astore_2", "astore_3", "iastore", "lastore", "fastore",
    "dastore", "aastore", "bastore", "castore", "sastore", "pop", "pop2",
    "dup", "dup_x1", "dup_x2", "dup2", "dup2_x1", "dup2_x2", "swap", "iadd",
    "ladd", "fadd", "dadd", "isub", "lsub", "fsub", "dsub", "imul", "lmul",
    "fmul", "dmul", "idiv", "ldiv", "fdiv", "ddiv", "irem", "lrem", "frem",
    "drem", "ineg", "lneg", "fneg", "dneg", "ishl", "lshl", "ishr", "lshr",
    "iushr", "lushr", "iand", "land", "ior", "lor", "ixor", "lxor", "iinc",
    "i2l", "i2f", "i2d", "l2i", "l2f", "l2d", "f2i", "f2l", "f2d", "d2i",
    "d2l", "d2f", "i2b", "i2c", "i2s", "lcmp", "fcmpl", "fcmpg", "dcmpl",
    "dcmpg", "ifeq", "ifne", "iflt", "ifge", "ifgt", "ifle", "if_icmpeq",
    "if_icmpne", "if_icmplt", "if_icmpge", "if_icmpgt", "if_icmple",
    "if_acmpeq", "if_acmpne", "goto", "jsr", "ret", "tableswitch",
    "lookupswitch", "ireturn", "lreturn", "freturn", "dreturn", "areturn",
    "return", "getstatic", "putstatic", "getfield", "putfield",
    "invokevirtual", "invokespecial", "invokestatic", "invokeinterface",
    "new", "newarray", "anewarray", "arraylength", "athrow", "checkcast",
    "instanceof", "monitorenter", "monitorexit", "wide", "multianewarray",
    "ifnull", "ifnonnull", "goto_w", "jsr_w",
]


def decompile(jar, path):
    """Unpack *jar* into *path* and disassemble every class it contains.

    Each ``X.class`` found under *path* is run through ``javap`` and the
    output is written next to it as ``X.jbc``. Afterwards every file under
    *path* that does not end in ``.jbc`` is deleted.

    WARNING: the clean-up removes *all* non-``.jbc`` files under *path*, not
    only those that came from the JAR, so *path* should be a scratch
    directory.

    The external tools are invoked with argument lists (no shell), so paths
    containing spaces or shell metacharacters are handled safely.
    """
    status = subprocess.call(["unzip", jar, "-d", path])
    if status != 0:
        # Keep going: whatever was extracted can still be processed.
        sys.stderr.write("unzip exited with status %d\n" % status)

    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".class"):
                continue
            print("Decompiling " + name + "...")
            # javap is handed the path without the ".class" suffix.
            base = os.path.join(root, name[:-len(".class")])
            with open(base + ".jbc", "wb") as out:
                status = subprocess.call(
                    ["javap", "-verbose", "-l", "-c", "-s", "-private", base],
                    stdout=out)
            if status != 0:
                sys.stderr.write(
                    "javap exited with status %d for %s\n" % (status, base))

    # Second pass, after all disassembly is done: keep only the .jbc output.
    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".jbc"):
                os.remove(os.path.join(root, name))


def readAll(path):
    """Load every ``.jbc`` file under *path* into the module-level ``cache``.

    Entries are keyed by the file's path relative to *path* (minus the
    ``.jbc`` suffix) so that classes with the same simple name in different
    directories do not overwrite one another. Existing cache entries are
    left in place.
    """
    suffix = ".jbc"
    for root, dirs, files in os.walk(path):
        for name in files:
            # Skip other files and a file literally named ".jbc".
            if not name.endswith(suffix) or len(name) <= len(suffix):
                continue
            full = os.path.join(root, name)
            key = os.path.relpath(full, path)[:-len(suffix)]
            # Undecodable bytes are replaced rather than aborting the run;
            # mnemonics are plain ASCII so counts are unaffected.
            with io.open(full, encoding="utf-8", errors="replace") as handle:
                cache[key] = handle.read()


def countOpcodeUsage():
    """Print ``<mnemonic>: <count>`` for every entry of ``table``.

    A count is the number of whitespace-separated tokens, across all cached
    disassemblies, that equal the mnemonic exactly. Lines are ordered by
    descending count; mnemonics with equal counts keep their ``table`` order.
    """
    totals = Counter()
    for text in cache.values():
        # Tokenise each file once instead of once per mnemonic.
        totals.update(text.split())

    # sorted() is stable, so ties retain table order even with reverse=True.
    for opcode in sorted(table, key=lambda name: totals[name], reverse=True):
        print(opcode + ": " + str(totals[opcode]))


def main(argv):
    """Command-line entry point.

    Args:
        argv: full argument vector, program name first (as ``sys.argv``).

    Returns:
        0 on success, 2 when no arguments were supplied.
    """
    if len(argv) == 4 and argv[1] == "dejar":
        decompile(argv[2], argv[3])
        return 0

    if len(argv) < 2:
        prog = argv[0] if argv else "opcodeCoverage.py"
        sys.stderr.write(
            "usage: " + prog + " dejar <jar> <destPath>\n"
            "       " + prog + " <path>\n")
        return 2

    readAll(argv[1])
    countOpcodeUsage()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))