Merge pull request #20 from microsoft/compare_baseline2

Add script to compare benchmark data with baseline
2020-06-21 23:34:30 -07:00 · 2020-06-21 23:34:30 -07:00 · cf431c280c
--- a/UNIX/azure_table.py
+++ b/UNIX/azure_table.py
@@ -18,13 +18,25 @@ class AzureTableConnection:
  def commitBatch(self, batch):
    self.tableService.commit_batch(self.tableName, batch)

+  def getData(self, partitionKey, rowKey):
+    """Query this table for the entities stored under one row-key prefix.
+
+    Builds a filter that selects entities whose PartitionKey equals
+    `partitionKey` and whose RowKey is strictly greater than '<rowKey>_0'
+    and strictly less than '<rowKey>_9999'. Both bounds are exclusive, so
+    an entity whose RowKey is exactly '<rowKey>_0' is not matched.
+    NOTE(review): the bounds are quoted string literals, so the service
+    presumably compares them as strings rather than numbers -- confirm.
+
+    Returns whatever tableService.query_entities returns for that filter.
+    """
+    startRowKey = '{0}_0'.format(rowKey)
+    endRowKey = '{0}_9999'.format(rowKey)
+    # The backslash continuations sit inside the string literal, so the
+    # leading whitespace of the next two lines is part of the filter text.
+    filterExpression = "PartitionKey eq '{0}' and \
+                        RowKey gt '{1}' and \
+                        RowKey lt '{2}'" \
+                        .format(partitionKey, startRowKey, endRowKey)
+    return self.tableService.query_entities(self.tableName, filter=filterExpression)
+

 def getTableConnection():
  tableName = 'benchmark'
  azureTable = AzureTableConnection(tableName)
-  assert azureTable, "Connection to Azure Table failed"
+  assert azureTable, 'Connection to Azure Table failed'
  return azureTable

+def get(partitionKey, rowKey):
+  """Open a table connection and return its getData result for the given keys."""
+  return getTableConnection().getData(partitionKey, rowKey)

 def put(runData, testData):
  azureTable = getTableConnection()
@@ -33,13 +45,11 @@ def put(runData, testData):

  # Add the run data to the batch.
  for key, value in runData.items():
-    if key == 'user':
-      user = value
-      entity['PartitionKey'] = user
+    if key == 'partitionkey':
+      entity['PartitionKey'] = value

-    elif key == 'timestamp':
-      timestamp = value
-      entity['RowKey'] = '{0}_{1}'.format(timestamp, 0)
+    elif key == 'rowkey':
+      entity['RowKey'] = '{0}_{1}'.format(value, 0)

    else:
      entity[key] = str(value)
@@ -50,8 +60,8 @@ def put(runData, testData):
  rowNo = 1
  for testName, testResults in testData.items():
    entity = {}
-    entity['PartitionKey'] = user
-    entity['RowKey'] = '{0}_{1}'.format(timestamp, rowNo)
+    entity['PartitionKey'] = runData['partitionkey']
+    entity['RowKey'] = '{0}_{1}'.format(runData['rowkey'], rowNo)

    entity['test_name'] = testName
    for metric, value in testResults.items():
@@ -71,6 +81,6 @@ def put(runData, testData):
  print '======================================================================'
  print 'Benchmark data successfully saved to Azure Table Storage'
  print '======================================================================'
-  print 'PartitionKey: {0}'.format(user)
-  print 'RowKey (for the config record): {0}_{1}'.format(timestamp, 0)
+  print 'PartitionKey: {0}'.format(runData['partitionkey'])
+  print 'RowKey: {0}'.format(runData['rowkey'])
  print '# of records inserted: {0}'.format(rowNo)
--- a/UNIX/compare_benchmark_data.py
+++ b/UNIX/compare_benchmark_data.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+
+import azure_table
+import json
+import os
+
+# Metric categories; compareData prints one section per entry, in this order.
+metrics = ['compile_time', 'size', 'link_time', 'exec_times', 'section_sizes']
+
+def formatData(data):
+  """Regroup benchmark entities by metric, then by test name.
+
+  Args:
+    data: iterable of entities exposing the attributes test_name,
+      compile_time, size, link_time, exec_time and section_sizes.
+      exec_time and section_sizes are strings that become valid JSON
+      objects once single quotes are replaced by double quotes.
+
+  Returns:
+    A dict with one entry per name in `metrics`. Each entry maps a test
+    name to that test's value; for 'exec_times' and 'section_sizes' the
+    value is itself a dict parsed from the entity's string field.
+  """
+  result = dict((metric, {}) for metric in metrics)
+
+  for d in data:
+    test_name = d.test_name
+
+    result['compile_time'][test_name] = d.compile_time
+    result['size'][test_name] = d.size
+    result['link_time'][test_name] = d.link_time
+
+    # json only accepts double-quoted strings, so swap the quotes first
+    # (presumably the fields were saved via str(dict) -- confirm in put()).
+    exec_times = json.loads(d.exec_time.replace("'", '"'))
+    result['exec_times'][test_name] = dict(exec_times)
+
+    section_sizes = json.loads(d.section_sizes.replace("'", '"'))
+    result['section_sizes'][test_name] = dict(section_sizes)
+
+  return result
+
+def prettyPrint(k, b, r):
+  """Print one tab-separated row: label, baseline, run value, percent diff.
+
+  Both values are converted to float and rounded to two decimals. The
+  last column is (baseline - run) * 100 / baseline, rounded to two
+  decimals; it is left at 0 when the rounded baseline is not positive,
+  which also avoids dividing by zero.
+  """
+  baseline = round(float(b), 2)
+  current = round(float(r), 2)
+
+  percent = round((baseline - current) * 100 / baseline, 2) if baseline > 0 else 0
+
+  print '{0}\t{1}\t{2}\t{3}'.format(k, baseline, current, percent)
+
+def compareData(baselineData, runData):
+  """Print a per-metric comparison of a run against a baseline.
+
+  Both arguments are passed through formatData. For each name in
+  `metrics` a banner is printed, then the baseline's test names are
+  visited in sorted order; tests missing from the run are skipped.
+  Scalar metrics produce one prettyPrint row per test. For 'exec_times'
+  and 'section_sizes' the per-test value is a dict, so the test name is
+  printed as a sub-heading followed by one row per key present on both
+  sides.
+  """
+  b = formatData(baselineData)
+  r = formatData(runData)
+
+  # Metrics whose per-test value is a dict of named sub-values.
+  nested_metrics = ('exec_times', 'section_sizes')
+
+  for metric in metrics:
+    if metric not in r:
+      continue
+
+    print '======================================================================'
+    print '{0}:'.format(metric)
+    print '======================================================================'
+
+    for test_name in sorted(b[metric].keys()):
+      if test_name not in r[metric]:
+        continue
+
+      b_data = b[metric][test_name]
+      r_data = r[metric][test_name]
+
+      if metric not in nested_metrics:
+        prettyPrint(test_name, b_data, r_data)
+        continue
+
+      print '\n{0}:'.format(test_name)
+
+      for item_name in sorted(b_data.keys()):
+        if item_name not in r_data:
+          continue
+
+        prettyPrint(item_name, b_data[item_name], r_data[item_name])
+
+    print '\n'
+
+
+# Script body: runs at import/execution time. All four keys are read with
+# os.environ[...], so a missing variable raises KeyError before any output.
+
+# Fetch the baseline entities.
+baselinePartitionKey = os.environ['BASELINEPARTITIONKEY']
+baselineRowKey = os.environ['BASELINEROWKEY']
+baselineData = azure_table.get(baselinePartitionKey, baselineRowKey)
+
+# Fetch the entities of the run being compared.
+runPartitionKey = os.environ['RUNPARTITIONKEY']
+runRowKey = os.environ['RUNROWKEY']
+runData = azure_table.get(runPartitionKey, runRowKey)
+
+# Echo the keys being compared, then print the per-metric report.
+print '======================================================================'
+print 'Comparing benchmark results to baseline'
+print '======================================================================'
+print 'Baseline Partition Key: {0}'.format(baselinePartitionKey)
+print 'Baseline Row Key: {0}'.format(baselineRowKey)
+print 'Run Partition Key: {0}'.format(runPartitionKey)
+print 'Run Row Key: {0}'.format(runRowKey)
+
+compareData(baselineData, runData)
--- a/UNIX/extract_benchmark_data.py
+++ b/UNIX/extract_benchmark_data.py
@@ -51,8 +51,8 @@ class LogFile:
  def getRunData(self, configData):
    runData = {}

-    runData['user'] = getpass.getuser()
-    runData['timestamp'] = time.time()
+    runData['partitionkey'] = os.environ['RUNPARTITIONKEY']
+    runData['rowkey'] = os.environ['RUNROWKEY']
    runData['date'] = date.today().strftime('%Y-%m-%d')

    runData['config'] = {}