Merge pull request #14 from github/addGit

adding some rest
2021-03-03 12:42:47 +01:00 · 2021-03-03 12:42:47 +01:00 · 20e66c4edf
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,6 @@
 .ghec-audit-log
 .last-cursor-update
+.last-v3-cursor-update

 # Created by https://www.gitignore.io/api/node
 # Edit at https://www.gitignore.io/?templates=node
@ -110,4 +111,4 @@ dist/
 tmp/
 temp/

-# End of https://www.gitignore.io/api/node
+# End of https://www.gitignore.io/api/node
--- a/README.md
+++ b/README.md
@ -1,7 +1,7 @@
 # CLI for the Audit Log using GHEC

 This CLI made in node helps on querying the audit log. It can query the full
-audit providing all the data the API can serve, or, given a cursor, it can 
+audit providing all the data the API can serve, or, given a cursor, it can
 provide the newest entries from that specific moment.

 You can build an sh script on top of this one to store the data or query it.
@ -14,21 +14,22 @@ This script can take the following arguments:
 Usage: audit-log-ghec-cli [options]

 Options:
-  -v, --version            Output the current version
-  -t, --token <string>     the token to access the API (mandatory)
-  -o, --org <string>       the organization we want to extract the audit log from
-  -cfg, --config <string>  location for the config yaml file. Default ".ghec-audit-log" (default: "./.ghec-audit-log")
-  -p, --pretty             prints the json data in a readable format (default: false)
-  -l, --limit              a maximum limit on the number of items retrieved
-  -f, --file               the name of the file where you want to output the result
-  -c, --cursor <string>    if provided, this cursor will be used to query the newest entries from the cursor provided. If not present,
-                the result will contain all the audit log from the org
-  -h, --help               display help for command
+  -v, --version             Output the current version
+  -t, --token <string>      the token to access the API (mandatory)
+  -o, --org <string>        the organization we want to extract the audit log from
+  -cfg, --config <string>   location for the config yaml file. Default ".ghec-audit-log" (default: "./.ghec-audit-log")
+  -p, --pretty              prints the json data in a readable format (default: false)
+  -l, --limit <number>      a maximum limit on the number of items retrieved
+  -f, --file <string>       the output file where the result should be printed
+  -a, --api <string>        the version of GitHub API to call (default: "v4")
+  -at, --api-type <string>  Only if -a is v3. API type to bring, either all, web or git (default: "all")
+  -c, --cursor <string>     if provided, this cursor will be used to query the newest entries from the cursor provided. If not present, the result will contain all the audit log from the org
+  -h, --help                display help for command

 ```

 Optionally, you can create a file called `.ghec-audit-log` that supports
-the token and organization, and omit the parameters while running the script.
+the **token** and **organization**, and omit the parameters while running the script.

 ```yaml
 org: org-name
@ -71,10 +72,10 @@ and integrate it with your favorite service.

 This workflow:
 - Runs periodically
- Grabs any existing cursor as the last item grabbed from the log 
+- Grabs any existing cursor as the last item grabbed from the log
 - Grabs the latest changes from the audit log
 - Forwards those changes to a service
- Commits the latest cursor for the next call 
+- Commits the latest cursor for the next call


 ## Releases
@ -104,6 +105,8 @@ You will need to create the following **Github Secrets** To allow the tool to wo

 ### Notes
 - Modify the polling workflow to run on a cron, instead of push
+- The `Organization` **must** be a part of a **GitHub** Enterprise or the API calls will fail
+- The `Personal Access Token` **must** be SSO enabled to query the GitHub Organization if SSO is enabled for that Organization

 ## Disclaimer

--- a/ghec-audit-log-cli.js
+++ b/ghec-audit-log-cli.js
@ -2,7 +2,10 @@
 const YAML = require('yaml')
 const fs = require('fs')
 const { graphql } = require('@octokit/graphql')
-const { requestEntries } = require('./ghec-audit-log-client')
+const { Octokit } = require('@octokit/rest')
+const { requestV4Entries, requestV3Entries } = require('./ghec-audit-log-client')
+const { retry } = require('@octokit/plugin-retry')
+const { throttling } = require('@octokit/plugin-throttling')
 const { validateInput } = require('./ghec-audit-log-utils')

 // Obtain configuration
@ -14,6 +17,8 @@ program.version('1.0.0', '-v, --version', 'Output the current version')
  .option('-p, --pretty', 'prints the json data in a readable format', false)
  .option('-l, --limit <number>', 'a maximum limit on the number of items retrieved')
  .option('-f, --file <string>', 'the output file where the result should be printed')
+  .option('-a, --api <string>', 'the version of GitHub API to call', 'v4')
+  .option('-at, --api-type <string>', 'Only if -a is v3. API type to bring, either all, web or git', 'all')
  .option('-c, --cursor <string>', 'if provided, this cursor will be used to query the newest entries from the cursor provided. If not present, the result will contain all the audit log from the org')

 program.parse(process.argv)
@ -27,24 +32,63 @@ try {
 }

 // TODO idea: maybe add support for other formats like PUTVAL to forward the data in an easier way
-const { cursor, pretty, limit, token, org, outputFile } = validateInput(program, config)
+const { cursor, pretty, limit, api, apiType, token, org, outputFile } = validateInput(program, config)
+
+/**
+ * Builds the REST (API v3) Octokit client used to read the audit log.
+ *
+ * The client authenticates with the `token` resolved by validateInput and is
+ * extended with the retry and throttling plugins. Both throttle handlers log a
+ * warning and return true, which asks the throttling plugin to retry the request.
+ *
+ * @returns {object} Octokit instance with the retry and throttling plugins applied
+ */
+function buildV3Octokit () {
+  const Octo = Octokit.plugin(retry, throttling)
+  const octokit = new Octo({
+    auth: token,
+    throttle: {
+      onRateLimit: (retryAfter, options) => {
+        // Name the throttled request: interpolating the commander `program` object only printed "[object Object]"
+        octokit.log.warn(
+          `[${new Date().toISOString()}] Request quota exhausted for request ${options.method} ${options.url}, will retry in ${retryAfter} seconds`
+        )
+        return true
+      },
+      onAbuseLimit: (retryAfter, options) => {
+        octokit.log.warn(
+          `[${new Date().toISOString()}] Abuse detected for request ${options.method} ${options.url}, will retry in ${retryAfter} seconds`
+        )
+        return true
+      }
+    }
+  })
+  return octokit
+}
+
+/**
+ * Builds the GraphQL (API v4) executor, pre-configured with the
+ * authorization header derived from the resolved `token`.
+ *
+ * @returns {Function} the value returned by `graphql.defaults` with the auth header set
+ */
+function buildGraphQLOctokit () {
+  const headers = { authorization: `token ${token}` }
+  return graphql.defaults({ headers })
+}

 /**
- * Function containing all the queries
+ * Function that queries the audit log through the selected GitHub API (v4 GraphQL or v3 REST)
 */
 async function queryAuditLog () {
  // Select the query to run
  let queryRunner
-  if (cursor) {
-    queryRunner = () => requestEntries(graphqlWithAuth, org, limit, cursor)
-  } else {
-    queryRunner = () => requestEntries(graphqlWithAuth, org, limit)
+  switch (api) {
+    case 'v4': // API v4 call with cursor
+      queryRunner = () => requestV4Entries(buildGraphQLOctokit(), org, limit, cursor || null)
+      break
+    case 'v3': // API v3 call with cursor
+      queryRunner = () => requestV3Entries(buildV3Octokit(), org, limit, cursor || null, apiType)
+      break
  }

+  // Sanity check the switch
+  if (!queryRunner) return []
+
  // Run the query and store the most recent cursor
  const { data, newestCursorId } = await queryRunner()
  const entries = data
-  if (newestCursorId) fs.writeFileSync('.last-cursor-update', newestCursorId)
+  if (newestCursorId) {
+    const cursorFileName = `.last${api === 'v3' ? '-v3-' : '-'}cursor-update`
+    fs.writeFileSync(cursorFileName, newestCursorId)
+  }

  // Return the data
  if (pretty === true) {
@ -54,12 +98,9 @@ async function queryAuditLog () {
  }
 }

-// Execute the request and print the result
-const graphqlWithAuth = graphql.defaults({
-  headers: {
-    authorization: `token ${token}`
-  }
-})
+/*
+* Run the audit log query (the API v3 vs API v4 selection happens inside queryAuditLog) and handle the result
+*/
 queryAuditLog()
  .then((data) => {
    if (outputFile) {
--- a/ghec-audit-log-client.js
+++ b/ghec-audit-log-client.js
@ -1,6 +1,7 @@
+const hash = require('json-hash')
 const { allEntriesQuery } = require('./ghec-audit-log-queries')

-async function requestEntries (requestExecutor, org, limit, cursor) {
+async function requestV4Entries (graphqlApi, org, limit, cursor) {
  let entries = []
  const variables = {
    org: org,
@ -13,7 +14,7 @@ async function requestEntries (requestExecutor, org, limit, cursor) {
  const hasLimit = limit || false
  let limitReached = false
  while (hasNextPage && !foundCursor && !limitReached) {
-    const data = await requestExecutor(allEntriesQuery, variables)
+    const data = await graphqlApi(allEntriesQuery, variables)
    let newEntries = data.organization.auditLog.nodes

    // Cursor check
@ -45,6 +46,63 @@ async function requestEntries (requestExecutor, org, limit, cursor) {
  return { data: entries, newestCursorId: firstPageCursorId }
 }

-module.exports = {
-  requestEntries
+/**
+ * Retrieves audit log entries through the REST (v3) API.
+ *
+ * The cursors from the `Link` header are not used as the resume point: they
+ * identify a page and its last element, so they stop being reliable when
+ * pagination, limit or page size change. Instead every entry is hashed
+ * individually (generateHashAudit) and a previously stored hash is located
+ * with findHashedEntry.
+ *
+ * NOTE(review): assumes the API returns the newest entries first, since the
+ * first collected entry is used as the new cursor - confirm against the API docs.
+ *
+ * @param {object} octokit - Octokit client exposing `paginate.iterator`
+ * @param {string} org - organization whose audit log is queried
+ * @param {number|string|null} limit - maximum number of entries to return; null, non-numeric or non-positive means no limit
+ * @param {string|null} cursor - hash of an entry from a previous run; paging stops once that entry is found
+ * @param {string} apiType - value sent as the `include` query parameter
+ * @returns {Promise<{data: object[], newestCursorId: (string|null)}>} the collected entries and the hash of the first one (null when nothing was collected)
+ */
+async function requestV3Entries (octokit, org, limit, cursor, apiType) {
+  let entries = []
+  // The limit can arrive as a string from the command line, so normalise it once
+  const maxEntries = Number(limit)
+  const hasLimit = Number.isFinite(maxEntries) && maxEntries > 0
+  // Math.min(100, null) is 0, which produced per_page=0 when no limit was given; fall back to the 100 cap instead
+  const perPage = hasLimit ? Math.min(100, maxEntries) : 100
+  let foundCursor = false
+  let foundLimit = false
+  for await (const { data } of octokit.paginate.iterator(`GET /orgs/{org}/audit-log?include=${apiType}&per_page=${perPage}`, {
+    org: org
+  })) {
+    let newEntries = data
+
+    // If we find the entry in the current request, we should add the remaining and stop
+    if (cursor != null) {
+      const index = findHashedEntry(cursor, data)
+      if (index !== -1) {
+        newEntries = data.slice(0, index)
+        foundCursor = true
+      }
+    }
+
+    // Concat the previous entries and the new ones
+    entries = entries.concat(newEntries)
+
+    // Only flag the limit once enough entries were collected; flagging it on every
+    // page stopped the iteration after the first page and capped results at 100
+    if (hasLimit && entries.length >= maxEntries) {
+      entries = entries.slice(0, maxEntries)
+      foundLimit = true
+    }
+
+    // Stop going through the iterator if either we reached limit or found the cursor
+    if (foundLimit || foundCursor) break
+  }
+
+  // Calculate the newest element that was provided
+  let lastCursor = null
+  if (entries.length > 0) {
+    lastCursor = generateHashAudit(entries[0])
+  }
+
+  // Provide the data
+  return { data: entries, newestCursorId: lastCursor }
+}
+
+/**
+ * Produces the cursor value for a single audit log entry.
+ *
+ * @param {object} entry - audit log entry to fingerprint
+ * @returns {string} base64 encoding of the entry's json-hash digest
+ */
+function generateHashAudit (entry) {
+  return Buffer.from(hash.digest(entry)).toString('base64')
+}
+
+/**
+ * Locates the entry whose hash equals the given cursor.
+ *
+ * @param {string} cursor - hash as produced by generateHashAudit
+ * @param {object[]} entries - audit log entries to search
+ * @returns {number} index of the first matching entry, or -1 when none matches
+ */
+function findHashedEntry (cursor, entries) {
+  for (let position = 0; position < entries.length; position++) {
+    if (generateHashAudit(entries[position]) === cursor) return position
+  }
+  return -1
+}
+
+module.exports = {
+  requestV4Entries,
+  requestV3Entries
 }
--- a/ghec-audit-log-utils.js
+++ b/ghec-audit-log-utils.js
@ -7,6 +7,8 @@ function validateInput (program, config) {
    cursor: program.cursor || null,
    pretty: program.pretty || false,
    limit: program.limit || null,
+    api: program.api || 'v4',
+    apiType: program.apiType || 'all',
    token: program.token || config.token,
    org: program.org || config.org,
    outputFile: program.file
@ -41,6 +43,22 @@ function validateInput (program, config) {
      },
      format: alphanumericRegex
    },
+    api: {
+      type: 'string',
+      presence: { allowEmpty: false },
+      length: {
+        is: 2
+      },
+      inclusion: ['v3', 'v4']
+    },
+    apiType: {
+      type: 'string',
+      presence: { allowEmpty: false },
+      length: {
+        is: 3
+      },
+      inclusion: ['all', 'git', 'web']
+    },
    org: {
      type: 'string',
      presence: { allowEmpty: false },
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -27,7 +27,11 @@
  "license": "MIT",
  "dependencies": {
    "@octokit/graphql": "^4.3.1",
+    "@octokit/plugin-retry": "*",
+    "@octokit/plugin-throttling": "^3.3.1",
+    "@octokit/rest": "^18.3.1",
    "commander": "^5.1.0",
+    "json-hash": "^1.2.0",
    "validate.js": "^0.13.1",
    "yaml": "^1.9.2"
  },
--- a/workflow/forward-v3-workflow.yml
+++ b/workflow/forward-v3-workflow.yml
@ -0,0 +1,66 @@
+############################################
+# Github Action Workflow to poll and aggregate logs #
+############################################
+name: POLL/POST Audit Log Data from v3 API
+
+##############################################
+# Run once an hour and when pushed to main #
+##############################################
+on:
+  push:
+    branches: main
+  schedule:
+    - cron: '59 * * * *'
+
+#################
+# Build the job #
+#################
+jobs:
+  poll:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        node-version: [12.x]
+
+    steps:
+    # Clone source code
+    - name: Checkout source code
+      uses: actions/checkout@v2
+
+    # Install and configure NodeJS
+    - name: Use Node.js ${{ matrix.node-version }}
+      uses: actions/setup-node@v1
+      with:
+        node-version: ${{ matrix.node-version }}
+
+    # Load the cursor stored by the previous run, if there is one.
+    # Every step runs in its own shell, so a plain `export` never reaches the polling step;
+    # appending to $GITHUB_ENV makes LAST_CURSOR visible to the steps that follow.
+    # The file check keeps the first run (no cursor file yet) from failing on `cat`.
+    - name: Export Cursor
+      run: |
+        if [ -f .last-v3-cursor-update ]; then
+          echo "LAST_CURSOR=$(cat .last-v3-cursor-update)" >> "$GITHUB_ENV"
+        fi
+
+    # Need to install NPM
+    - name: NPM Install
+      run: npm install
+
+      # If this is the first time we poll, then do a fresh poll. If not, poll from latest cursor.
+    - name: Poll from Cursor
+      run: |
+        if [ -z "$LAST_CURSOR" ]; then
+          echo "FIRST TIME RUNNING AUDIT LOG POLL"
+          npm start -- --token ${{secrets.AUDIT_LOG_TOKEN}} --org ${{secrets.ORG_NAME}} --api 'v3' --api-type 'all' --file 'audit-log-output.json'
+        else
+          echo "RUNNING AUDIT LOG POLL FROM $LAST_CURSOR"
+          npm start -- --token ${{secrets.AUDIT_LOG_TOKEN}} --org ${{secrets.ORG_NAME}} --api 'v3' --api-type 'all' --cursor $LAST_CURSOR --file 'audit-log-output.json'
+        fi
+        curl -X POST -H "Content-Type: application/json" -d @audit-log-output.json ${{secrets.WEBHOOK_URL}}
+
+    # Commit the cursor back to source
+    - name: Commit cursor
+      uses: EndBug/add-and-commit@v5
+      with:
+        author_name: Audit Log Integration
+        author_email: ${{ secrets.COMMITTER_EMAIL }}
+        message: "Updating cursor for audit log"
+        add: ".last-v3-cursor-update"
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/workflow/forward-v4-workflow.yml
+++ b/workflow/forward-v4-workflow.yml
@ -1,7 +1,7 @@
 ############################################
 # Github Action Workflow to poll and aggregate logs #
 ############################################
-name: POLL/POST Audit Log Data
+name: POLL/POST Audit Log Data from V4 API

 ##############################################
 # Run once an hour and when pushed to main #