Add functions for parsing and validating access paths

This adds `parseAccessPathTokens` and `validateAccessPath`, together with unit tests, in preparation for upcoming model editor functionality that will need to parse and validate access paths.
2024-01-10 16:20:13 +01:00 · 2024-01-10 16:20:13 +01:00 · ea6e148df9
--- a/extensions/ql-vscode/src/model-editor/shared/access-paths.ts
+++ b/extensions/ql-vscode/src/model-editor/shared/access-paths.ts
@ -0,0 +1,128 @@
+/**
+ * This file contains functions for parsing and validating access paths.
+ *
+ * This intentionally does not simply split by '.' since tokens may contain dots,
+ * e.g. `Field[foo.Bar.x]`. Instead, it uses some simple parsing to match valid tokens.
+ *
+ * Valid syntax was determined based on this file:
+ * https://github.com/github/codeql/blob/a04830b8b2d3e5f7df8e1f80f06c020b987a89a3/ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll
+ *
+ * In contrast to that file, we do not use a regex for parsing to allow us to be more lenient.
+ * For example, we can parse partial access paths such as `Field[foo.Bar.x` without error.
+ */
+
+/**
+ * A range of characters in an access path. The start position is inclusive, the end position is exclusive.
+ * An empty range has `start === end`.
+ */
+type AccessPathRange = {
+  /**
+   * Zero-based index of the first character of the range.
+   */
+  start: number;
+  /**
+   * Zero-based index of the character after the last character of the range.
+   */
+  end: number;
+};
+
+/**
+ * A token in an access path. For example, `Argument[foo]` is a token.
+ */
+type AccessPartToken = {
+  /**
+   * The text of the token, without the dots that separate it from its neighbours.
+   */
+  text: string;
+  /**
+   * The position of `text` within the access path the token was parsed from.
+   */
+  range: AccessPathRange;
+};
+
+/**
+ * Parses an access path into tokens by splitting on every `.` that is not inside brackets.
+ *
+ * Parsing is lenient: partial paths such as `Argument[foo].Field[` are tokenized without
+ * error, and empty tokens (e.g. the one after the trailing dot in `Argument[foo].`) are
+ * returned as-is so that `validateAccessPath` can report them.
+ *
+ * @param path The access path to parse.
+ * @returns An array of tokens in order of appearance. There is always at least one token;
+ * for an empty path it is a single empty token.
+ */
+export function parseAccessPathTokens(path: string): AccessPartToken[] {
+  const parts: AccessPartToken[] = [];
+
+  let currentPathStart = 0;
+  // Keep track of the bracket nesting depth so we can parse the path correctly when it
+  // contains nested brackets such as `Argument[foo[bar].test].Element`.
+  let bracketCounter = 0;
+  for (let i = 0; i < path.length; i++) {
+    const c = path[i];
+
+    if (c === "[") {
+      bracketCounter++;
+    } else if (c === "]") {
+      // Never let the depth go negative: a stray `]` (e.g. `Argument[foo]].Element`) must
+      // not stop the dots after it from being recognized as separators.
+      bracketCounter = Math.max(0, bracketCounter - 1);
+    } else if (c === "." && bracketCounter === 0) {
+      // A part ends when we encounter a dot that is not inside brackets.
+      parts.push({
+        text: path.slice(currentPathStart, i),
+        range: {
+          start: currentPathStart,
+          end: i,
+        },
+      });
+      currentPathStart = i + 1;
+    }
+  }
+
+  // The last part is not followed by a dot, so we need to add it manually.
+  // If it is empty, such as for `Argument[foo].`, this is still correct since
+  // `validateAccessPath` checks that none of the tokens are empty.
+  parts.push({
+    text: path.slice(currentPathStart),
+    range: {
+      start: currentPathStart,
+      end: path.length,
+    },
+  });
+
+  return parts;
+}
+
+// Regex for a single part of the access path: a name made of word characters (`\w+`),
+// optionally followed by one bracketed argument that contains no `]`. Because the argument
+// may not contain `]`, tokens with nested brackets such as `Argument[foo[bar].test]` do not
+// match.
+const tokenRegex = /^(\w+)(?:\[([^\]]*)])?$/;
+
+/**
+ * A problem found in an access path by `validateAccessPath`.
+ */
+type AccessPathDiagnostic = {
+  /**
+   * The range of the offending token within the access path.
+   */
+  range: AccessPathRange;
+  /**
+   * A description of the problem.
+   */
+  message: string;
+};
+
+/**
+ * Checks every token of an access path against the token syntax and reports the ones that
+ * do not match. Only complete paths are accepted; a partial path such as `Argument[` yields
+ * a diagnostic.
+ *
+ * @param path The access path to validate.
+ * @returns One diagnostic per invalid token, in token order. Empty when the path is valid or
+ * is the empty string.
+ */
+export function validateAccessPath(path: string): AccessPathDiagnostic[] {
+  const diagnostics: AccessPathDiagnostic[] = [];
+  if (path === "") {
+    // An empty path is accepted as-is; it is never tokenized.
+    return diagnostics;
+  }
+
+  for (const { text, range } of parseAccessPathTokens(path)) {
+    if (tokenRegex.test(text)) {
+      continue;
+    }
+
+    // A zero-width range means there was nothing between two separators.
+    const isEmpty = range.start === range.end;
+    diagnostics.push({
+      range,
+      message: isEmpty ? "Unexpected empty token" : "Invalid access path",
+    });
+  }
+
+  return diagnostics;
+}
--- a/extensions/ql-vscode/test/unit-tests/model-editor/shared/access-paths.test.ts
+++ b/extensions/ql-vscode/test/unit-tests/model-editor/shared/access-paths.test.ts
@ -0,0 +1,251 @@
+import {
+  parseAccessPathTokens,
+  validateAccessPath,
+} from "../../../../src/model-editor/shared/access-paths";
+
+// Table-driven tests for the tokenizer: each case pairs an input path with the exact
+// tokens (text and character range) that `parseAccessPathTokens` is expected to return.
+describe("parseAccessPathTokens", () => {
+  it.each([
+    // A complete path is split into one token per dot-separated part.
+    {
+      path: "Argument[foo].Element.Field[@test]",
+      parts: [
+        {
+          range: {
+            start: 0,
+            end: 13,
+          },
+          text: "Argument[foo]",
+        },
+        {
+          range: {
+            start: 14,
+            end: 21,
+          },
+          text: "Element",
+        },
+        {
+          range: {
+            start: 22,
+            end: 34,
+          },
+          text: "Field[@test]",
+        },
+      ],
+    },
+    // Dots inside brackets do not split the token.
+    {
+      path: "Argument[foo].Element.Field[foo.Bar.x]",
+      parts: [
+        {
+          range: {
+            start: 0,
+            end: 13,
+          },
+          text: "Argument[foo]",
+        },
+        {
+          range: {
+            start: 14,
+            end: 21,
+          },
+          text: "Element",
+        },
+        {
+          range: {
+            start: 22,
+            end: 38,
+          },
+          text: "Field[foo.Bar.x]",
+        },
+      ],
+    },
+    // Partial paths with an unclosed bracket are still tokenized.
+    {
+      path: "Argument[",
+      parts: [
+        {
+          range: {
+            start: 0,
+            end: 9,
+          },
+          text: "Argument[",
+        },
+      ],
+    },
+    {
+      path: "Argument[se",
+      parts: [
+        {
+          range: {
+            start: 0,
+            end: 11,
+          },
+          text: "Argument[se",
+        },
+      ],
+    },
+    {
+      path: "Argument[foo].Field[",
+      parts: [
+        {
+          range: {
+            start: 0,
+            end: 13,
+          },
+          text: "Argument[foo]",
+        },
+        {
+          range: {
+            start: 14,
+            end: 20,
+          },
+          text: "Field[",
+        },
+      ],
+    },
+    // A trailing dot produces an empty final token with a zero-width range.
+    {
+      path: "Argument[foo].",
+      parts: [
+        {
+          text: "Argument[foo]",
+          range: {
+            end: 13,
+            start: 0,
+          },
+        },
+        {
+          text: "",
+          range: {
+            end: 14,
+            start: 14,
+          },
+        },
+      ],
+    },
+    // Consecutive dots produce one empty token each.
+    {
+      path: "Argument[foo]..",
+      parts: [
+        {
+          text: "Argument[foo]",
+          range: {
+            end: 13,
+            start: 0,
+          },
+        },
+        {
+          text: "",
+          range: {
+            end: 14,
+            start: 14,
+          },
+        },
+        {
+          text: "",
+          range: {
+            end: 15,
+            start: 15,
+          },
+        },
+      ],
+    },
+    // Nested brackets are kept together in a single token.
+    {
+      path: "Argument[foo[bar].test].Element.",
+      parts: [
+        {
+          range: {
+            start: 0,
+            end: 23,
+          },
+          text: "Argument[foo[bar].test]",
+        },
+        {
+          range: {
+            start: 24,
+            end: 31,
+          },
+          text: "Element",
+        },
+        {
+          range: {
+            start: 32,
+            end: 32,
+          },
+          text: "",
+        },
+      ],
+    },
+  ])(`parses correctly for $path`, ({ path, parts }) => {
+    expect(parseAccessPathTokens(path)).toEqual(parts);
+  });
+});
+
+// Table-driven tests for the validator: each case pairs an input path with the exact
+// diagnostics (message and character range) that `validateAccessPath` is expected to return.
+describe("validateAccessPath", () => {
+  it.each([
+    // Complete, well-formed paths produce no diagnostics.
+    {
+      path: "Argument[foo].Element.Field[@test]",
+      diagnostics: [],
+    },
+    {
+      path: "Argument[foo].Element.Field[foo.Bar.x]",
+      diagnostics: [],
+    },
+    // Partial paths are rejected: the token with the unclosed bracket is reported.
+    {
+      path: "Argument[",
+      diagnostics: [
+        {
+          message: "Invalid access path",
+          range: {
+            start: 0,
+            end: 9,
+          },
+        },
+      ],
+    },
+    {
+      path: "Argument[se",
+      diagnostics: [
+        {
+          message: "Invalid access path",
+          range: {
+            start: 0,
+            end: 11,
+          },
+        },
+      ],
+    },
+    {
+      path: "Argument[foo].Field[",
+      diagnostics: [
+        {
+          message: "Invalid access path",
+          range: {
+            start: 14,
+            end: 20,
+          },
+        },
+      ],
+    },
+    // Empty tokens get their own message and a zero-width range.
+    {
+      path: "Argument[foo].",
+      diagnostics: [
+        { message: "Unexpected empty token", range: { start: 14, end: 14 } },
+      ],
+    },
+    {
+      path: "Argument[foo]..",
+      diagnostics: [
+        { message: "Unexpected empty token", range: { start: 14, end: 14 } },
+        { message: "Unexpected empty token", range: { start: 15, end: 15 } },
+      ],
+    },
+    // Nested brackets are tokenized as one token but are not valid token syntax.
+    {
+      path: "Argument[foo[bar].test].Element.",
+      diagnostics: [
+        { message: "Invalid access path", range: { start: 0, end: 23 } },
+        { message: "Unexpected empty token", range: { start: 32, end: 32 } },
+      ],
+    },
+  ])(
+    `validates $path correctly with $diagnostics.length errors`,
+    ({ path, diagnostics }) => {
+      expect(validateAccessPath(path)).toEqual(diagnostics);
+    },
+  );
+});