snapshot python implementation (#714)

2024-04-22 13:58:41 -06:00 · 2024-04-22 13:58:41 -06:00 · a4ba6529f3
--- a/.yarn/versions/cb6d33ac.yml
+++ b/.yarn/versions/cb6d33ac.yml
@ -0,0 +1,9 @@
+releases:
+  "@datashaper/schema": minor
+
+declined:
+  - "@datashaper/app-framework"
+  - "@datashaper/react"
+  - "@datashaper/tables"
+  - "@datashaper/webapp"
+  - "@datashaper/workflow"
--- a/javascript/schema/docs/markdown/schema.filetype.md
+++ b/javascript/schema/docs/markdown/schema.filetype.md
@ -0,0 +1,22 @@
+<!-- Do not edit this file. It is automatically generated by API Documenter. -->
+
+[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [FileType](./schema.filetype.md)
+
+## FileType enum
+
+These are the available formats for the snapshot verb.
+
+<b>Signature:</b>
+
+```typescript
+export declare enum FileType 
+```
+
+## Enumeration Members
+
+|  Member | Value | Description |
+|  --- | --- | --- |
+|  Csv | <code>&quot;csv&quot;</code> |  |
+|  Json | <code>&quot;json&quot;</code> |  |
+|  Parquet | <code>&quot;parquet&quot;</code> |  |
+
--- a/javascript/schema/docs/markdown/schema.md
+++ b/javascript/schema/docs/markdown/schema.md
@ -19,6 +19,7 @@
 |  [DateComparisonOperator](./schema.datecomparisonoperator.md) |  |
 |  [ErrorCode](./schema.errorcode.md) |  |
 |  [FieldAggregateOperation](./schema.fieldaggregateoperation.md) | This is the subset of aggregate functions that can operate on a single field so we don't accommodate additional args. See https://uwdata.github.io/arquero/api/op\#aggregate-functions |
+|  [FileType](./schema.filetype.md) | These are the available formats for the snapshot verb. |
 |  [FilterCompareType](./schema.filtercomparetype.md) | Indicates the comparison type used for a filter operation. This is done on a row-by-row basis. |
 |  [JoinStrategy](./schema.joinstrategy.md) |  |
 |  [KnownProfile](./schema.knownprofile.md) |  |
@ -98,6 +99,7 @@
 |  [ResourceSchema](./schema.resourceschema.md) | Parent class for any resource type understood by the system. Any object type that extends from Resource is expected to have a standalone schema published. For project state, this can be left as generic as possible for now. |
 |  [RollupArgs](./schema.rollupargs.md) |  |
 |  [SampleArgs](./schema.sampleargs.md) |  |
+|  [SnapshotArgs](./schema.snapshotargs.md) |  |
 |  [SpreadArgs](./schema.spreadargs.md) |  |
 |  [StepJsonCommon](./schema.stepjsoncommon.md) | Common step properties |
 |  [StringsArgs](./schema.stringsargs.md) |  |
--- a/javascript/schema/docs/markdown/schema.snapshotargs.filetype.md
+++ b/javascript/schema/docs/markdown/schema.snapshotargs.filetype.md
@ -0,0 +1,11 @@
+<!-- Do not edit this file. It is automatically generated by API Documenter. -->
+
+[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [SnapshotArgs](./schema.snapshotargs.md) &gt; [fileType](./schema.snapshotargs.filetype.md)
+
+## SnapshotArgs.fileType property
+
+<b>Signature:</b>
+
+```typescript
+fileType: FileType;
+```
--- a/javascript/schema/docs/markdown/schema.snapshotargs.md
+++ b/javascript/schema/docs/markdown/schema.snapshotargs.md
@ -0,0 +1,19 @@
+<!-- Do not edit this file. It is automatically generated by API Documenter. -->
+
+[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [SnapshotArgs](./schema.snapshotargs.md)
+
+## SnapshotArgs interface
+
+<b>Signature:</b>
+
+```typescript
+export interface SnapshotArgs 
+```
+
+## Properties
+
+|  Property | Modifiers | Type | Description |
+|  --- | --- | --- | --- |
+|  [fileType](./schema.snapshotargs.filetype.md) |  | [FileType](./schema.filetype.md) |  |
+|  [name](./schema.snapshotargs.name.md) |  | string |  |
+
--- a/javascript/schema/docs/markdown/schema.snapshotargs.name.md
+++ b/javascript/schema/docs/markdown/schema.snapshotargs.name.md
@ -0,0 +1,11 @@
+<!-- Do not edit this file. It is automatically generated by API Documenter. -->
+
+[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [SnapshotArgs](./schema.snapshotargs.md) &gt; [name](./schema.snapshotargs.name.md)
+
+## SnapshotArgs.name property
+
+<b>Signature:</b>
+
+```typescript
+name: string;
+```
--- a/javascript/schema/docs/report/schema.api.json
+++ b/javascript/schema/docs/report/schema.api.json
@ -5619,6 +5619,85 @@
          ],
          "extendsTokenRanges": []
        },
+        {
+          "kind": "Enum",
+          "canonicalReference": "@datashaper/schema!FileType:enum",
+          "docComment": "/**\n * These are the available formats for the snapshot verb.\n */\n",
+          "excerptTokens": [
+            {
+              "kind": "Content",
+              "text": "export declare enum FileType "
+            }
+          ],
+          "releaseTag": "Public",
+          "name": "FileType",
+          "preserveMemberOrder": false,
+          "members": [
+            {
+              "kind": "EnumMember",
+              "canonicalReference": "@datashaper/schema!FileType.Csv:member",
+              "docComment": "",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "Csv = "
+                },
+                {
+                  "kind": "Content",
+                  "text": "\"csv\""
+                }
+              ],
+              "initializerTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              },
+              "releaseTag": "Public",
+              "name": "Csv"
+            },
+            {
+              "kind": "EnumMember",
+              "canonicalReference": "@datashaper/schema!FileType.Json:member",
+              "docComment": "",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "Json = "
+                },
+                {
+                  "kind": "Content",
+                  "text": "\"json\""
+                }
+              ],
+              "initializerTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              },
+              "releaseTag": "Public",
+              "name": "Json"
+            },
+            {
+              "kind": "EnumMember",
+              "canonicalReference": "@datashaper/schema!FileType.Parquet:member",
+              "docComment": "",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "Parquet = "
+                },
+                {
+                  "kind": "Content",
+                  "text": "\"parquet\""
+                }
+              ],
+              "initializerTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              },
+              "releaseTag": "Public",
+              "name": "Parquet"
+            }
+          ]
+        },
        {
          "kind": "Interface",
          "canonicalReference": "@datashaper/schema!FillArgs:interface",
@ -9269,6 +9348,78 @@
            }
          ]
        },
+        {
+          "kind": "Interface",
+          "canonicalReference": "@datashaper/schema!SnapshotArgs:interface",
+          "docComment": "",
+          "excerptTokens": [
+            {
+              "kind": "Content",
+              "text": "export interface SnapshotArgs "
+            }
+          ],
+          "releaseTag": "Public",
+          "name": "SnapshotArgs",
+          "preserveMemberOrder": false,
+          "members": [
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@datashaper/schema!SnapshotArgs#fileType:member",
+              "docComment": "",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "fileType: "
+                },
+                {
+                  "kind": "Reference",
+                  "text": "FileType",
+                  "canonicalReference": "@datashaper/schema!FileType:enum"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": false,
+              "releaseTag": "Public",
+              "name": "fileType",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            },
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@datashaper/schema!SnapshotArgs#name:member",
+              "docComment": "",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "name: "
+                },
+                {
+                  "kind": "Content",
+                  "text": "string"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": false,
+              "releaseTag": "Public",
+              "name": "name",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            }
+          ],
+          "extendsTokenRanges": []
+        },
        {
          "kind": "Enum",
          "canonicalReference": "@datashaper/schema!SortDirection:enum",
--- a/javascript/schema/docs/report/schema.api.md
+++ b/javascript/schema/docs/report/schema.api.md
@ -490,6 +490,18 @@ export interface FieldMetadata {
    type?: DataType;
 }

+// Warning: (ae-missing-release-tag) "FileType" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
+//
+// @public
+export enum FileType {
+    // (undocumented)
+    Csv = "csv",
+    // (undocumented)
+    Json = "json",
+    // (undocumented)
+    Parquet = "parquet"
+}
+
 // Warning: (ae-missing-release-tag) "FillArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
 //
 // @public (undocumented)
@ -897,6 +909,16 @@ export enum SetOp {
    Union = "union"
 }

+// Warning: (ae-missing-release-tag) "SnapshotArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
+//
+// @public (undocumented)
+export interface SnapshotArgs {
+    // (undocumented)
+    fileType: FileType;
+    // (undocumented)
+    name: string;
+}
+
 // Warning: (ae-missing-release-tag) "SortDirection" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
 //
 // @public (undocumented)
--- a/javascript/schema/src/workflow/verbs.ts
+++ b/javascript/schema/src/workflow/verbs.ts
@ -358,6 +358,15 @@ export enum WindowFunction {
 	UUID = 'uuid'
 }

+/**
+ * These are the available formats for the snapshot verb.
+ */
+export enum FileType {
+	/** Comma-separated values */
+	Csv = 'csv',
+	/** JSON */
+	Json = 'json',
+	/** Parquet */
+	Parquet = 'parquet'
+}
+
 export interface AggregateArgs extends RollupArgs {
 	/**
 	 * Column to group by
@ -722,6 +731,11 @@ export interface PrintArgs {
 	limit?: number 
 }

+/**
+ * Arguments for the snapshot verb.
+ */
+export interface SnapshotArgs {
+	/**
+	 * Name of the snapshot. The Python implementation uses it as the
+	 * base name of the file it writes.
+	 */
+	name: string
+	/**
+	 * Format the snapshot is written in.
+	 */
+	fileType: FileType 
+}
+
 export interface StringsReplaceArgs extends StringsArgs {
 	pattern: string
 	replacement: string
--- a/python/datashaper/datashaper/init.py
+++ b/python/datashaper/datashaper/init.py
@ -9,6 +9,7 @@ from .engine import (
    Criterion,
    DataType,
    FieldAggregateOperation,
+    FileType,
    FilterArgs,
    FilterCompareType,
    InputColumnArgs,
@ -116,6 +117,7 @@ __all__ = [
    "Category",
    "Criterion",
    "FieldAggregateOperation",
+    "FileType",
    "FilterArgs",
    "FilterCompareType",
    "InputColumnArgs",
--- a/python/datashaper/datashaper/engine/init.py
+++ b/python/datashaper/datashaper/engine/init.py
@ -15,6 +15,7 @@ from .types import (
    Criterion,
    DataType,
    FieldAggregateOperation,
+    FileType,
    FilterArgs,
    FilterCompareType,
    InputColumnArgs,
@ -55,6 +56,7 @@ __all__ = [
    "Category",
    "Criterion",
    "FieldAggregateOperation",
+    "FileType",
    "FilterArgs",
    "FilterCompareType",
    "InputColumnArgs",
--- a/python/datashaper/datashaper/engine/types.py
+++ b/python/datashaper/datashaper/engine/types.py
@ -243,6 +243,14 @@ class WindowFunction(str, Enum):
    UUID = "uuid"


+class FileType(str, Enum):
+    """File formats supported by the snapshot verb."""
+
+    # Each value doubles as the extension of the file the snapshot verb writes.
+    Json = "json"
+    Csv = "csv"
+    Parquet = "parquet"
+
+
@dataclass
 class OrderByInstruction:
    """Details regarding how to order a column."""
--- a/python/datashaper/datashaper/engine/verbs/snapshot.py
+++ b/python/datashaper/datashaper/engine/verbs/snapshot.py
@ -0,0 +1,37 @@
+#
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license. See LICENSE file in the project.
+#
+"""Snapshot verb implementation."""
+from typing import cast
+
+import pandas as pd
+
+from datashaper.engine.types import FileType
+from datashaper.engine.verbs.verbs_mapping import verb
+from datashaper.table_store.types import VerbResult, create_verb_result
+
+from .verb_input import VerbInput
+
+
+@verb(name="snapshot")
+def snapshot(
+    input: VerbInput,
+    name: str,
+    file_type: FileType,
+    **_kwargs: dict,
+) -> VerbResult:
+    """Snapshot verb implementation."""
+    output = cast(pd.DataFrame, input.get_input())
+    file_name = "./" + name + "." + file_type
+
+    if file_type == FileType.Csv:
+        output.to_csv(file_name)
+
+    if file_type == FileType.Json:
+        output.to_json(file_name, orient="records", compression="infer")
+
+    if file_type == FileType.Parquet:
+        output.to_parquet(file_name)
+
+    return create_verb_result(output)
--- a/python/datashaper/tests/verbs/snapshot_test.py
+++ b/python/datashaper/tests/verbs/snapshot_test.py
@ -0,0 +1,33 @@
+import pandas as pd
+
+from datashaper.engine.verbs import VerbInput, VerbManager
+from datashaper.table_store.types import TableContainer, VerbResult
+
+
+def make_verb_input(data: list, columns: list[str]):
+    pd_table = pd.DataFrame(data=data, columns=columns)
+    table_container = TableContainer(pd_table)
+    return VerbInput(table_container)
+
+
+def test_snapshot_csv():
+    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
+    snapshot = VerbManager.get().get_verb("snapshot").func
+    output: VerbResult = snapshot(input=verb_input, name="test-file", file_type="csv")
+    output: TableContainer = output.output
+
+
+def test_snapshot_json():
+    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
+    snapshot = VerbManager.get().get_verb("snapshot").func
+    output: VerbResult = snapshot(input=verb_input, name="test-file", file_type="json")
+    output: TableContainer = output.output
+
+
+def test_snapshot_parquet():
+    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
+    snapshot = VerbManager.get().get_verb("snapshot").func
+    output: VerbResult = snapshot(
+        input=verb_input, name="test-file", file_type="parquet"
+    )
+    output: TableContainer = output.output