diff --git a/.yarn/versions/cb6d33ac.yml b/.yarn/versions/cb6d33ac.yml
new file mode 100644
index 000000000..bf4897c3b
--- /dev/null
+++ b/.yarn/versions/cb6d33ac.yml
@@ -0,0 +1,9 @@
+releases:
+ "@datashaper/schema": minor
+
+declined:
+ - "@datashaper/app-framework"
+ - "@datashaper/react"
+ - "@datashaper/tables"
+ - "@datashaper/webapp"
+ - "@datashaper/workflow"
diff --git a/javascript/schema/docs/markdown/schema.filetype.md b/javascript/schema/docs/markdown/schema.filetype.md
new file mode 100644
index 000000000..16990beb6
--- /dev/null
+++ b/javascript/schema/docs/markdown/schema.filetype.md
@@ -0,0 +1,22 @@
+
+
+[Home](./index.md) > [@datashaper/schema](./schema.md) > [FileType](./schema.filetype.md)
+
+## FileType enum
+
+These are the available formats for the snapshot verb.
+
+Signature:
+
+```typescript
+export declare enum FileType
+```
+
+## Enumeration Members
+
+| Member | Value | Description |
+| --- | --- | --- |
+| Csv | "csv" | |
+| Json | "json" | |
+| Parquet | "parquet" | |
+
diff --git a/javascript/schema/docs/markdown/schema.md b/javascript/schema/docs/markdown/schema.md
index b0f6cb008..ee4f1b6cf 100644
--- a/javascript/schema/docs/markdown/schema.md
+++ b/javascript/schema/docs/markdown/schema.md
@@ -19,6 +19,7 @@
| [DateComparisonOperator](./schema.datecomparisonoperator.md) | |
| [ErrorCode](./schema.errorcode.md) | |
| [FieldAggregateOperation](./schema.fieldaggregateoperation.md) | This is the subset of aggregate functions that can operate on a single field so we don't accommodate additional args. See https://uwdata.github.io/arquero/api/op\#aggregate-functions |
+| [FileType](./schema.filetype.md) | These are the available formats for the snapshot verb. |
| [FilterCompareType](./schema.filtercomparetype.md) | Indicates the comparison type used for a filter operation. This is done on a row-by-row basis. |
| [JoinStrategy](./schema.joinstrategy.md) | |
| [KnownProfile](./schema.knownprofile.md) | |
@@ -98,6 +99,7 @@
| [ResourceSchema](./schema.resourceschema.md) | Parent class for any resource type understood by the system. Any object type that extends from Resource is expected to have a standalone schema published. For project state, this can be left as generic as possible for now. |
| [RollupArgs](./schema.rollupargs.md) | |
| [SampleArgs](./schema.sampleargs.md) | |
+| [SnapshotArgs](./schema.snapshotargs.md) | |
| [SpreadArgs](./schema.spreadargs.md) | |
| [StepJsonCommon](./schema.stepjsoncommon.md) | Common step properties |
| [StringsArgs](./schema.stringsargs.md) | |
diff --git a/javascript/schema/docs/markdown/schema.snapshotargs.filetype.md b/javascript/schema/docs/markdown/schema.snapshotargs.filetype.md
new file mode 100644
index 000000000..696dce660
--- /dev/null
+++ b/javascript/schema/docs/markdown/schema.snapshotargs.filetype.md
@@ -0,0 +1,11 @@
+
+
+[Home](./index.md) > [@datashaper/schema](./schema.md) > [SnapshotArgs](./schema.snapshotargs.md) > [fileType](./schema.snapshotargs.filetype.md)
+
+## SnapshotArgs.fileType property
+
+Signature:
+
+```typescript
+fileType: FileType;
+```
diff --git a/javascript/schema/docs/markdown/schema.snapshotargs.md b/javascript/schema/docs/markdown/schema.snapshotargs.md
new file mode 100644
index 000000000..aefebfa31
--- /dev/null
+++ b/javascript/schema/docs/markdown/schema.snapshotargs.md
@@ -0,0 +1,19 @@
+
+
+[Home](./index.md) > [@datashaper/schema](./schema.md) > [SnapshotArgs](./schema.snapshotargs.md)
+
+## SnapshotArgs interface
+
+Signature:
+
+```typescript
+export interface SnapshotArgs
+```
+
+## Properties
+
+| Property | Modifiers | Type | Description |
+| --- | --- | --- | --- |
+| [fileType](./schema.snapshotargs.filetype.md) | | [FileType](./schema.filetype.md) | |
+| [name](./schema.snapshotargs.name.md) | | string | |
+
diff --git a/javascript/schema/docs/markdown/schema.snapshotargs.name.md b/javascript/schema/docs/markdown/schema.snapshotargs.name.md
new file mode 100644
index 000000000..12a1a3bf6
--- /dev/null
+++ b/javascript/schema/docs/markdown/schema.snapshotargs.name.md
@@ -0,0 +1,11 @@
+
+
+[Home](./index.md) > [@datashaper/schema](./schema.md) > [SnapshotArgs](./schema.snapshotargs.md) > [name](./schema.snapshotargs.name.md)
+
+## SnapshotArgs.name property
+
+Signature:
+
+```typescript
+name: string;
+```
diff --git a/javascript/schema/docs/report/schema.api.json b/javascript/schema/docs/report/schema.api.json
index 2ab48f653..155de03f9 100644
--- a/javascript/schema/docs/report/schema.api.json
+++ b/javascript/schema/docs/report/schema.api.json
@@ -5619,6 +5619,85 @@
],
"extendsTokenRanges": []
},
+ {
+ "kind": "Enum",
+ "canonicalReference": "@datashaper/schema!FileType:enum",
+ "docComment": "/**\n * These are the available formats for the snapshot verb.\n */\n",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "export declare enum FileType "
+ }
+ ],
+ "releaseTag": "Public",
+ "name": "FileType",
+ "preserveMemberOrder": false,
+ "members": [
+ {
+ "kind": "EnumMember",
+ "canonicalReference": "@datashaper/schema!FileType.Csv:member",
+ "docComment": "",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "Csv = "
+ },
+ {
+ "kind": "Content",
+ "text": "\"csv\""
+ }
+ ],
+ "initializerTokenRange": {
+ "startIndex": 1,
+ "endIndex": 2
+ },
+ "releaseTag": "Public",
+ "name": "Csv"
+ },
+ {
+ "kind": "EnumMember",
+ "canonicalReference": "@datashaper/schema!FileType.Json:member",
+ "docComment": "",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "Json = "
+ },
+ {
+ "kind": "Content",
+ "text": "\"json\""
+ }
+ ],
+ "initializerTokenRange": {
+ "startIndex": 1,
+ "endIndex": 2
+ },
+ "releaseTag": "Public",
+ "name": "Json"
+ },
+ {
+ "kind": "EnumMember",
+ "canonicalReference": "@datashaper/schema!FileType.Parquet:member",
+ "docComment": "",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "Parquet = "
+ },
+ {
+ "kind": "Content",
+ "text": "\"parquet\""
+ }
+ ],
+ "initializerTokenRange": {
+ "startIndex": 1,
+ "endIndex": 2
+ },
+ "releaseTag": "Public",
+ "name": "Parquet"
+ }
+ ]
+ },
{
"kind": "Interface",
"canonicalReference": "@datashaper/schema!FillArgs:interface",
@@ -9269,6 +9348,78 @@
}
]
},
+ {
+ "kind": "Interface",
+ "canonicalReference": "@datashaper/schema!SnapshotArgs:interface",
+ "docComment": "",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "export interface SnapshotArgs "
+ }
+ ],
+ "releaseTag": "Public",
+ "name": "SnapshotArgs",
+ "preserveMemberOrder": false,
+ "members": [
+ {
+ "kind": "PropertySignature",
+ "canonicalReference": "@datashaper/schema!SnapshotArgs#fileType:member",
+ "docComment": "",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "fileType: "
+ },
+ {
+ "kind": "Reference",
+ "text": "FileType",
+ "canonicalReference": "@datashaper/schema!FileType:enum"
+ },
+ {
+ "kind": "Content",
+ "text": ";"
+ }
+ ],
+ "isReadonly": false,
+ "isOptional": false,
+ "releaseTag": "Public",
+ "name": "fileType",
+ "propertyTypeTokenRange": {
+ "startIndex": 1,
+ "endIndex": 2
+ }
+ },
+ {
+ "kind": "PropertySignature",
+ "canonicalReference": "@datashaper/schema!SnapshotArgs#name:member",
+ "docComment": "",
+ "excerptTokens": [
+ {
+ "kind": "Content",
+ "text": "name: "
+ },
+ {
+ "kind": "Content",
+ "text": "string"
+ },
+ {
+ "kind": "Content",
+ "text": ";"
+ }
+ ],
+ "isReadonly": false,
+ "isOptional": false,
+ "releaseTag": "Public",
+ "name": "name",
+ "propertyTypeTokenRange": {
+ "startIndex": 1,
+ "endIndex": 2
+ }
+ }
+ ],
+ "extendsTokenRanges": []
+ },
{
"kind": "Enum",
"canonicalReference": "@datashaper/schema!SortDirection:enum",
diff --git a/javascript/schema/docs/report/schema.api.md b/javascript/schema/docs/report/schema.api.md
index 80f082550..d9fdab987 100644
--- a/javascript/schema/docs/report/schema.api.md
+++ b/javascript/schema/docs/report/schema.api.md
@@ -490,6 +490,18 @@ export interface FieldMetadata {
type?: DataType;
}
+// Warning: (ae-missing-release-tag) "FileType" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
+//
+// @public
+export enum FileType {
+ // (undocumented)
+ Csv = "csv",
+ // (undocumented)
+ Json = "json",
+ // (undocumented)
+ Parquet = "parquet"
+}
+
// Warning: (ae-missing-release-tag) "FillArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public (undocumented)
@@ -897,6 +909,16 @@ export enum SetOp {
Union = "union"
}
+// Warning: (ae-missing-release-tag) "SnapshotArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
+//
+// @public (undocumented)
+export interface SnapshotArgs {
+ // (undocumented)
+ fileType: FileType;
+ // (undocumented)
+ name: string;
+}
+
// Warning: (ae-missing-release-tag) "SortDirection" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public (undocumented)
diff --git a/javascript/schema/src/workflow/verbs.ts b/javascript/schema/src/workflow/verbs.ts
index 9fc0ce23c..81e0f923b 100644
--- a/javascript/schema/src/workflow/verbs.ts
+++ b/javascript/schema/src/workflow/verbs.ts
@@ -358,6 +358,15 @@ export enum WindowFunction {
UUID = 'uuid'
}
+/**
+ * These are the available formats for the snapshot verb.
+ */
+export enum FileType {
+ Csv = 'csv',
+ Json = 'json',
+ Parquet = 'parquet'
+}
+
export interface AggregateArgs extends RollupArgs {
/**
* Column to group by
@@ -722,6 +731,11 @@ export interface PrintArgs {
limit?: number
}
+export interface SnapshotArgs {
+ name: string
+ fileType: FileType
+}
+
export interface StringsReplaceArgs extends StringsArgs {
pattern: string
replacement: string
diff --git a/python/datashaper/datashaper/__init__.py b/python/datashaper/datashaper/__init__.py
index 4bd527268..62cc81e25 100644
--- a/python/datashaper/datashaper/__init__.py
+++ b/python/datashaper/datashaper/__init__.py
@@ -9,6 +9,7 @@ from .engine import (
Criterion,
DataType,
FieldAggregateOperation,
+ FileType,
FilterArgs,
FilterCompareType,
InputColumnArgs,
@@ -116,6 +117,7 @@ __all__ = [
"Category",
"Criterion",
"FieldAggregateOperation",
+ "FileType",
"FilterArgs",
"FilterCompareType",
"InputColumnArgs",
diff --git a/python/datashaper/datashaper/engine/__init__.py b/python/datashaper/datashaper/engine/__init__.py
index ee403e779..015f1e6ee 100644
--- a/python/datashaper/datashaper/engine/__init__.py
+++ b/python/datashaper/datashaper/engine/__init__.py
@@ -15,6 +15,7 @@ from .types import (
Criterion,
DataType,
FieldAggregateOperation,
+ FileType,
FilterArgs,
FilterCompareType,
InputColumnArgs,
@@ -55,6 +56,7 @@ __all__ = [
"Category",
"Criterion",
"FieldAggregateOperation",
+ "FileType",
"FilterArgs",
"FilterCompareType",
"InputColumnArgs",
diff --git a/python/datashaper/datashaper/engine/types.py b/python/datashaper/datashaper/engine/types.py
index 190ec77d0..5b19af1fd 100644
--- a/python/datashaper/datashaper/engine/types.py
+++ b/python/datashaper/datashaper/engine/types.py
@@ -243,6 +243,14 @@ class WindowFunction(str, Enum):
UUID = "uuid"
+class FileType(str, Enum):
+    """File type used for the snapshot verb."""
+
+    Json = "json"
+    Csv = "csv"
+    Parquet = "parquet"
+
+
@dataclass
class OrderByInstruction:
"""Details regarding how to order a column."""
diff --git a/python/datashaper/datashaper/engine/verbs/snapshot.py b/python/datashaper/datashaper/engine/verbs/snapshot.py
new file mode 100644
index 000000000..72c7febc9
--- /dev/null
+++ b/python/datashaper/datashaper/engine/verbs/snapshot.py
@@ -0,0 +1,37 @@
+#
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license. See LICENSE file in the project.
+#
+"""Snapshot verb implementation."""
+from typing import cast
+
+import pandas as pd
+
+from datashaper.engine.types import FileType
+from datashaper.engine.verbs.verbs_mapping import verb
+from datashaper.table_store.types import VerbResult, create_verb_result
+
+from .verb_input import VerbInput
+
+
+@verb(name="snapshot")
+def snapshot(
+ input: VerbInput,
+ name: str,
+ file_type: FileType,
+ **_kwargs: dict,
+) -> VerbResult:
+ """Snapshot verb implementation."""
+ output = cast(pd.DataFrame, input.get_input())
+ file_name = "./" + name + "." + file_type
+
+ if file_type == FileType.Csv:
+ output.to_csv(file_name)
+
+ if file_type == FileType.Json:
+ output.to_json(file_name, orient="records", compression="infer")
+
+ if file_type == FileType.Parquet:
+ output.to_parquet(file_name)
+
+ return create_verb_result(output)
diff --git a/python/datashaper/tests/verbs/snapshot_test.py b/python/datashaper/tests/verbs/snapshot_test.py
new file mode 100644
index 000000000..6afcae016
--- /dev/null
+++ b/python/datashaper/tests/verbs/snapshot_test.py
@@ -0,0 +1,33 @@
+import pandas as pd
+
+from datashaper.engine.verbs import VerbInput, VerbManager
+from datashaper.table_store.types import TableContainer, VerbResult
+
+
+def make_verb_input(data: list, columns: list[str]):
+ pd_table = pd.DataFrame(data=data, columns=columns)
+ table_container = TableContainer(pd_table)
+ return VerbInput(table_container)
+
+
+def test_snapshot_csv():
+ verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
+ snapshot = VerbManager.get().get_verb("snapshot").func
+ output: VerbResult = snapshot(input=verb_input, name="test-file", file_type="csv")
+ output: TableContainer = output.output
+
+
+def test_snapshot_json():
+ verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
+ snapshot = VerbManager.get().get_verb("snapshot").func
+ output: VerbResult = snapshot(input=verb_input, name="test-file", file_type="json")
+ output: TableContainer = output.output
+
+
+def test_snapshot_parquet():
+ verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
+ snapshot = VerbManager.get().get_verb("snapshot").func
+ output: VerbResult = snapshot(
+ input=verb_input, name="test-file", file_type="parquet"
+ )
+ output: TableContainer = output.output