snapshot python implementation (#714)
This commit is contained in:
Родитель
128b15bb78
Коммит
a4ba6529f3
|
@ -0,0 +1,9 @@
|
|||
releases:
|
||||
"@datashaper/schema": minor
|
||||
|
||||
declined:
|
||||
- "@datashaper/app-framework"
|
||||
- "@datashaper/react"
|
||||
- "@datashaper/tables"
|
||||
- "@datashaper/webapp"
|
||||
- "@datashaper/workflow"
|
|
@ -0,0 +1,22 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [@datashaper/schema](./schema.md) > [FileType](./schema.filetype.md)
|
||||
|
||||
## FileType enum
|
||||
|
||||
These are the available formats for the snapshot verb.
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
export declare enum FileType
|
||||
```
|
||||
|
||||
## Enumeration Members
|
||||
|
||||
| Member | Value | Description |
|
||||
| --- | --- | --- |
|
||||
| Csv | <code>"csv"</code> | |
|
||||
| Json | <code>"json"</code> | |
|
||||
| Parquet | <code>"parquet"</code> | |
|
||||
|
|
@ -19,6 +19,7 @@
|
|||
| [DateComparisonOperator](./schema.datecomparisonoperator.md) | |
|
||||
| [ErrorCode](./schema.errorcode.md) | |
|
||||
| [FieldAggregateOperation](./schema.fieldaggregateoperation.md) | This is the subset of aggregate functions that can operate on a single field so we don't accommodate additional args. See https://uwdata.github.io/arquero/api/op\#aggregate-functions |
|
||||
| [FileType](./schema.filetype.md) | These are the available formats for the snapshot verb. |
|
||||
| [FilterCompareType](./schema.filtercomparetype.md) | Indicates the comparison type used for a filter operation. This is done on a row-by-row basis. |
|
||||
| [JoinStrategy](./schema.joinstrategy.md) | |
|
||||
| [KnownProfile](./schema.knownprofile.md) | |
|
||||
|
@ -98,6 +99,7 @@
|
|||
| [ResourceSchema](./schema.resourceschema.md) | Parent class for any resource type understood by the system. Any object type that extends from Resource is expected to have a standalone schema published. For project state, this can be left as generic as possible for now. |
|
||||
| [RollupArgs](./schema.rollupargs.md) | |
|
||||
| [SampleArgs](./schema.sampleargs.md) | |
|
||||
| [SnapshotArgs](./schema.snapshotargs.md) | |
|
||||
| [SpreadArgs](./schema.spreadargs.md) | |
|
||||
| [StepJsonCommon](./schema.stepjsoncommon.md) | Common step properties |
|
||||
| [StringsArgs](./schema.stringsargs.md) | |
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [@datashaper/schema](./schema.md) > [SnapshotArgs](./schema.snapshotargs.md) > [fileType](./schema.snapshotargs.filetype.md)
|
||||
|
||||
## SnapshotArgs.fileType property
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
fileType: FileType;
|
||||
```
|
|
@ -0,0 +1,19 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [@datashaper/schema](./schema.md) > [SnapshotArgs](./schema.snapshotargs.md)
|
||||
|
||||
## SnapshotArgs interface
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
export interface SnapshotArgs
|
||||
```
|
||||
|
||||
## Properties
|
||||
|
||||
| Property | Modifiers | Type | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| [fileType](./schema.snapshotargs.filetype.md) | | [FileType](./schema.filetype.md) | |
|
||||
| [name](./schema.snapshotargs.name.md) | | string | |
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [@datashaper/schema](./schema.md) > [SnapshotArgs](./schema.snapshotargs.md) > [name](./schema.snapshotargs.name.md)
|
||||
|
||||
## SnapshotArgs.name property
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
name: string;
|
||||
```
|
|
@ -5619,6 +5619,85 @@
|
|||
],
|
||||
"extendsTokenRanges": []
|
||||
},
|
||||
{
|
||||
"kind": "Enum",
|
||||
"canonicalReference": "@datashaper/schema!FileType:enum",
|
||||
"docComment": "/**\n * These are the available formats for the snapshot verb.\n */\n",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "export declare enum FileType "
|
||||
}
|
||||
],
|
||||
"releaseTag": "Public",
|
||||
"name": "FileType",
|
||||
"preserveMemberOrder": false,
|
||||
"members": [
|
||||
{
|
||||
"kind": "EnumMember",
|
||||
"canonicalReference": "@datashaper/schema!FileType.Csv:member",
|
||||
"docComment": "",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "Csv = "
|
||||
},
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "\"csv\""
|
||||
}
|
||||
],
|
||||
"initializerTokenRange": {
|
||||
"startIndex": 1,
|
||||
"endIndex": 2
|
||||
},
|
||||
"releaseTag": "Public",
|
||||
"name": "Csv"
|
||||
},
|
||||
{
|
||||
"kind": "EnumMember",
|
||||
"canonicalReference": "@datashaper/schema!FileType.Json:member",
|
||||
"docComment": "",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "Json = "
|
||||
},
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "\"json\""
|
||||
}
|
||||
],
|
||||
"initializerTokenRange": {
|
||||
"startIndex": 1,
|
||||
"endIndex": 2
|
||||
},
|
||||
"releaseTag": "Public",
|
||||
"name": "Json"
|
||||
},
|
||||
{
|
||||
"kind": "EnumMember",
|
||||
"canonicalReference": "@datashaper/schema!FileType.Parquet:member",
|
||||
"docComment": "",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "Parquet = "
|
||||
},
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "\"parquet\""
|
||||
}
|
||||
],
|
||||
"initializerTokenRange": {
|
||||
"startIndex": 1,
|
||||
"endIndex": 2
|
||||
},
|
||||
"releaseTag": "Public",
|
||||
"name": "Parquet"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"kind": "Interface",
|
||||
"canonicalReference": "@datashaper/schema!FillArgs:interface",
|
||||
|
@ -9269,6 +9348,78 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"kind": "Interface",
|
||||
"canonicalReference": "@datashaper/schema!SnapshotArgs:interface",
|
||||
"docComment": "",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "export interface SnapshotArgs "
|
||||
}
|
||||
],
|
||||
"releaseTag": "Public",
|
||||
"name": "SnapshotArgs",
|
||||
"preserveMemberOrder": false,
|
||||
"members": [
|
||||
{
|
||||
"kind": "PropertySignature",
|
||||
"canonicalReference": "@datashaper/schema!SnapshotArgs#fileType:member",
|
||||
"docComment": "",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "fileType: "
|
||||
},
|
||||
{
|
||||
"kind": "Reference",
|
||||
"text": "FileType",
|
||||
"canonicalReference": "@datashaper/schema!FileType:enum"
|
||||
},
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": ";"
|
||||
}
|
||||
],
|
||||
"isReadonly": false,
|
||||
"isOptional": false,
|
||||
"releaseTag": "Public",
|
||||
"name": "fileType",
|
||||
"propertyTypeTokenRange": {
|
||||
"startIndex": 1,
|
||||
"endIndex": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "PropertySignature",
|
||||
"canonicalReference": "@datashaper/schema!SnapshotArgs#name:member",
|
||||
"docComment": "",
|
||||
"excerptTokens": [
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "name: "
|
||||
},
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": "string"
|
||||
},
|
||||
{
|
||||
"kind": "Content",
|
||||
"text": ";"
|
||||
}
|
||||
],
|
||||
"isReadonly": false,
|
||||
"isOptional": false,
|
||||
"releaseTag": "Public",
|
||||
"name": "name",
|
||||
"propertyTypeTokenRange": {
|
||||
"startIndex": 1,
|
||||
"endIndex": 2
|
||||
}
|
||||
}
|
||||
],
|
||||
"extendsTokenRanges": []
|
||||
},
|
||||
{
|
||||
"kind": "Enum",
|
||||
"canonicalReference": "@datashaper/schema!SortDirection:enum",
|
||||
|
|
|
@ -490,6 +490,18 @@ export interface FieldMetadata {
|
|||
type?: DataType;
|
||||
}
|
||||
|
||||
// Warning: (ae-missing-release-tag) "FileType" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
|
||||
//
|
||||
// @public
|
||||
export enum FileType {
|
||||
// (undocumented)
|
||||
Csv = "csv",
|
||||
// (undocumented)
|
||||
Json = "json",
|
||||
// (undocumented)
|
||||
Parquet = "parquet"
|
||||
}
|
||||
|
||||
// Warning: (ae-missing-release-tag) "FillArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
|
||||
//
|
||||
// @public (undocumented)
|
||||
|
@ -897,6 +909,16 @@ export enum SetOp {
|
|||
Union = "union"
|
||||
}
|
||||
|
||||
// Warning: (ae-missing-release-tag) "SnapshotArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
|
||||
//
|
||||
// @public (undocumented)
|
||||
export interface SnapshotArgs {
|
||||
// (undocumented)
|
||||
fileType: FileType;
|
||||
// (undocumented)
|
||||
name: string;
|
||||
}
|
||||
|
||||
// Warning: (ae-missing-release-tag) "SortDirection" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
|
||||
//
|
||||
// @public (undocumented)
|
||||
|
|
|
@ -358,6 +358,15 @@ export enum WindowFunction {
|
|||
UUID = 'uuid'
|
||||
}
|
||||
|
||||
/**
 * These are the available formats for the snapshot verb.
 *
 * The member value is the string used in `SnapshotArgs.fileType`.
 */
export enum FileType {
	/** Snapshot is written as a `.csv` file. */
	Csv = 'csv',
	/** Snapshot is written as a `.json` file. */
	Json = 'json',
	/** Snapshot is written as a `.parquet` file. */
	Parquet = 'parquet'
}
|
||||
|
||||
export interface AggregateArgs extends RollupArgs {
|
||||
/**
|
||||
* Column to group by
|
||||
|
@ -722,6 +731,11 @@ export interface PrintArgs {
|
|||
limit?: number
|
||||
}
|
||||
|
||||
/**
 * Arguments for the snapshot verb.
 */
export interface SnapshotArgs {
	/**
	 * Base name of the snapshot file, without extension.
	 */
	name: string
	/**
	 * Format the snapshot is written in.
	 */
	fileType: FileType
}
|
||||
|
||||
export interface StringsReplaceArgs extends StringsArgs {
|
||||
pattern: string
|
||||
replacement: string
|
||||
|
|
|
@ -9,6 +9,7 @@ from .engine import (
|
|||
Criterion,
|
||||
DataType,
|
||||
FieldAggregateOperation,
|
||||
FileType,
|
||||
FilterArgs,
|
||||
FilterCompareType,
|
||||
InputColumnArgs,
|
||||
|
@ -116,6 +117,7 @@ __all__ = [
|
|||
"Category",
|
||||
"Criterion",
|
||||
"FieldAggregateOperation",
|
||||
"FileType",
|
||||
"FilterArgs",
|
||||
"FilterCompareType",
|
||||
"InputColumnArgs",
|
||||
|
|
|
@ -15,6 +15,7 @@ from .types import (
|
|||
Criterion,
|
||||
DataType,
|
||||
FieldAggregateOperation,
|
||||
FileType,
|
||||
FilterArgs,
|
||||
FilterCompareType,
|
||||
InputColumnArgs,
|
||||
|
@ -55,6 +56,7 @@ __all__ = [
|
|||
"Category",
|
||||
"Criterion",
|
||||
"FieldAggregateOperation",
|
||||
"FileType",
|
||||
"FilterArgs",
|
||||
"FilterCompareType",
|
||||
"InputColumnArgs",
|
||||
|
|
|
@ -243,6 +243,14 @@ class WindowFunction(str, Enum):
|
|||
UUID = "uuid"
|
||||
|
||||
|
||||
class FileType(str, Enum):
    """Output formats supported by the snapshot verb."""

    # Each value doubles as the extension of the written snapshot file.
    Json = "json"
    Csv = "csv"
    Parquet = "parquet"
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrderByInstruction:
|
||||
"""Details regarding how to order a column."""
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
#
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license. See LICENSE file in the project.
|
||||
#
|
||||
"""Snapshot verb implementation."""
|
||||
from typing import cast
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from datashaper.engine.types import FileType
|
||||
from datashaper.engine.verbs.verbs_mapping import verb
|
||||
from datashaper.table_store.types import VerbResult, create_verb_result
|
||||
|
||||
from .verb_input import VerbInput
|
||||
|
||||
|
||||
@verb(name="snapshot")
def snapshot(
    input: VerbInput,
    name: str,
    file_type: FileType,
    **_kwargs: dict,
) -> VerbResult:
    """Write the input table to ``./<name>.<file_type>`` and pass it through.

    Args:
        input: Verb input whose table is written to disk.
        name: Base name of the snapshot file, without extension.
        file_type: Output format, given as a ``FileType`` member or its
            string value (``"csv"``, ``"json"`` or ``"parquet"``).
        **_kwargs: Additional keyword arguments; ignored.

    Returns:
        A verb result created from the same table that was read from
        ``input``.

    Raises:
        ValueError: If ``file_type`` is not a valid ``FileType``.
    """
    output = cast(pd.DataFrame, input.get_input())

    # Callers may pass the raw string value; normalizing it here also rejects
    # unknown formats instead of silently writing nothing.
    file_type = FileType(file_type)
    file_name = "./" + name + "." + file_type.value

    if file_type == FileType.Csv:
        output.to_csv(file_name)
    elif file_type == FileType.Json:
        output.to_json(file_name, orient="records", compression="infer")
    elif file_type == FileType.Parquet:
        output.to_parquet(file_name)

    return create_verb_result(output)
|
|
@ -0,0 +1,33 @@
|
|||
import pandas as pd
|
||||
|
||||
from datashaper.engine.verbs import VerbInput, VerbManager
|
||||
from datashaper.table_store.types import TableContainer, VerbResult
|
||||
|
||||
|
||||
def make_verb_input(data: list, columns: list[str]):
    """Build a VerbInput around a DataFrame made from ``data`` and ``columns``."""
    frame = pd.DataFrame(data=data, columns=columns)
    return VerbInput(TableContainer(frame))
|
||||
|
||||
|
||||
def test_snapshot_csv():
    """Snapshot as CSV writes ``./test-file.csv`` containing the input rows."""
    from pathlib import Path

    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
    snapshot = VerbManager.get().get_verb("snapshot").func
    snapshot_path = Path("test-file.csv")

    try:
        result: VerbResult = snapshot(
            input=verb_input, name="test-file", file_type="csv"
        )

        assert result.output is not None
        assert pd.read_csv(snapshot_path)["id"].tolist() == [1, 2, 3, 4, 5]
    finally:
        # Do not leave the snapshot behind in the working directory.
        snapshot_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_snapshot_json():
    """Snapshot as JSON writes ``./test-file.json`` containing the input rows."""
    from pathlib import Path

    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
    snapshot = VerbManager.get().get_verb("snapshot").func
    snapshot_path = Path("test-file.json")

    try:
        result: VerbResult = snapshot(
            input=verb_input, name="test-file", file_type="json"
        )

        assert result.output is not None
        # The verb writes with orient="records", so read it back the same way.
        written = pd.read_json(snapshot_path, orient="records")
        assert written["id"].tolist() == [1, 2, 3, 4, 5]
    finally:
        # Do not leave the snapshot behind in the working directory.
        snapshot_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_snapshot_parquet():
    """Snapshot as Parquet writes ``./test-file.parquet`` with the input rows."""
    from pathlib import Path

    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
    snapshot = VerbManager.get().get_verb("snapshot").func
    snapshot_path = Path("test-file.parquet")

    try:
        result: VerbResult = snapshot(
            input=verb_input, name="test-file", file_type="parquet"
        )

        assert result.output is not None
        assert pd.read_parquet(snapshot_path)["id"].tolist() == [1, 2, 3, 4, 5]
    finally:
        # Do not leave the snapshot behind in the working directory.
        snapshot_path.unlink(missing_ok=True)
|
Загрузка…
Ссылка в новой задаче