snapshot python implementation (#714)

This commit is contained in:
gaudyb 2024-04-22 13:58:41 -06:00 коммит произвёл GitHub
Родитель 128b15bb78
Коммит a4ba6529f3
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
14 изменённых файлов: 343 добавлений и 0 удалений

9
.yarn/versions/cb6d33ac.yml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
releases:
"@datashaper/schema": minor
declined:
- "@datashaper/app-framework"
- "@datashaper/react"
- "@datashaper/tables"
- "@datashaper/webapp"
- "@datashaper/workflow"

22
javascript/schema/docs/markdown/schema.filetype.md сгенерированный Normal file
Просмотреть файл

@ -0,0 +1,22 @@
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [FileType](./schema.filetype.md)
## FileType enum
These are the available formats for the snapshot verb.
<b>Signature:</b>
```typescript
export declare enum FileType
```
## Enumeration Members
| Member | Value | Description |
| --- | --- | --- |
| Csv | <code>&quot;csv&quot;</code> | |
| Json | <code>&quot;json&quot;</code> | |
| Parquet | <code>&quot;parquet&quot;</code> | |

2
javascript/schema/docs/markdown/schema.md сгенерированный
Просмотреть файл

@ -19,6 +19,7 @@
| [DateComparisonOperator](./schema.datecomparisonoperator.md) | |
| [ErrorCode](./schema.errorcode.md) | |
| [FieldAggregateOperation](./schema.fieldaggregateoperation.md) | This is the subset of aggregate functions that can operate on a single field so we don't accommodate additional args. See https://uwdata.github.io/arquero/api/op\#aggregate-functions |
| [FileType](./schema.filetype.md) | These are the available formats for the snapshot verb. |
| [FilterCompareType](./schema.filtercomparetype.md) | Indicates the comparison type used for a filter operation. This is done on a row-by-row basis. |
| [JoinStrategy](./schema.joinstrategy.md) | |
| [KnownProfile](./schema.knownprofile.md) | |
@ -98,6 +99,7 @@
| [ResourceSchema](./schema.resourceschema.md) | Parent class for any resource type understood by the system. Any object type that extends from Resource is expected to have a standalone schema published. For project state, this can be left as generic as possible for now. |
| [RollupArgs](./schema.rollupargs.md) | |
| [SampleArgs](./schema.sampleargs.md) | |
| [SnapshotArgs](./schema.snapshotargs.md) | |
| [SpreadArgs](./schema.spreadargs.md) | |
| [StepJsonCommon](./schema.stepjsoncommon.md) | Common step properties |
| [StringsArgs](./schema.stringsargs.md) | |

11
javascript/schema/docs/markdown/schema.snapshotargs.filetype.md сгенерированный Normal file
Просмотреть файл

@ -0,0 +1,11 @@
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [SnapshotArgs](./schema.snapshotargs.md) &gt; [fileType](./schema.snapshotargs.filetype.md)
## SnapshotArgs.fileType property
<b>Signature:</b>
```typescript
fileType: FileType;
```

19
javascript/schema/docs/markdown/schema.snapshotargs.md сгенерированный Normal file
Просмотреть файл

@ -0,0 +1,19 @@
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [SnapshotArgs](./schema.snapshotargs.md)
## SnapshotArgs interface
<b>Signature:</b>
```typescript
export interface SnapshotArgs
```
## Properties
| Property | Modifiers | Type | Description |
| --- | --- | --- | --- |
| [fileType](./schema.snapshotargs.filetype.md) | | [FileType](./schema.filetype.md) | |
| [name](./schema.snapshotargs.name.md) | | string | |

11
javascript/schema/docs/markdown/schema.snapshotargs.name.md сгенерированный Normal file
Просмотреть файл

@ -0,0 +1,11 @@
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
[Home](./index.md) &gt; [@datashaper/schema](./schema.md) &gt; [SnapshotArgs](./schema.snapshotargs.md) &gt; [name](./schema.snapshotargs.name.md)
## SnapshotArgs.name property
<b>Signature:</b>
```typescript
name: string;
```

151
javascript/schema/docs/report/schema.api.json сгенерированный
Просмотреть файл

@ -5619,6 +5619,85 @@
],
"extendsTokenRanges": []
},
{
"kind": "Enum",
"canonicalReference": "@datashaper/schema!FileType:enum",
"docComment": "/**\n * These are the available formats for the snapshot verb.\n */\n",
"excerptTokens": [
{
"kind": "Content",
"text": "export declare enum FileType "
}
],
"releaseTag": "Public",
"name": "FileType",
"preserveMemberOrder": false,
"members": [
{
"kind": "EnumMember",
"canonicalReference": "@datashaper/schema!FileType.Csv:member",
"docComment": "",
"excerptTokens": [
{
"kind": "Content",
"text": "Csv = "
},
{
"kind": "Content",
"text": "\"csv\""
}
],
"initializerTokenRange": {
"startIndex": 1,
"endIndex": 2
},
"releaseTag": "Public",
"name": "Csv"
},
{
"kind": "EnumMember",
"canonicalReference": "@datashaper/schema!FileType.Json:member",
"docComment": "",
"excerptTokens": [
{
"kind": "Content",
"text": "Json = "
},
{
"kind": "Content",
"text": "\"json\""
}
],
"initializerTokenRange": {
"startIndex": 1,
"endIndex": 2
},
"releaseTag": "Public",
"name": "Json"
},
{
"kind": "EnumMember",
"canonicalReference": "@datashaper/schema!FileType.Parquet:member",
"docComment": "",
"excerptTokens": [
{
"kind": "Content",
"text": "Parquet = "
},
{
"kind": "Content",
"text": "\"parquet\""
}
],
"initializerTokenRange": {
"startIndex": 1,
"endIndex": 2
},
"releaseTag": "Public",
"name": "Parquet"
}
]
},
{
"kind": "Interface",
"canonicalReference": "@datashaper/schema!FillArgs:interface",
@ -9269,6 +9348,78 @@
}
]
},
{
"kind": "Interface",
"canonicalReference": "@datashaper/schema!SnapshotArgs:interface",
"docComment": "",
"excerptTokens": [
{
"kind": "Content",
"text": "export interface SnapshotArgs "
}
],
"releaseTag": "Public",
"name": "SnapshotArgs",
"preserveMemberOrder": false,
"members": [
{
"kind": "PropertySignature",
"canonicalReference": "@datashaper/schema!SnapshotArgs#fileType:member",
"docComment": "",
"excerptTokens": [
{
"kind": "Content",
"text": "fileType: "
},
{
"kind": "Reference",
"text": "FileType",
"canonicalReference": "@datashaper/schema!FileType:enum"
},
{
"kind": "Content",
"text": ";"
}
],
"isReadonly": false,
"isOptional": false,
"releaseTag": "Public",
"name": "fileType",
"propertyTypeTokenRange": {
"startIndex": 1,
"endIndex": 2
}
},
{
"kind": "PropertySignature",
"canonicalReference": "@datashaper/schema!SnapshotArgs#name:member",
"docComment": "",
"excerptTokens": [
{
"kind": "Content",
"text": "name: "
},
{
"kind": "Content",
"text": "string"
},
{
"kind": "Content",
"text": ";"
}
],
"isReadonly": false,
"isOptional": false,
"releaseTag": "Public",
"name": "name",
"propertyTypeTokenRange": {
"startIndex": 1,
"endIndex": 2
}
}
],
"extendsTokenRanges": []
},
{
"kind": "Enum",
"canonicalReference": "@datashaper/schema!SortDirection:enum",

22
javascript/schema/docs/report/schema.api.md сгенерированный
Просмотреть файл

@ -490,6 +490,18 @@ export interface FieldMetadata {
type?: DataType;
}
// Warning: (ae-missing-release-tag) "FileType" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public
export enum FileType {
// (undocumented)
Csv = "csv",
// (undocumented)
Json = "json",
// (undocumented)
Parquet = "parquet"
}
// Warning: (ae-missing-release-tag) "FillArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public (undocumented)
@ -897,6 +909,16 @@ export enum SetOp {
Union = "union"
}
// Warning: (ae-missing-release-tag) "SnapshotArgs" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public (undocumented)
export interface SnapshotArgs {
// (undocumented)
fileType: FileType;
// (undocumented)
name: string;
}
// Warning: (ae-missing-release-tag) "SortDirection" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public (undocumented)

Просмотреть файл

@ -358,6 +358,15 @@ export enum WindowFunction {
UUID = 'uuid'
}
/**
 * These are the available formats for the snapshot verb.
 */
export enum FileType {
	/** Comma-separated values; serialized as the string 'csv'. */
	Csv = 'csv',
	/** JSON; serialized as the string 'json'. */
	Json = 'json',
	/** Parquet; serialized as the string 'parquet'. */
	Parquet = 'parquet'
}
export interface AggregateArgs extends RollupArgs {
/**
* Column to group by
@ -722,6 +731,11 @@ export interface PrintArgs {
limit?: number
}
/**
 * Arguments for the snapshot verb.
 */
export interface SnapshotArgs {
	/**
	 * Name given to the snapshot.
	 */
	name: string
	/**
	 * Format of the snapshot; one of the FileType members.
	 */
	fileType: FileType
}
export interface StringsReplaceArgs extends StringsArgs {
pattern: string
replacement: string

Просмотреть файл

@ -9,6 +9,7 @@ from .engine import (
Criterion,
DataType,
FieldAggregateOperation,
FileType,
FilterArgs,
FilterCompareType,
InputColumnArgs,
@ -116,6 +117,7 @@ __all__ = [
"Category",
"Criterion",
"FieldAggregateOperation",
"FileType",
"FilterArgs",
"FilterCompareType",
"InputColumnArgs",

Просмотреть файл

@ -15,6 +15,7 @@ from .types import (
Criterion,
DataType,
FieldAggregateOperation,
FileType,
FilterArgs,
FilterCompareType,
InputColumnArgs,
@ -55,6 +56,7 @@ __all__ = [
"Category",
"Criterion",
"FieldAggregateOperation",
"FileType",
"FilterArgs",
"FilterCompareType",
"InputColumnArgs",

Просмотреть файл

@ -243,6 +243,14 @@ class WindowFunction(str, Enum):
UUID = "uuid"
class FileType(str, Enum):
    """File types supported by the snapshot verb."""

    Json = "json"
    Csv = "csv"
    Parquet = "parquet"
@dataclass
class OrderByInstruction:
"""Details regarding how to order a column."""

Просмотреть файл

@ -0,0 +1,37 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project.
#
"""Snapshot verb implementation."""
from typing import cast
import pandas as pd
from datashaper.engine.types import FileType
from datashaper.engine.verbs.verbs_mapping import verb
from datashaper.table_store.types import VerbResult, create_verb_result
from .verb_input import VerbInput
@verb(name="snapshot")
def snapshot(
    input: VerbInput,
    name: str,
    file_type: FileType,
    **_kwargs: dict,
) -> VerbResult:
    """Write the input table to disk and pass it through unchanged.

    The table is saved under the relative path ``./<name>.<file_type>``.
    CSV, JSON (records orientation) and Parquet are handled; any other
    ``file_type`` value writes nothing.

    Args:
        input: Verb input whose table is snapshotted.
        name: Base name of the snapshot file, without extension.
        file_type: Serialization format, also used as the file extension.
        **_kwargs: Extra keyword arguments; ignored.

    Returns:
        A verb result wrapping the same table that was read from ``input``.
    """
    table = cast(pd.DataFrame, input.get_input())
    # Plain concatenation keeps the enum member's raw string value as the
    # extension; string formatting of a (str, Enum) member is version-dependent.
    target = "./" + name + "." + file_type

    if file_type == FileType.Csv:
        table.to_csv(target)
    elif file_type == FileType.Json:
        table.to_json(target, orient="records", compression="infer")
    elif file_type == FileType.Parquet:
        table.to_parquet(target)

    return create_verb_result(table)

Просмотреть файл

@ -0,0 +1,33 @@
import pandas as pd
from datashaper.engine.verbs import VerbInput, VerbManager
from datashaper.table_store.types import TableContainer, VerbResult
def make_verb_input(data: list, columns: list[str]):
    """Build a VerbInput wrapping a DataFrame made from ``data`` and ``columns``."""
    frame = pd.DataFrame(data=data, columns=columns)
    return VerbInput(TableContainer(frame))
def test_snapshot_csv():
    """The snapshot verb writes a CSV file and still returns an output table."""
    from pathlib import Path

    # The verb writes to "./<name>.<file_type>" relative to the working directory.
    snapshot_path = Path("test-file.csv")
    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
    snapshot = VerbManager.get().get_verb("snapshot").func
    try:
        result: VerbResult = snapshot(
            input=verb_input, name="test-file", file_type="csv"
        )
        container: TableContainer = result.output
        assert container is not None
        assert snapshot_path.exists()
    finally:
        # Do not leave the snapshot behind in the working directory.
        snapshot_path.unlink(missing_ok=True)
def test_snapshot_json():
    """The snapshot verb writes a JSON file and still returns an output table."""
    from pathlib import Path

    # The verb writes to "./<name>.<file_type>" relative to the working directory.
    snapshot_path = Path("test-file.json")
    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
    snapshot = VerbManager.get().get_verb("snapshot").func
    try:
        result: VerbResult = snapshot(
            input=verb_input, name="test-file", file_type="json"
        )
        container: TableContainer = result.output
        assert container is not None
        assert snapshot_path.exists()
    finally:
        # Do not leave the snapshot behind in the working directory.
        snapshot_path.unlink(missing_ok=True)
def test_snapshot_parquet():
    """The snapshot verb writes a Parquet file and still returns an output table."""
    from pathlib import Path

    # The verb writes to "./<name>.<file_type>" relative to the working directory.
    snapshot_path = Path("test-file.parquet")
    verb_input = make_verb_input([[1], [2], [3], [4], [5]], ["id"])
    snapshot = VerbManager.get().get_verb("snapshot").func
    try:
        result: VerbResult = snapshot(
            input=verb_input, name="test-file", file_type="parquet"
        )
        container: TableContainer = result.output
        assert container is not None
        assert snapshot_path.exists()
    finally:
        # Do not leave the snapshot behind in the working directory.
        snapshot_path.unlink(missing_ok=True)