* Zanran Scaffolder initial version added

* "intro.md" renamed to "readme.md"
This commit is contained in:
adrianwhyte 2020-08-05 05:05:30 +02:00 коммит произвёл GitHub
Родитель e5c1d47762
Коммит ef81c9809d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 251 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,199 @@
{
"swagger": "2.0",
"info": {
"title": "Zanran Scaffolder",
"description": "The Zanran Scaffolder extracts tables and text from PDF or image files. Tables are extracted as Excel or XML, text as XML. The Scaffolder is best for reports like financial statements, scientific papers, brokers' reports, etc. Initially, you can test your documents using the manual, anonymous, practice site: www.zanrandemoapi.com",
"contact": {
"name": "Zanran contact",
"url": "https://pdf.zanran.com/contact-us",
"email": "helpdesk@zanran.com"
},
"version": "1.0"
},
"host": "scaffolderlink.zanran.com",
"basePath": "/",
"schemes": [
"http"
],
"paths": {
"/api/Upload/UploadFile": {
"post": {
"summary": "Upload Document",
"description": "Upload a PDF or image document for table and text extraction",
"operationId": "UploadDocument",
"consumes": [
"multipart/form-data"
],
"parameters": [
{
"name": "file",
"in": "formData",
"required": true,
"type": "file",
"x-ms-summary": "File",
"description": "The document file to upload"
},
{
"name": "startPage",
"in": "formData",
"required": false,
"type": "integer",
"x-ms-summary": "Start page",
"description": "Start page if analysing only a range"
},
{
"name": "endPage",
"in": "formData",
"required": false,
"type": "integer",
"x-ms-summary": "End page",
"description": "End page if analysing only a range"
},
{
"name": "Coords",
"in": "formData",
"required": false,
"type": "string",
"x-ms-summary": "Coords",
"description": "Coordinates of table to analyse (for processing a single page). NOTE: this is a specialized requirement; if you wish to use this parameter, please contact us at helpdesk@zanran.com to ask how to proceed"
}
],
"produces": ["text/plain"],
"responses": {
"200": {
"description": "The name of the file to which the document is saved on the server e.g. user@domain.com~#namedoc.pdf",
"schema": {
"type": "string"
}
}
}
}
},
"/api/DocSearch/GetStatus": {
"post": {
"summary": "Get Status",
"description": "Get the status of the document being uploaded - i.e. whether it is in the queue to be processed, being processed or has finished processing",
"operationId": "GetStatus",
"consumes": [
"multipart/form-data"
],
"parameters": [
{
"name": "docname",
"in": "formData",
"required": true,
"type": "string",
"x-ms-summary": "Document name without extension",
"description": "the original document filename without the extension"
}
],
"produces": ["text/plain"],
"responses": {
"200": {
"description": "The status of the uploaded document. Values can be 'in-queue', 'scaffolder-not-running', 'processing', 'processed', 'scanned', 'error-processing', 'absent', 'problematic', 'retrieving-status' or 'incorrect-filetype'",
"schema": {
"type": "string"
}
}
}
}
},
"/files/{docname}.xlsx": {
"get": {
"summary": "Download results as Xlsx",
"description": "Downloads the results of the table analysis as an Excel (Xlsx) document with separate worksheets for each table.",
"operationId": "DownloadFile-Xlsx",
"parameters": [
{
"in": "path",
"name": "docname",
"type": "string",
"required": true,
"x-ms-summary": "Document name without extension",
"x-ms-url-encoding": "single",
"description": "the original document filename without the extension"
}
],
"responses": {
"200": {
"description": "The resulting Xlsx file",
"schema": {
"type": "file"
}
}
}
}
},
"/files/allxml/{docname}": {
"get": {
"summary": "Download results as Zipped up Xml files",
"description": "Downloads a zip file containing the analysis results in Xml format (one Xml file per page)",
"operationId": "DownloadFile-AllXml",
"parameters": [
{
"in": "path",
"name": "docname",
"type": "string",
"required": true,
"x-ms-summary": "Document name without extension",
"x-ms-url-encoding": "single",
"description": "the original document filename without the extension"
}
],
"responses": {
"200": {
"description": "The resulting Zip file",
"schema": {
"type": "file"
}
}
}
}
},
"/files/znr/{docname}": {
"get": {
"summary": "Download results as a Znr file",
"description": "Downloads the results in the form of a Znr file which can then be viewed and edited by Pdf Workbench (a Zanran tool designed for this purpose)",
"operationId": "DownloadFile-Znr",
"parameters": [
{
"in": "path",
"name": "docname",
"type": "string",
"required": true,
"x-ms-summary": "Document name without extension",
"x-ms-url-encoding": "single",
"description": "the original document filename without the extension"
}
],
"responses": {
"200": {
"description": "The resulting Znr file",
"schema": {
"type": "file"
}
}
}
}
}
},
"x-ms-connector-metadata": [
{
"propertyName": "Website",
"propertyValue": "http://www.zanran.com"
},
{
"propertyName": "Privacy policy",
"propertyValue": "https://pdf.zanran.com/privacy-policy"
},
{
"propertyName": "Categories",
"propertyValue": "Content and Files;Productivity"
}
],
"securityDefinitions": {
"basic_auth": {
"type": "basic"
}
}
}

Просмотреть файл

@ -0,0 +1,36 @@
{
"properties": {
"connectionParameters": {
"username": {
"type": "securestring",
"uiDefinition": {
"displayName": "username",
"description": "The username for this api",
"tooltip": "Provide the username (if you don't yet have an account go to http://scaffolderlink.zanran.com/ to register)",
"constraints": {
"tabIndex": 2,
"clearText": true,
"required": "true"
}
}
},
"password": {
"type": "securestring",
"uiDefinition": {
"displayName": "password",
"description": "The password for this api",
"tooltip": "Provide the password",
"constraints": {
"tabIndex": 3,
"clearText": false,
"required": "true"
}
}
}
},
"iconBrandColor": "#000000",
"capabilities": [],
"publisher": "Zanran Ltd",
"stackOwner": "Zanran Ltd"
}
}

Просмотреть файл

@ -0,0 +1,16 @@
The Zanran Scaffolder server provides a web API which enables users to automatically extract content from PDFs and images. It is designed primarily for extracting content from reports (annual accounts, scientific papers, market reports, etc.). Zanran's Scaffolder engine automatically determines the structure and layout of these documents and extracts content into constituent parts: blocks of text (e.g. paragraphs); tables; and images/graphics. It uses Computer Vision and Machine Learning and outputs data in structured formats like Excel and XML. It is scalable and does not require any manual intervention, pre-defined templates, training or configuration. The software is language-agnostic and is built for automation / RPA environments to process millions of files.
## Prerequisites
This connector accesses a free service for low-volume extraction of text and tables from PDFs.
Prerequisite: a username (your email address) and a password (which you choose).
## How to get credentials
Please register at: http://scaffolderlink.zanran.com/
## Known issues and limitations
We recommend testing using 'native' PDFs, rather than scanned ones - to remove any effects of OCR.