fix: Some tidying and build fixes (#1984)

* fix: add interpret-ml install

* chore: remove precommit and extra requirements.txt

* chore: fix test errors

* chore: notebook pip install fixes
This commit is contained in:
Mark Hamilton 2023-06-14 10:49:39 +01:00 коммит произвёл GitHub
Родитель fa3734ba2f
Коммит b23f050599
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
10 изменённых файлов: 39 добавлений и 50 удалений

Просмотреть файл

@ -1,13 +0,0 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
- id: black-jupyter

Просмотреть файл

@ -179,7 +179,7 @@ class AnalyzeDocumentSuite extends TransformerFuzzing[AnalyzeDocument] with Form
"Contoso\nAddress:\n1 Redmond way Suite\n6000 Redmond, WA\n99243\n" +
"Invoice For: Microsoft\n1020 Enterprise Way",
"CustomerAddress,CustomerAddressRecipient," +
"CustomerName,DueDate,InvoiceDate,InvoiceId,Items,VendorAddress,VendorName")
"CustomerName,DueDate,InvoiceDate,InvoiceId,InvoiceTotal,Items,VendorAddress,VendorName")
}
}

Просмотреть файл

@ -44,3 +44,5 @@ dependencies:
- huggingface-hub>=0.8.1
- langchain==0.0.151
- openai==0.27.5
- black==22.3.0
- black[jupyter]==22.3.0

Просмотреть файл

@ -1,7 +0,0 @@
#!/usr/bin/env bash
# If any command fails, exit immediately with that command's exit status
set -eo pipefail
echo "Running scalastyle.."
sbt scalastyle test:scalastyle

Просмотреть файл

@ -517,4 +517,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -27,6 +27,24 @@
" - If you are running it on Synapse, you'll need to [create an AML workspace and set up linked Service](https://microsoft.github.io/SynapseML/docs/next/mlflow/installation/). \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"import subprocess\n",
"import sys\n",
"\n",
"for package in [\"sqlparse\", \"raiwidgets\", \"interpret-community\"]:\n",
" subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", package])"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"metadata": {
@ -145,27 +163,26 @@
"experiment_name = f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\"\n",
"model_name = f\"isolation-forest-model\"\n",
"if running_on_synapse():\n",
" import subprocess\n",
" import sys\n",
" from pyspark.sql.functions import udf\n",
" from synapse.ml.core.platform import materializing_display as display\n",
"\n",
" # use regular display when running on interactive notebook\n",
" # from notebookutils.visualization import display\n",
"\n",
" for package in [\"sqlparse\", \"raiwidgets\", \"interpret-community\"]:\n",
" subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", package])"
" # from notebookutils.visualization import display"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bootstrap Spark Session\n",
"spark = SparkSession.builder.getOrCreate()"
]
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
@ -564,7 +581,7 @@
"outputs": [],
"source": [
"# Define UDF\n",
"vec2array = udf(lambda vec: vec.toArray().tolist(), ArrayType(FloatType()))"
"vec2array = F.udf(lambda vec: vec.toArray().tolist(), ArrayType(FloatType()))"
]
},
{
@ -1013,4 +1030,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}

Просмотреть файл

@ -51,7 +51,6 @@
"\n",
"if running_on_synapse():\n",
" shell = TerminalInteractiveShell.instance()\n",
" shell.define_macro(\"foo\", \"\"\"a,b=10,20\"\"\")\n",
" from notebookutils.visualization import display\n",
"\n",
"\n",
@ -481,4 +480,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -65,12 +65,10 @@ jobs:
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt scalastyle test:scalastyle'
- task: UsePythonVersion@0
inputs:
versionSpec: '3.8'
- script: pip install -r requirements.txt
displayName: 'Install requirements'
- template: templates/conda.yml
- bash: |
set -e
source activate synapseml
black --diff --color . && black --check -q .
displayName: 'Python Style Check'

Просмотреть файл

@ -1,7 +0,0 @@
# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.
# Required to auto-format python code
black==22.3.0
black[jupyter]==22.3.0
pre-commit==2.19.0

Просмотреть файл

@ -24,7 +24,7 @@ from synapse.ml.train import *
from numpy import random
df = spark.createDataFrame(
[(random.rand(), random.rand()) for _ in range(4096)], ["label", "prediction"]
[(random.rand(), random.rand()) for _ in range(2048)], ["label", "prediction"]
)
cms = (ComputeModelStatistics()
@ -43,8 +43,8 @@ import com.microsoft.azure.synapse.ml.train._
import scala.util.Random
val rand = new Random(1337)
val df = (Seq.fill(4096)(rand.nextDouble())
.zip(Seq.fill(4096)(rand.nextDouble()))
val df = (Seq.fill(2048)(rand.nextDouble())
.zip(Seq.fill(2048)(rand.nextDouble()))
.toDF("label", "prediction"))
val cms = (new ComputeModelStatistics()