[hf_api] delete deprecated methods and tests (#10159)

* [hf_api] delete deprecated methods and tests

cc @lhoestq

* Update test_hf_api.py
This commit is contained in:
Julien Chaumond 2021-02-12 21:35:06 +01:00 коммит произвёл GitHub
Родитель 1321356bdf
Коммит eed31db948
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 1 добавлений и 135 удалений

Просмотреть файл

@ -39,25 +39,6 @@ class RepoObj:
self.size = size
class S3Obj:
    """
    HuggingFace S3-based system, data structure that represents a file belonging to the current user.

    The four named constructor arguments are stored as same-named attributes.
    Any additional keyword arguments are accepted and ignored.
    """

    def __init__(self, filename: str, LastModified: str, ETag: str, Size: int, **kwargs):
        # **kwargs only absorbs unexpected keys; nothing from it is stored.
        self.filename, self.LastModified = filename, LastModified
        self.ETag, self.Size = ETag, Size
class PresignedUrl:
    """
    Plain holder for the `write`, `access` and `type` fields.

    Any additional keyword arguments are accepted and ignored.
    """

    def __init__(self, write: str, access: str, type: str, **kwargs):
        self.write, self.access = write, access
        # mime-type to send to S3.
        self.type = type
class ModelSibling:
"""
Data structure that represents a public file inside a model, accessible from huggingface.co
@ -77,16 +58,12 @@ class ModelInfo:
def __init__(
    self,
    modelId: Optional[str] = None,  # id of model
    author: Optional[str] = None,
    downloads: Optional[int] = None,
    tags: Optional[List[str]] = None,
    pipeline_tag: Optional[str] = None,
    siblings: Optional[List[Dict]] = None,  # list of files that constitute the model
    **kwargs
):
    """
    Store the given model fields as same-named attributes.

    Args:
        modelId: id of model.
        author: stored as-is.
        downloads: stored as-is.
        tags: list of tags; when omitted (or None) each instance gets its own new empty list.
        pipeline_tag: stored as-is.
        siblings: list of files that constitute the model; each dict is expanded into a
            `ModelSibling(**x)`. Left as None when not provided.
        **kwargs: accepted and ignored.
    """
    self.modelId = modelId
    self.author = author
    self.downloads = downloads
    # A `[]` default in the signature is a single object shared by every call, so mutating
    # one instance's tags would leak into all others; build a fresh list per instance instead.
    self.tags = tags if tags is not None else []
    self.pipeline_tag = pipeline_tag
    self.siblings = [ModelSibling(**x) for x in siblings] if siblings is not None else None
@ -95,8 +72,6 @@ class ModelInfo:
class HfApi:
ALLOWED_S3_FILE_TYPES = ["datasets", "metrics"]
def __init__(self, endpoint=None):
    """
    Remember the API endpoint on the instance.

    When `endpoint` is None the module-level `ENDPOINT` value is used instead.
    """
    if endpoint is None:
        endpoint = ENDPOINT
    self.endpoint = endpoint
@ -132,78 +107,6 @@ class HfApi:
r = requests.post(path, headers={"authorization": "Bearer {}".format(token)})
r.raise_for_status()
def presign(self, token: str, filetype: str, filename: str, organization: Optional[str] = None) -> PresignedUrl:
    """
    HuggingFace S3-based system, used for datasets and metrics.

    Call HF API to get a presigned url to upload `filename` to S3.

    Sends a POST to `{endpoint}/api/{filetype}/presign` with `token` as bearer authorization and
    `filename` / `organization` as the JSON body, then builds a `PresignedUrl` from the JSON reply.

    Raises:
        AssertionError: if `filetype` is not one of `ALLOWED_S3_FILE_TYPES`.
        Whatever `raise_for_status()` raises when the response is an HTTP error.
    """
    assert filetype in self.ALLOWED_S3_FILE_TYPES, f"Please specify filetype from {self.ALLOWED_S3_FILE_TYPES}"
    auth_headers = {"authorization": "Bearer {}".format(token)}
    payload = {"filename": filename, "organization": organization}
    response = requests.post(f"{self.endpoint}/api/{filetype}/presign", headers=auth_headers, json=payload)
    response.raise_for_status()
    return PresignedUrl(**response.json())
def presign_and_upload(
    self, token: str, filetype: str, filename: str, filepath: str, organization: Optional[str] = None
) -> str:
    """
    HuggingFace S3-based system, used for datasets and metrics.

    Get a presigned url, then upload file to S3.

    Args:
        token: bearer token forwarded to `presign`.
        filetype: must be one of `ALLOWED_S3_FILE_TYPES`.
        filename: name forwarded to `presign`.
        filepath: local path of the file whose bytes are uploaded.
        organization: optional organization forwarded to `presign`.

    Outputs: url: Read-only url for the stored file on S3.

    Raises:
        AssertionError: if `filetype` is not one of `ALLOWED_S3_FILE_TYPES`.
        Whatever `raise_for_status()` raises when presigning or uploading gets an HTTP error.
    """
    assert filetype in self.ALLOWED_S3_FILE_TYPES, f"Please specify filetype from {self.ALLOWED_S3_FILE_TYPES}"
    urls = self.presign(token, filetype=filetype, filename=filename, organization=organization)
    # streaming upload:
    # https://2.python-requests.org/en/master/user/advanced/#streaming-uploads
    #
    # Even though we presign with the correct content-type,
    # the client still has to specify it when uploading the file.
    with open(filepath, "rb") as f:
        pf = TqdmProgressFileReader(f)
        try:
            # NOTE(review): an empty string is sent instead of the file object when the
            # reported size is 0 -- presumably to avoid streaming an empty body; confirm.
            data = f if pf.total_size > 0 else ""
            r = requests.put(urls.write, data=data, headers={"content-type": urls.type})
            r.raise_for_status()
        finally:
            # Close the progress reader even when the upload raises, so it is never left open.
            pf.close()
    return urls.access
def list_objs(self, token: str, filetype: str, organization: Optional[str] = None) -> List[S3Obj]:
    """
    HuggingFace S3-based system, used for datasets and metrics.

    Call HF API to list all stored files for user (or one of their organizations).

    Sends a GET to `{endpoint}/api/{filetype}/listObjs` with `token` as bearer authorization.
    The `organization` query parameter is only sent when `organization` is not None.
    Each item of the JSON reply is expanded into an `S3Obj`.

    Raises:
        AssertionError: if `filetype` is not one of `ALLOWED_S3_FILE_TYPES`.
        Whatever `raise_for_status()` raises when the response is an HTTP error.
    """
    assert filetype in self.ALLOWED_S3_FILE_TYPES, f"Please specify filetype from {self.ALLOWED_S3_FILE_TYPES}"
    url = "{}/api/{}/listObjs".format(self.endpoint, filetype)
    if organization is None:
        query = None
    else:
        query = {"organization": organization}
    response = requests.get(url, params=query, headers={"authorization": "Bearer {}".format(token)})
    response.raise_for_status()
    return [S3Obj(**entry) for entry in response.json()]
def delete_obj(self, token: str, filetype: str, filename: str, organization: Optional[str] = None):
    """
    HuggingFace S3-based system, used for datasets and metrics.

    Call HF API to delete a file stored by user

    Sends a DELETE to `{endpoint}/api/{filetype}/deleteObj` with `token` as bearer authorization and
    `filename` / `organization` as the JSON body. Nothing is returned.

    Raises:
        AssertionError: if `filetype` is not one of `ALLOWED_S3_FILE_TYPES`.
        Whatever `raise_for_status()` raises when the response is an HTTP error.
    """
    assert filetype in self.ALLOWED_S3_FILE_TYPES, f"Please specify filetype from {self.ALLOWED_S3_FILE_TYPES}"
    url = "{}/api/{}/deleteObj".format(self.endpoint, filetype)
    auth_headers = {"authorization": "Bearer {}".format(token)}
    payload = {"filename": filename, "organization": organization}
    response = requests.delete(url, headers=auth_headers, json=payload)
    response.raise_for_status()
def model_list(self) -> List[ModelInfo]:
"""
Get the public list of all the models on huggingface.co

Просмотреть файл

@ -20,9 +20,8 @@ import subprocess
import time
import unittest
import requests
from requests.exceptions import HTTPError
from transformers.hf_api import HfApi, HfFolder, ModelInfo, PresignedUrl, RepoObj, S3Obj
from transformers.hf_api import HfApi, HfFolder, ModelInfo, RepoObj
from transformers.testing_utils import require_git_lfs
@ -80,42 +79,6 @@ class HfApiEndpointsTest(HfApiCommonTest):
self.assertEqual(user, USER)
self.assertIsInstance(orgs, list)
def test_presign_invalid_org(self):
    """Presigning a file for the organization "fake" is expected to raise HTTPError."""
    request_args = {
        "token": self._token,
        "filetype": "datasets",
        "filename": "nested/fake_org.txt",
        "organization": "fake",
    }
    with self.assertRaises(HTTPError):
        self._api.presign(**request_args)
def test_presign_valid_org(self):
    """Presigning a file for the organization "valid_org" returns a PresignedUrl."""
    presigned = self._api.presign(
        token=self._token,
        filetype="datasets",
        filename="nested/valid_org.txt",
        organization="valid_org",
    )
    self.assertIsInstance(presigned, PresignedUrl)
def test_presign(self):
    """Every key in FILES presigns to a PresignedUrl whose type is "text/plain"."""
    for file_key, _unused_path in FILES:
        presigned = self._api.presign(token=self._token, filetype="datasets", filename=file_key)
        self.assertIsInstance(presigned, PresignedUrl)
        self.assertEqual(presigned.type, "text/plain")
def test_presign_and_upload(self):
    """Upload each entry of FILES, then fetch the returned url and compare it with the local file."""
    for file_key, file_path in FILES:
        access_url = self._api.presign_and_upload(
            token=self._token, filetype="datasets", filename=file_key, filepath=file_path
        )
        self.assertIsInstance(access_url, str)
        with open(file_path, "r") as local_file:
            expected_text = local_file.read()
        downloaded = requests.get(access_url)
        self.assertEqual(downloaded.text, expected_text)
def test_list_objs(self):
    """list_objs returns a list; when it is non-empty, its last element is an S3Obj."""
    listed = self._api.list_objs(token=self._token, filetype="datasets")
    self.assertIsInstance(listed, list)
    if listed:
        self.assertIsInstance(listed[-1], S3Obj)
def test_list_repos_objs(self):
objs = self._api.list_repos_objs(token=self._token)
self.assertIsInstance(objs, list)