2018-12-11 14:20:22 +03:00
"""
Simple check list from AllenNLP repo : https : / / github . com / allenai / allennlp / blob / master / setup . py
To create the package for pypi .
2019-10-09 19:14:03 +03:00
1. Change the version in __init__ . py , setup . py as well as docs / source / conf . py .
2018-12-11 14:20:22 +03:00
2020-09-29 21:17:34 +03:00
2. Unpin specific versions from setup . py that use a git install .
2020-05-07 21:15:20 +03:00
2018-12-11 14:20:22 +03:00
2. Commit these changes with the message : " Release: VERSION "
3. Add a tag in git to mark the release : " git tag VERSION -m ' Adds tag VERSION for pypi ' "
Push the tag to git : git push - - tags origin master
4. Build both the sources and the wheel . Do not change anything in setup . py between
creating the wheel and the source distribution ( obviously ) .
2019-09-26 14:47:58 +03:00
For the wheel , run : " python setup.py bdist_wheel " in the top level directory .
2019-12-22 20:22:29 +03:00
( this will build a wheel for the python version you use to build it ) .
2018-12-11 14:20:22 +03:00
For the sources , run : " python setup.py sdist "
2019-09-26 14:47:58 +03:00
You should now have a / dist directory with both . whl and . tar . gz source versions .
2018-12-11 14:20:22 +03:00
5. Check that everything looks correct by uploading the package to the pypi test server :
twine upload dist / * - r pypitest
( pypi suggest using twine as other methods upload files via plaintext . )
2020-01-31 17:48:15 +03:00
You may have to specify the repository url , use the following command then :
twine upload dist / * - r pypitest - - repository - url = https : / / test . pypi . org / legacy /
2018-12-11 14:20:22 +03:00
Check that you can install it in a virtualenv by running :
2019-09-26 11:15:53 +03:00
pip install - i https : / / testpypi . python . org / pypi transformers
2018-12-11 14:20:22 +03:00
6. Upload the final version to actual pypi :
twine upload dist / * - r pypi
7. Copy the release notes from RELEASE . md to the tag in github once everything is looking hunky - dory .
2020-06-29 17:38:34 +03:00
8. Add the release version to docs / source / _static / js / custom . js and . circleci / deploy . sh
2020-02-19 19:57:17 +03:00
9. Update README . md to redirect to correct documentation .
2018-12-11 14:20:22 +03:00
"""
2019-12-21 17:57:32 +03:00
2019-12-23 22:06:39 +03:00
import shutil
from pathlib import Path
2018-11-15 22:56:10 +03:00
from setuptools import find_packages , setup
2019-12-04 08:52:23 +03:00
2019-12-23 22:06:39 +03:00
# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
# The directory is looked up next to this setup.py (not in the current working directory),
# and the name must match exactly: any padding in the literal would make the check a no-op.
stale_egg_info = Path(__file__).parent / "transformers.egg-info"
if stale_egg_info.exists():
    # Tell the user what is about to be deleted and why before touching the filesystem.
    print(
        (
            "Warning: {} exists.\n\n"
            "If you recently updated transformers to 3.0 or later, this is expected,\n"
            "but it may prevent transformers from installing in editable mode.\n\n"
            "This directory is automatically generated by Python's packaging tools.\n"
            "I will remove it now.\n\n"
            "See https://github.com/pypa/pip/issues/5466 for details.\n"
        ).format(stale_egg_info)
    )
    shutil.rmtree(stale_egg_info)
2019-12-22 22:28:26 +03:00
# Optional dependency groups. Each key becomes an installable extra, e.g.
# `pip install transformers[ja]`, so keys and requirement strings must not carry
# stray whitespace.
extras = {}

# Japanese tokenization: fugashi (a MeCab wrapper) plus the dictionaries it can use.
extras["ja"] = ["fugashi>=1.0", "ipadic>=1.0.0,<2.0", "unidic_lite>=1.0.7", "unidic>=1.0.2"]
extras["sklearn"] = ["scikit-learn"]

# keras2onnx and onnxconverter-common version is specific through a commit until 1.7.0 lands on pypi
# (the git pins are kept commented out below so they can be restored after a release).
extras["tf"] = [
    "tensorflow>=2.0",
    "onnxconverter-common",
    "keras2onnx"
    # "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
    # "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
]
extras["tf-cpu"] = [
    "tensorflow-cpu>=2.0",
    "onnxconverter-common",
    "keras2onnx"
    # "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
    # "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
]
extras["torch"] = ["torch>=1.0"]
extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]

extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
extras["all"] = extras["serving"] + ["tensorflow", "torch"]

# "testing" is built on top of "retrieval", so "retrieval" has to be defined first.
extras["retrieval"] = ["faiss-cpu", "datasets"]
extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"]

# sphinx-rtd-theme==0.5.0 introduced big changes in the style.
extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3", "sphinx-copybutton"]
extras["quality"] = ["black >= 20.8b1", "isort >= 5.5.4", "flake8 >= 3.8.3"]

# Everything a contributor needs: test and lint tooling, Japanese support and both frameworks.
extras["dev"] = extras["testing"] + extras["quality"] + extras["ja"] + ["scikit-learn", "tensorflow", "torch"]
2019-12-04 08:52:23 +03:00
2018-11-15 22:56:10 +03:00
# Package metadata and dependency declaration handed to setuptools.
setup(
    name="transformers",
    version="3.3.1",
    author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
    author_email="thomas@huggingface.co",
    description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
    # read_text opens, reads and closes README.md, so no file handle is left dangling.
    long_description=Path("README.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    keywords="NLP deep learning transformer pytorch tensorflow BERT GPT GPT-2 google openai CMU",
    license="Apache",
    url="https://github.com/huggingface/transformers",
    # The importable packages live under src/, not at the repository root.
    package_dir={"": "src"},
    packages=find_packages("src"),
    install_requires=[
        "numpy",
        "tokenizers == 0.8.1.rc2",
        # dataclasses for Python versions that don't have it
        "dataclasses;python_version<'3.7'",
        # utilities from PyPA to e.g. compare versions
        "packaging",
        # filesystem locks e.g. to prevent parallel downloads
        "filelock",
        # for downloading models over HTTPS
        "requests",
        # progress bars in model download and training scripts
        "tqdm >= 4.27",
        # for OpenAI GPT
        "regex != 2019.12.17",
        # for XLNet
        "sentencepiece != 0.1.92",
        # for XLM
        "sacremoses",
    ],
    extras_require=extras,
    entry_points={
        "console_scripts": ["transformers-cli=transformers.commands.transformers_cli:main"]
    },
    python_requires=">=3.6.0",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)