зеркало из https://github.com/mozilla/TTS.git
Merge branch 'pr235' into dev
This commit is contained in:
Коммит
97c7a8ff13
|
@ -0,0 +1,2 @@
|
|||
linters:
|
||||
- pylint:
|
|
@ -0,0 +1,585 @@
|
|||
[MASTER]
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loaded into the active Python interpreter and may
|
||||
# run arbitrary code.
|
||||
extension-pkg-whitelist=
|
||||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=CVS
|
||||
|
||||
# Add files or directories matching the regex patterns to the blacklist. The
|
||||
# regex matches against base names, not paths.
|
||||
ignore-patterns=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
|
||||
# number of processors available to use.
|
||||
jobs=1
|
||||
|
||||
# Control the amount of potential inferred values when inferring a single
|
||||
# object. This can help the performance when dealing with large functions or
|
||||
# complex, nested conditions.
|
||||
limit-inference-results=100
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||
# user-friendly hints instead of false-positive error messages.
|
||||
suggestion-mode=yes
|
||||
|
||||
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||
# active Python interpreter and may run arbitrary code.
|
||||
unsafe-load-any-extension=no
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Only show warnings with the listed confidence levels. Leave empty to show
|
||||
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
|
||||
confidence=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this
|
||||
# option multiple times (only on the command line, not in the configuration
|
||||
# file where it should appear only once). You can also use "--disable=all" to
|
||||
# disable everything first and then reenable specific checks. For example, if
|
||||
# you want to run only the similarities checker, you can use "--disable=all
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||
# --disable=W".
|
||||
disable=missing-docstring,
|
||||
line-too-long,
|
||||
fixme,
|
||||
wrong-import-order,
|
||||
ungrouped-imports,
|
||||
wrong-import-position,
|
||||
import-error,
|
||||
invalid-name,
|
||||
too-many-instance-attributes,
|
||||
arguments-differ,
|
||||
no-name-in-module,
|
||||
no-member,
|
||||
unsubscriptable-object,
|
||||
print-statement,
|
||||
parameter-unpacking,
|
||||
unpacking-in-except,
|
||||
old-raise-syntax,
|
||||
backtick,
|
||||
long-suffix,
|
||||
old-ne-operator,
|
||||
old-octal-literal,
|
||||
import-star-module-level,
|
||||
non-ascii-bytes-literal,
|
||||
raw-checker-failed,
|
||||
bad-inline-option,
|
||||
locally-disabled,
|
||||
file-ignored,
|
||||
suppressed-message,
|
||||
useless-suppression,
|
||||
deprecated-pragma,
|
||||
use-symbolic-message-instead,
|
||||
useless-object-inheritance,
|
||||
too-few-public-methods,
|
||||
too-many-branches,
|
||||
too-many-arguments,
|
||||
too-many-locals,
|
||||
too-many-statements,
|
||||
apply-builtin,
|
||||
basestring-builtin,
|
||||
buffer-builtin,
|
||||
cmp-builtin,
|
||||
coerce-builtin,
|
||||
execfile-builtin,
|
||||
file-builtin,
|
||||
long-builtin,
|
||||
raw_input-builtin,
|
||||
reduce-builtin,
|
||||
standarderror-builtin,
|
||||
unicode-builtin,
|
||||
xrange-builtin,
|
||||
coerce-method,
|
||||
delslice-method,
|
||||
getslice-method,
|
||||
setslice-method,
|
||||
no-absolute-import,
|
||||
old-division,
|
||||
dict-iter-method,
|
||||
dict-view-method,
|
||||
next-method-called,
|
||||
metaclass-assignment,
|
||||
indexing-exception,
|
||||
raising-string,
|
||||
reload-builtin,
|
||||
oct-method,
|
||||
hex-method,
|
||||
nonzero-method,
|
||||
cmp-method,
|
||||
input-builtin,
|
||||
round-builtin,
|
||||
intern-builtin,
|
||||
unichr-builtin,
|
||||
map-builtin-not-iterating,
|
||||
zip-builtin-not-iterating,
|
||||
range-builtin-not-iterating,
|
||||
filter-builtin-not-iterating,
|
||||
using-cmp-argument,
|
||||
eq-without-hash,
|
||||
div-method,
|
||||
idiv-method,
|
||||
rdiv-method,
|
||||
exception-message-attribute,
|
||||
invalid-str-codec,
|
||||
sys-max-int,
|
||||
bad-python3-import,
|
||||
deprecated-string-function,
|
||||
deprecated-str-translate-call,
|
||||
deprecated-itertools-function,
|
||||
deprecated-types-field,
|
||||
next-method-defined,
|
||||
dict-items-not-iterating,
|
||||
dict-keys-not-iterating,
|
||||
dict-values-not-iterating,
|
||||
deprecated-operator-function,
|
||||
deprecated-urllib-function,
|
||||
xreadlines-attribute,
|
||||
deprecated-sys-function,
|
||||
exception-escape,
|
||||
comprehension-escape
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifiers separated by comma (,) or put this option
|
||||
# multiple times (only on the command line, not in the configuration file where
|
||||
# it should appear only once). See also the "--disable" option for examples.
|
||||
enable=c-extension-no-member
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables error, warning, statement which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Template used to display messages. This is a python new-style format string
|
||||
# used to format the message information. See doc for all details.
|
||||
#msg-template=
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, json
|
||||
# and msvs (visual studio). You can also give a reporter class, e.g.
|
||||
# mypackage.mymodule.MyReporterClass.
|
||||
output-format=text
|
||||
|
||||
# Tells whether to display a full report or only the messages.
|
||||
reports=no
|
||||
|
||||
# Activate the evaluation score.
|
||||
score=yes
|
||||
|
||||
|
||||
[REFACTORING]
|
||||
|
||||
# Maximum number of nested blocks for function / method body
|
||||
max-nested-blocks=5
|
||||
|
||||
# Complete name of functions that never return. When checking for
|
||||
# inconsistent-return-statements if a never returning function is called then
|
||||
# it will be considered as an explicit return statement and no message will be
|
||||
# printed.
|
||||
never-returning-functions=sys.exit
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# Format style used to check logging format string. `old` means using %
|
||||
# formatting, while `new` is for `{}` formatting.
|
||||
logging-format-style=old
|
||||
|
||||
# Logging modules to check that the string format arguments are in logging
|
||||
# function parameter format.
|
||||
logging-modules=logging
|
||||
|
||||
|
||||
[SPELLING]
|
||||
|
||||
# Limits count of emitted suggestions for spelling mistakes.
|
||||
max-spelling-suggestions=4
|
||||
|
||||
# Spelling dictionary name. Available dictionaries: none. To make it work,
|
||||
# install the python-enchant package.
|
||||
spelling-dict=
|
||||
|
||||
# List of comma separated words that should not be checked.
|
||||
spelling-ignore-words=
|
||||
|
||||
# A path to a file that contains private dictionary; one word per line.
|
||||
spelling-private-dict-file=
|
||||
|
||||
# Tells whether to store unknown words to indicated private dictionary in
|
||||
# --spelling-private-dict-file option instead of raising a message.
|
||||
spelling-store-unknown-words=no
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take into consideration, separated by a comma.
|
||||
notes=FIXME,
|
||||
XXX,
|
||||
TODO
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# List of decorators that produce context managers, such as
|
||||
# contextlib.contextmanager. Add to this list to register other decorators that
|
||||
# produce valid context managers.
|
||||
contextmanager-decorators=contextlib.contextmanager
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E1101 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# Tells whether to warn about missing members when the owner of the attribute
|
||||
# is inferred to be None.
|
||||
ignore-none=yes
|
||||
|
||||
# This flag controls whether pylint should warn about no-member and similar
|
||||
# checks whenever an opaque object is returned when inferring. The inference
|
||||
# can return multiple potential results while evaluating a Python object, but
|
||||
# some branches might not be evaluated, which results in partial inference. In
|
||||
# that case, it might be useful to still emit no-member and other checks for
|
||||
# the rest of the inferred objects.
|
||||
ignore-on-opaque-inference=yes
|
||||
|
||||
# List of class names for which member attributes should not be checked (useful
|
||||
# for classes with dynamically set attributes). This supports the use of
|
||||
# qualified names.
|
||||
ignored-classes=optparse.Values,thread._local,_thread._local
|
||||
|
||||
# List of module names for which member attributes should not be checked
|
||||
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||
# and thus existing member attributes cannot be deduced by static analysis. It
|
||||
# supports qualified module names, as well as Unix pattern matching.
|
||||
ignored-modules=
|
||||
|
||||
# Show a hint with possible names when a member name was not found. The aspect
|
||||
# of finding the hint is based on edit distance.
|
||||
missing-member-hint=yes
|
||||
|
||||
# The minimum edit distance a name should have in order to be considered a
|
||||
# similar match for a missing member name.
|
||||
missing-member-hint-distance=1
|
||||
|
||||
# The total number of similar names that should be taken in consideration when
|
||||
# showing a hint for a missing member.
|
||||
missing-member-max-choices=1
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid defining new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
# Tells whether unused global variables should be treated as a violation.
|
||||
allow-global-unused-variables=yes
|
||||
|
||||
# List of strings which can identify a callback function by name. A callback
|
||||
# name must start or end with one of those strings.
|
||||
callbacks=cb_,
|
||||
_cb
|
||||
|
||||
# A regular expression matching the name of dummy variables (i.e. expected to
|
||||
# not be used).
|
||||
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore.
|
||||
ignored-argument-names=_.*|^ignored_|^unused_
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# List of qualified module names which can have objects that can redefine
|
||||
# builtins.
|
||||
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
||||
expected-line-ending-format=
|
||||
|
||||
# Regexp for a line that is allowed to be longer than the limit.
|
||||
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
||||
|
||||
# Number of spaces of indent required inside a hanging or continued line.
|
||||
indent-after-paren=4
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=100
|
||||
|
||||
# Maximum number of lines in a module.
|
||||
max-module-lines=1000
|
||||
|
||||
# List of optional constructs for which whitespace checking is disabled. `dict-
|
||||
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
|
||||
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
|
||||
# `empty-line` allows space-only lines.
|
||||
no-space-check=trailing-comma,
|
||||
dict-separator
|
||||
|
||||
# Allow the body of a class to be on the same line as the declaration if body
|
||||
# contains single statement.
|
||||
single-line-class-stmt=no
|
||||
|
||||
# Allow the body of an if to be on the same line as the test if there is no
|
||||
# else.
|
||||
single-line-if-stmt=no
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
# Ignore imports when computing similarities.
|
||||
ignore-imports=no
|
||||
|
||||
# Minimum number of lines of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Naming style matching correct argument names.
|
||||
argument-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct argument names. Overrides argument-
|
||||
# naming-style.
|
||||
argument-rgx=[a-z_][a-z0-9_]{0,30}$
|
||||
|
||||
# Naming style matching correct attribute names.
|
||||
attr-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct attribute names. Overrides attr-naming-
|
||||
# style.
|
||||
#attr-rgx=
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma.
|
||||
bad-names=
|
||||
|
||||
# Naming style matching correct class attribute names.
|
||||
class-attribute-naming-style=any
|
||||
|
||||
# Regular expression matching correct class attribute names. Overrides class-
|
||||
# attribute-naming-style.
|
||||
#class-attribute-rgx=
|
||||
|
||||
# Naming style matching correct class names.
|
||||
class-naming-style=PascalCase
|
||||
|
||||
# Regular expression matching correct class names. Overrides class-naming-
|
||||
# style.
|
||||
#class-rgx=
|
||||
|
||||
# Naming style matching correct constant names.
|
||||
const-naming-style=UPPER_CASE
|
||||
|
||||
# Regular expression matching correct constant names. Overrides const-naming-
|
||||
# style.
|
||||
#const-rgx=
|
||||
|
||||
# Minimum line length for functions/classes that require docstrings, shorter
|
||||
# ones are exempt.
|
||||
docstring-min-length=-1
|
||||
|
||||
# Naming style matching correct function names.
|
||||
function-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct function names. Overrides function-
|
||||
# naming-style.
|
||||
#function-rgx=
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma.
|
||||
good-names=i,
|
||||
j,
|
||||
k,
|
||||
x,
|
||||
ex,
|
||||
Run,
|
||||
_
|
||||
|
||||
# Include a hint for the correct naming format with invalid-name.
|
||||
include-naming-hint=no
|
||||
|
||||
# Naming style matching correct inline iteration names.
|
||||
inlinevar-naming-style=any
|
||||
|
||||
# Regular expression matching correct inline iteration names. Overrides
|
||||
# inlinevar-naming-style.
|
||||
#inlinevar-rgx=
|
||||
|
||||
# Naming style matching correct method names.
|
||||
method-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct method names. Overrides method-naming-
|
||||
# style.
|
||||
#method-rgx=
|
||||
|
||||
# Naming style matching correct module names.
|
||||
module-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct module names. Overrides module-naming-
|
||||
# style.
|
||||
#module-rgx=
|
||||
|
||||
# Colon-delimited sets of names that determine each other's naming style when
|
||||
# the name regexes allow several styles.
|
||||
name-group=
|
||||
|
||||
# Regular expression which should only match function or class names that do
|
||||
# not require a docstring.
|
||||
no-docstring-rgx=^_
|
||||
|
||||
# List of decorators that produce properties, such as abc.abstractproperty. Add
|
||||
# to this list to register other decorators that produce valid properties.
|
||||
# These decorators are taken in consideration only for invalid-name.
|
||||
property-classes=abc.abstractproperty
|
||||
|
||||
# Naming style matching correct variable names.
|
||||
variable-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct variable names. Overrides variable-
|
||||
# naming-style.
|
||||
variable-rgx=[a-z_][a-z0-9_]{0,30}$
|
||||
|
||||
|
||||
[STRING]
|
||||
|
||||
# This flag controls whether the implicit-str-concat-in-sequence should
|
||||
# generate a warning on implicit string concatenation in sequences defined over
|
||||
# several lines.
|
||||
check-str-concat-over-line-jumps=no
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# Allow wildcard imports from modules that define __all__.
|
||||
allow-wildcard-with-all=no
|
||||
|
||||
# Analyse import fallback blocks. This can be used to support both Python 2 and
|
||||
# 3 compatible code, which means that the block might have code that exists
|
||||
# only in one or another interpreter, leading to false positives when analysed.
|
||||
analyse-fallback-blocks=no
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma.
|
||||
deprecated-modules=optparse,tkinter.tix
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled).
|
||||
ext-import-graph=
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled).
|
||||
import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled).
|
||||
int-import-graph=
|
||||
|
||||
# Force import order to recognize a module as part of the standard
|
||||
# compatibility libraries.
|
||||
known-standard-library=
|
||||
|
||||
# Force import order to recognize a module as part of a third party library.
|
||||
known-third-party=enchant
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,
|
||||
__new__,
|
||||
setUp
|
||||
|
||||
# List of member names, which should be excluded from the protected access
|
||||
# warning.
|
||||
exclude-protected=_asdict,
|
||||
_fields,
|
||||
_replace,
|
||||
_source,
|
||||
_make
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
# List of valid names for the first argument in a metaclass class method.
|
||||
valid-metaclass-classmethod-first-arg=cls
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method.
|
||||
max-args=5
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Maximum number of boolean expressions in an if statement.
|
||||
max-bool-expr=5
|
||||
|
||||
# Maximum number of branches for function / method body.
|
||||
max-branches=12
|
||||
|
||||
# Maximum number of locals for function / method body.
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
# Maximum number of return / yield for function / method body.
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of statements in function / method body.
|
||||
max-statements=50
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when being caught. Defaults to
|
||||
# "BaseException, Exception".
|
||||
overgeneral-exceptions=BaseException,
|
||||
Exception
|
|
@ -0,0 +1,21 @@
|
|||
language: python
|
||||
|
||||
git:
|
||||
quiet: true
|
||||
|
||||
cache: pip
|
||||
before_cache:
|
||||
- rm ~/.cache/pip/log/debug.log
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- name: "Lint check"
|
||||
python: "3.6"
|
||||
install: pip install --quiet --upgrade cardboardlint pylint
|
||||
env: TEST_SUITE="lint"
|
||||
- name: "Unit tests"
|
||||
python: "3.6"
|
||||
install: pip install --quiet -r requirements.txt
|
||||
env: TEST_SUITE="unittest"
|
||||
|
||||
script: ./.travis/script
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
git remote set-branches --add origin $TRAVIS_BRANCH
|
||||
git fetch
|
||||
|
||||
if [[ ( "$TRAVIS_PULL_REQUEST" != "false" ) && ( "$TEST_SUITE" == "lint" ) ]]; then
|
||||
# Run cardboardlinter, in case of pull requests
|
||||
cardboardlinter --refspec origin/$TRAVIS_BRANCH -n auto
|
||||
fi
|
||||
|
||||
if [[ "$TEST_SUITE" == "unittest" ]]; then
|
||||
# Run tests on all pushes
|
||||
python -m unittest
|
||||
fi
|
|
@ -0,0 +1,51 @@
|
|||
# Contribution guidelines
|
||||
|
||||
This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
|
||||
|
||||
Before making a Pull Request, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter set up in this repository, so, for example, if you've made some changes and would like to run the linter on just the differences between your work and master, you can use the following command:
|
||||
|
||||
```bash
|
||||
pip install pylint cardboardlint
|
||||
cardboardlinter --refspec master
|
||||
```
|
||||
|
||||
This will compare the code against master and run the linter on all the changes. To run it automatically as a git pre-commit hook, you can do the following:
|
||||
|
||||
```bash
|
||||
cat <<\EOF > .git/hooks/pre-commit
|
||||
#!/bin/bash
|
||||
if [ ! -x "$(command -v cardboardlinter)" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# First, stash index and work dir, keeping only the
|
||||
# to-be-committed changes in the working directory.
|
||||
echo "Stashing working tree changes..." 1>&2
|
||||
old_stash=$(git rev-parse -q --verify refs/stash)
|
||||
git stash save -q --keep-index
|
||||
new_stash=$(git rev-parse -q --verify refs/stash)
|
||||
|
||||
# If there were no changes (e.g., `--amend` or `--allow-empty`)
|
||||
# then nothing was stashed, and we should skip everything,
|
||||
# including the tests themselves. (Presumably the tests passed
|
||||
# on the previous commit, so there is no need to re-run them.)
|
||||
if [ "$old_stash" = "$new_stash" ]; then
|
||||
echo "No changes, skipping lint." 1>&2
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Run tests
|
||||
cardboardlinter --refspec HEAD -n auto
|
||||
status=$?
|
||||
|
||||
# Restore changes
|
||||
echo "Restoring working tree changes..." 1>&2
|
||||
git reset --hard -q && git stash apply --index -q && git stash drop -q
|
||||
|
||||
# Exit with status from test-run: nonzero prevents commit
|
||||
exit $status
|
||||
EOF
|
||||
chmod +x .git/hooks/pre-commit
|
||||
```
|
||||
|
||||
This will run the linters on just the changes made in your commit.
|
|
@ -1,12 +1,10 @@
|
|||
# visualisation tools for mimic2
|
||||
# visualisation tools for mimic2
|
||||
import matplotlib.pyplot as plt
|
||||
from statistics import stdev, mode, mean, median
|
||||
from statistics import StatisticsError
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import csv
|
||||
import copy
|
||||
import seaborn as sns
|
||||
import random
|
||||
from text.cmudict import CMUDict
|
||||
|
@ -32,7 +30,7 @@ def append_data_statistics(meta_data):
|
|||
std = stdev(
|
||||
d["audio_len"] for d in data
|
||||
)
|
||||
except:
|
||||
except StatisticsError:
|
||||
std = 0
|
||||
|
||||
meta_data[char_cnt]["mean"] = mean_audio_len
|
||||
|
@ -114,7 +112,7 @@ def plot(meta_data, save_path=None):
|
|||
y_mode = graph_data['y_mode']
|
||||
y_median = graph_data['y_median']
|
||||
y_num_samples = graph_data['y_num_samples']
|
||||
|
||||
|
||||
plt.figure()
|
||||
plt.plot(x, y_avg, 'ro')
|
||||
plt.xlabel("character lengths", fontsize=30)
|
||||
|
@ -122,7 +120,7 @@ def plot(meta_data, save_path=None):
|
|||
if save:
|
||||
name = "char_len_vs_avg_secs"
|
||||
plt.savefig(os.path.join(save_path, name))
|
||||
|
||||
|
||||
plt.figure()
|
||||
plt.plot(x, y_mode, 'ro')
|
||||
plt.xlabel("character lengths", fontsize=30)
|
||||
|
@ -182,12 +180,12 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
|
|||
for key in phonemes:
|
||||
x.append(key)
|
||||
y.append(phonemes[key])
|
||||
|
||||
|
||||
plt.figure()
|
||||
plt.rcParams["figure.figsize"] = (50, 20)
|
||||
plot = sns.barplot(x, y)
|
||||
barplot = sns.barplot(x, y)
|
||||
if save_path:
|
||||
fig = plot.get_figure()
|
||||
fig = barplot.get_figure()
|
||||
fig.savefig(os.path.join(save_path, "phoneme_dist"))
|
||||
|
||||
|
||||
|
@ -201,7 +199,7 @@ def main():
|
|||
'--save_to', help='path to save charts of data to'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--cmu_dict_path', help='give cmudict-0.7b to see phoneme distribution'
|
||||
'--cmu_dict_path', help='give cmudict-0.7b to see phoneme distribution'
|
||||
)
|
||||
args = parser.parse_args()
|
||||
meta_data = process_meta_data(args.train_file_path)
|
||||
|
@ -210,8 +208,8 @@ def main():
|
|||
if args.cmu_dict_path:
|
||||
plt.rcParams["figure.figsize"] = (30, 10)
|
||||
plot_phonemes(args.train_file_path, args.cmu_dict_path, args.save_to)
|
||||
|
||||
|
||||
plt.show()
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
import os
|
||||
import numpy as np
|
||||
import collections
|
||||
import librosa
|
||||
import torch
|
||||
import random
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from utils.text import text_to_sequence, phoneme_to_sequence
|
||||
from utils.data import (prepare_data, pad_per_step, prepare_tensor,
|
||||
prepare_stop_target)
|
||||
from utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||
|
||||
|
||||
class MyDataset(Dataset):
|
||||
|
@ -31,14 +29,14 @@ class MyDataset(Dataset):
|
|||
text_cleaner (str): text cleaner used for the dataset.
|
||||
ap (TTS.utils.AudioProcessor): audio processor object.
|
||||
meta_data (list): list of dataset instances.
|
||||
batch_group_size (int): (0) range of batch randomization after sorting
|
||||
sequences by length.
|
||||
min_seq_len (int): (0) minimum sequence length to be processed
|
||||
batch_group_size (int): (0) range of batch randomization after sorting
|
||||
sequences by length.
|
||||
min_seq_len (int): (0) minimum sequence length to be processed
|
||||
by the loader.
|
||||
max_seq_len (int): (float("inf")) maximum sequence length.
|
||||
use_phonemes (bool): (true) if true, text converted to phonemes.
|
||||
phoneme_cache_path (str): path to cache phoneme features.
|
||||
phoneme_language (str): one the languages from
|
||||
phoneme_cache_path (str): path to cache phoneme features.
|
||||
phoneme_language (str): one the languages from
|
||||
https://github.com/bootphon/phonemizer#languages
|
||||
enable_eos_bos (bool): enable end of sentence and beginning of sentences characters.
|
||||
verbose (bool): print diagnostic information.
|
||||
|
@ -70,7 +68,8 @@ class MyDataset(Dataset):
|
|||
audio = self.ap.load_wav(filename)
|
||||
return audio
|
||||
|
||||
def load_np(self, filename):
|
||||
@staticmethod
|
||||
def load_np(filename):
|
||||
data = np.load(filename).astype('float32')
|
||||
return data
|
||||
|
||||
|
@ -81,7 +80,7 @@ class MyDataset(Dataset):
|
|||
if os.path.isfile(tmp_path):
|
||||
try:
|
||||
text = np.load(tmp_path)
|
||||
except:
|
||||
except (IOError, ValueError):
|
||||
print(" > ERROR: phoneme connot be loaded for {}. Recomputing.".format(wav_file))
|
||||
text = np.asarray(
|
||||
phoneme_to_sequence(
|
||||
|
@ -120,7 +119,7 @@ class MyDataset(Dataset):
|
|||
def sort_items(self):
|
||||
r"""Sort instances based on text length in ascending order"""
|
||||
lengths = np.array([len(ins[0]) for ins in self.items])
|
||||
|
||||
|
||||
idxs = np.argsort(lengths)
|
||||
new_items = []
|
||||
ignored = []
|
||||
|
@ -144,10 +143,10 @@ class MyDataset(Dataset):
|
|||
print(" | > Max length sequence: {}".format(np.max(lengths)))
|
||||
print(" | > Min length sequence: {}".format(np.min(lengths)))
|
||||
print(" | > Avg length sequence: {}".format(np.mean(lengths)))
|
||||
print(" | > Num. instances discarded by max-min seq limits: {}".format(
|
||||
len(ignored), self.min_seq_len))
|
||||
print(" | > Num. instances discarded by max-min (max={}, min={}) seq limits: {}".format(
|
||||
self.max_seq_len, self.min_seq_len, len(ignored)))
|
||||
print(" | > Batch group size: {}.".format(self.batch_group_size))
|
||||
|
||||
|
||||
def __len__(self):
|
||||
return len(self.items)
|
||||
|
||||
|
@ -176,7 +175,7 @@ class MyDataset(Dataset):
|
|||
]
|
||||
text = [batch[idx]['text'] for idx in ids_sorted_decreasing]
|
||||
speaker_name = [batch[idx]['speaker_name']
|
||||
for idx in ids_sorted_decreasing]
|
||||
for idx in ids_sorted_decreasing]
|
||||
|
||||
mel = [self.ap.melspectrogram(w).astype('float32') for w in wav]
|
||||
linear = [self.ap.spectrogram(w).astype('float32') for w in wav]
|
||||
|
|
|
@ -11,7 +11,7 @@ def get_preprocessor_by_name(name):
|
|||
|
||||
|
||||
def tweb(root_path, meta_file):
|
||||
"""Normalize TWEB dataset.
|
||||
"""Normalize TWEB dataset.
|
||||
https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset
|
||||
"""
|
||||
txt_file = os.path.join(root_path, meta_file)
|
||||
|
@ -123,9 +123,9 @@ def nancy(root_path, meta_file):
|
|||
speaker_name = "nancy"
|
||||
with open(txt_file, 'r') as ttf:
|
||||
for line in ttf:
|
||||
id = line.split()[1]
|
||||
utt_id = line.split()[1]
|
||||
text = line[line.find('"') + 1:line.rfind('"') - 1]
|
||||
wav_file = os.path.join(root_path, "wavn", id + ".wav")
|
||||
wav_file = os.path.join(root_path, "wavn", utt_id + ".wav")
|
||||
items.append([text, wav_file, speaker_name])
|
||||
return items
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
# edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
import time
|
||||
import subprocess
|
||||
|
@ -19,6 +18,7 @@ class DistributedSampler(Sampler):
|
|||
"""
|
||||
|
||||
def __init__(self, dataset, num_replicas=None, rank=None):
|
||||
super(DistributedSampler, self).__init__(dataset)
|
||||
if num_replicas is None:
|
||||
if not dist.is_available():
|
||||
raise RuntimeError("Requires distributed package to be available")
|
||||
|
@ -54,12 +54,6 @@ class DistributedSampler(Sampler):
|
|||
self.epoch = epoch
|
||||
|
||||
|
||||
def reduce_tensor(tensor, n_gpus):
|
||||
rt = tensor.clone()
|
||||
dist.all_reduce(rt, op=dist.reduce_op.SUM)
|
||||
rt /= n_gpus
|
||||
return rt
|
||||
|
||||
def reduce_tensor(tensor, num_gpus):
|
||||
rt = tensor.clone()
|
||||
dist.all_reduce(rt, op=dist.reduce_op.SUM)
|
||||
|
@ -91,7 +85,7 @@ def apply_gradient_allreduce(module):
|
|||
dist.broadcast(p, 0)
|
||||
|
||||
def allreduce_params():
|
||||
if (module.needs_reduction):
|
||||
if module.needs_reduction:
|
||||
module.needs_reduction = False
|
||||
# bucketing params based on value types
|
||||
buckets = {}
|
||||
|
@ -113,23 +107,39 @@ def apply_gradient_allreduce(module):
|
|||
|
||||
for param in list(module.parameters()):
|
||||
|
||||
def allreduce_hook(*unused):
|
||||
def allreduce_hook(*_):
|
||||
Variable._execution_engine.queue_callback(allreduce_params)
|
||||
|
||||
if param.requires_grad:
|
||||
param.register_hook(allreduce_hook)
|
||||
|
||||
def set_needs_reduction(self, input, output):
|
||||
def set_needs_reduction(self, *_):
|
||||
self.needs_reduction = True
|
||||
|
||||
module.register_forward_hook(set_needs_reduction)
|
||||
return module
|
||||
|
||||
|
||||
def main(args):
|
||||
def main():
|
||||
"""
|
||||
Call train.py as a new process and pass command arguments
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'--restore_path',
|
||||
type=str,
|
||||
help='Folder path to checkpoints',
|
||||
default='')
|
||||
parser.add_argument(
|
||||
'--config_path',
|
||||
type=str,
|
||||
help='path to config file for training',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--data_path', type=str, help='dataset path.', default='')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
CONFIG = load_config(args.config_path)
|
||||
OUT_PATH = create_experiment_folder(CONFIG.output_path, CONFIG.run_name,
|
||||
True)
|
||||
|
@ -150,7 +160,7 @@ def main(args):
|
|||
if not os.path.isdir(stdout_path):
|
||||
os.makedirs(stdout_path)
|
||||
os.chmod(stdout_path, 0o775)
|
||||
|
||||
|
||||
# run processes
|
||||
processes = []
|
||||
for i in range(num_gpus):
|
||||
|
@ -159,7 +169,7 @@ def main(args):
|
|||
command[6] = '--rank={}'.format(i)
|
||||
stdout = None if i == 0 else open(
|
||||
os.path.join(stdout_path, "process_{}.log".format(i)), "w")
|
||||
p = subprocess.Popen(['python3'.format(i)] + command, stdout=stdout, env=my_env)
|
||||
p = subprocess.Popen(['python3'] + command, stdout=stdout, env=my_env)
|
||||
processes.append(p)
|
||||
print(command)
|
||||
|
||||
|
@ -168,19 +178,4 @@ def main(args):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'--restore_path',
|
||||
type=str,
|
||||
help='Folder path to checkpoints',
|
||||
default='')
|
||||
parser.add_argument(
|
||||
'--config_path',
|
||||
type=str,
|
||||
help='path to config file for training',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--data_path', type=str, help='dataset path.', default='')
|
||||
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
main()
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from math import sqrt
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
from torch import nn
|
||||
from torch.autograd import Variable
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
|
@ -107,6 +106,8 @@ class LocationLayer(nn.Module):
|
|||
|
||||
|
||||
class Attention(nn.Module):
|
||||
# Pylint gets confused by PyTorch conventions here
|
||||
#pylint: disable=attribute-defined-outside-init
|
||||
def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
|
||||
location_attention, attention_location_n_filters,
|
||||
attention_location_kernel_size, windowing, norm, forward_attn,
|
||||
|
@ -262,4 +263,4 @@ class Attention(nn.Module):
|
|||
context = torch.bmm(alignment.unsqueeze(1), inputs)
|
||||
context = context.squeeze(1)
|
||||
self.attention_weights = alignment
|
||||
return context
|
||||
return context
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# coding: utf-8
|
||||
import torch
|
||||
from torch import nn
|
||||
# import torch
|
||||
# from torch import nn
|
||||
|
||||
# class StopProjection(nn.Module):
|
||||
# r""" Simple projection layer to predict the "stop token"
|
||||
|
|
|
@ -77,10 +77,11 @@ class ReferenceEncoder(nn.Module):
|
|||
|
||||
return out.squeeze(0)
|
||||
|
||||
def calculate_post_conv_height(self, height, kernel_size, stride, pad,
|
||||
@staticmethod
|
||||
def calculate_post_conv_height(height, kernel_size, stride, pad,
|
||||
n_convs):
|
||||
"""Height of spec after n convolutions with fixed kernel/stride/pad."""
|
||||
for i in range(n_convs):
|
||||
for _ in range(n_convs):
|
||||
height = (height - kernel_size + 2 * pad) // stride + 1
|
||||
return height
|
||||
|
||||
|
@ -165,4 +166,4 @@ class MultiHeadAttention(nn.Module):
|
|||
torch.split(out, 1, dim=0),
|
||||
dim=3).squeeze(0) # [N, T_q, num_units]
|
||||
|
||||
return out
|
||||
return out
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
import torch
|
||||
from torch.nn import functional
|
||||
from torch import nn
|
||||
from torch.nn import functional
|
||||
from utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
class L1LossMasked(nn.Module):
|
||||
def __init__(self):
|
||||
super(L1LossMasked, self).__init__()
|
||||
|
||||
def forward(self, input, target, length):
|
||||
def forward(self, x, target, length):
|
||||
"""
|
||||
Args:
|
||||
input: A Variable containing a FloatTensor of size
|
||||
x: A Variable containing a FloatTensor of size
|
||||
(batch, max_len, dim) which contains the
|
||||
unnormalized probability for each class.
|
||||
target: A Variable containing a LongTensor of size
|
||||
|
@ -26,21 +22,18 @@ class L1LossMasked(nn.Module):
|
|||
target.requires_grad = False
|
||||
mask = sequence_mask(
|
||||
sequence_length=length, max_len=target.size(1)).unsqueeze(2).float()
|
||||
mask = mask.expand_as(input)
|
||||
mask = mask.expand_as(x)
|
||||
loss = functional.l1_loss(
|
||||
input * mask, target * mask, reduction="sum")
|
||||
x * mask, target * mask, reduction="sum")
|
||||
loss = loss / mask.sum()
|
||||
return loss
|
||||
|
||||
|
||||
class MSELossMasked(nn.Module):
|
||||
def __init__(self):
|
||||
super(MSELossMasked, self).__init__()
|
||||
|
||||
def forward(self, input, target, length):
|
||||
def forward(self, x, target, length):
|
||||
"""
|
||||
Args:
|
||||
input: A Variable containing a FloatTensor of size
|
||||
x: A Variable containing a FloatTensor of size
|
||||
(batch, max_len, dim) which contains the
|
||||
unnormalized probability for each class.
|
||||
target: A Variable containing a LongTensor of size
|
||||
|
@ -55,9 +48,8 @@ class MSELossMasked(nn.Module):
|
|||
target.requires_grad = False
|
||||
mask = sequence_mask(
|
||||
sequence_length=length, max_len=target.size(1)).unsqueeze(2).float()
|
||||
mask = mask.expand_as(input)
|
||||
mask = mask.expand_as(x)
|
||||
loss = functional.mse_loss(
|
||||
input * mask, target * mask, reduction="sum")
|
||||
x * mask, target * mask, reduction="sum")
|
||||
loss = loss / mask.sum()
|
||||
return loss
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ class CBHG(nn.Module):
|
|||
# (B, in_features, T_in)
|
||||
if x.size(-1) == self.in_features:
|
||||
x = x.transpose(1, 2)
|
||||
T = x.size(-1)
|
||||
# T = x.size(-1)
|
||||
# (B, hid_features*K, T_in)
|
||||
# Concat conv1d bank outputs
|
||||
outs = []
|
||||
|
@ -261,7 +261,7 @@ class PostCBHG(nn.Module):
|
|||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
r"""Decoder module.
|
||||
"""Decoder module.
|
||||
|
||||
Args:
|
||||
in_features (int): input vector (encoder output) sample size.
|
||||
|
@ -270,6 +270,8 @@ class Decoder(nn.Module):
|
|||
memory_size (int): size of the past window. if <= 0 memory_size = r
|
||||
TODO: arguments
|
||||
"""
|
||||
# Pylint gets confused by PyTorch conventions here
|
||||
#pylint: disable=attribute-defined-outside-init
|
||||
|
||||
def __init__(self, in_features, memory_dim, r, memory_size, attn_windowing,
|
||||
attn_norm, prenet_type, prenet_dropout, forward_attn,
|
||||
|
@ -290,16 +292,16 @@ class Decoder(nn.Module):
|
|||
# processed_inputs, processed_memory -> |Attention| -> Attention, attention, RNN_State
|
||||
self.attention_rnn = nn.GRUCell(in_features + 128, 256)
|
||||
self.attention_layer = Attention(attention_rnn_dim=256,
|
||||
embedding_dim=in_features,
|
||||
attention_dim=128,
|
||||
location_attention=location_attn,
|
||||
attention_location_n_filters=32,
|
||||
attention_location_kernel_size=31,
|
||||
windowing=attn_windowing,
|
||||
norm=attn_norm,
|
||||
forward_attn=forward_attn,
|
||||
trans_agent=trans_agent,
|
||||
forward_attn_mask=forward_attn_mask)
|
||||
embedding_dim=in_features,
|
||||
attention_dim=128,
|
||||
location_attention=location_attn,
|
||||
attention_location_n_filters=32,
|
||||
attention_location_kernel_size=31,
|
||||
windowing=attn_windowing,
|
||||
norm=attn_norm,
|
||||
forward_attn=forward_attn,
|
||||
trans_agent=trans_agent,
|
||||
forward_attn_mask=forward_attn_mask)
|
||||
# (processed_memory | attention context) -> |Linear| -> decoder_RNN_input
|
||||
self.project_to_decoder_in = nn.Linear(256 + in_features, 256)
|
||||
# decoder_RNN_input -> |RNN| -> RNN_state
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
from math import sqrt
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from .common_layers import Attention, Prenet, Linear, LinearBN
|
||||
from .common_layers import Attention, Prenet, Linear
|
||||
|
||||
|
||||
class ConvBNBlock(nn.Module):
|
||||
|
@ -33,7 +32,7 @@ class Postnet(nn.Module):
|
|||
self.convolutions = nn.ModuleList()
|
||||
self.convolutions.append(
|
||||
ConvBNBlock(mel_dim, 512, kernel_size=5, nonlinear='tanh'))
|
||||
for i in range(1, num_convs - 1):
|
||||
for _ in range(1, num_convs - 1):
|
||||
self.convolutions.append(
|
||||
ConvBNBlock(512, 512, kernel_size=5, nonlinear='tanh'))
|
||||
self.convolutions.append(
|
||||
|
@ -95,6 +94,8 @@ class Encoder(nn.Module):
|
|||
|
||||
# adapted from https://github.com/NVIDIA/tacotron2/
|
||||
class Decoder(nn.Module):
|
||||
# Pylint gets confused by PyTorch conventions here
|
||||
#pylint: disable=attribute-defined-outside-init
|
||||
def __init__(self, in_features, inputs_dim, r, attn_win, attn_norm,
|
||||
prenet_type, prenet_dropout, forward_attn, trans_agent,
|
||||
forward_attn_mask, location_attn, separate_stopnet):
|
||||
|
@ -118,15 +119,15 @@ class Decoder(nn.Module):
|
|||
self.attention_rnn = nn.LSTMCell(self.prenet_dim + in_features,
|
||||
self.attention_rnn_dim)
|
||||
|
||||
self.attention_layer = Attention(attention_rnn_dim=self.attention_rnn_dim,
|
||||
self.attention_layer = Attention(attention_rnn_dim=self.attention_rnn_dim,
|
||||
embedding_dim=in_features,
|
||||
attention_dim=128,
|
||||
location_attention=location_attn,
|
||||
attention_dim=128,
|
||||
location_attention=location_attn,
|
||||
attention_location_n_filters=32,
|
||||
attention_location_kernel_size=31,
|
||||
windowing=attn_win,
|
||||
norm=attn_norm,
|
||||
forward_attn=forward_attn,
|
||||
norm=attn_norm,
|
||||
forward_attn=forward_attn,
|
||||
trans_agent=trans_agent,
|
||||
forward_attn_mask=forward_attn_mask)
|
||||
|
||||
|
@ -156,7 +157,7 @@ class Decoder(nn.Module):
|
|||
|
||||
def _init_states(self, inputs, mask, keep_states=False):
|
||||
B = inputs.size(0)
|
||||
T = inputs.size(1)
|
||||
# T = inputs.size(1)
|
||||
|
||||
if not keep_states:
|
||||
self.attention_hidden = self.attention_rnn_init(
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
# coding: utf-8
|
||||
import torch
|
||||
from torch import nn
|
||||
from math import sqrt
|
||||
from layers.tacotron import Prenet, Encoder, Decoder, PostCBHG
|
||||
from layers.tacotron import Encoder, Decoder, PostCBHG
|
||||
from utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
from math import sqrt
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from layers.tacotron2 import Encoder, Decoder, Postnet
|
||||
from utils.generic_utils import sequence_mask
|
||||
|
||||
|
@ -39,7 +36,8 @@ class Tacotron2(nn.Module):
|
|||
location_attn, separate_stopnet)
|
||||
self.postnet = Postnet(self.n_mel_channels)
|
||||
|
||||
def shape_outputs(self, mel_outputs, mel_outputs_postnet, alignments):
|
||||
@staticmethod
|
||||
def shape_outputs(mel_outputs, mel_outputs_postnet, alignments):
|
||||
mel_outputs = mel_outputs.transpose(1, 2)
|
||||
mel_outputs_postnet = mel_outputs_postnet.transpose(1, 2)
|
||||
return mel_outputs, mel_outputs_postnet, alignments
|
||||
|
@ -90,8 +88,8 @@ class Tacotron2(nn.Module):
|
|||
|
||||
def _add_speaker_embedding(self, encoder_outputs, speaker_ids):
|
||||
if hasattr(self, "speaker_embedding") and speaker_ids is None:
|
||||
raise RuntimeError(" [!] Model has speaker embedding layer but speaker_id is not provided")
|
||||
elif hasattr(self, "speaker_embedding") and speaker_ids is not None:
|
||||
raise RuntimeError(" [!] Model has speaker embedding layer but speaker_id is not provided")
|
||||
if hasattr(self, "speaker_embedding") and speaker_ids is not None:
|
||||
speaker_embeddings = self.speaker_embedding(speaker_ids)
|
||||
|
||||
speaker_embeddings.unsqueeze_(1)
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
# coding: utf-8
|
||||
import torch
|
||||
from torch import nn
|
||||
from math import sqrt
|
||||
from layers.tacotron import Prenet, Encoder, Decoder, PostCBHG
|
||||
from layers.tacotron import Encoder, Decoder, PostCBHG
|
||||
from layers.gst_layers import GST
|
||||
from utils.generic_utils import sequence_mask
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
import argparse
|
||||
from synthesizer import Synthesizer
|
||||
from utils.generic_utils import load_config
|
||||
from flask import Flask, Response, request, render_template, send_file
|
||||
from flask import Flask, request, render_template, send_file
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
|
|
|
@ -5,31 +5,23 @@ import numpy as np
|
|||
import torch
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from models.tacotron import Tacotron
|
||||
from utils.audio import AudioProcessor
|
||||
from utils.generic_utils import load_config, setup_model
|
||||
from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence, sequence_to_phoneme
|
||||
|
||||
import re
|
||||
alphabets= "([A-Za-z])"
|
||||
prefixes = "(Mr|St|Mrs|Ms|Dr)[.]"
|
||||
suffixes = "(Inc|Ltd|Jr|Sr|Co)"
|
||||
starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
|
||||
acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
|
||||
websites = "[.](com|net|org|io|gov)"
|
||||
alphabets = r"([A-Za-z])"
|
||||
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
|
||||
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
|
||||
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
|
||||
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
|
||||
websites = r"[.](com|net|org|io|gov)"
|
||||
|
||||
from models.tacotron import Tacotron
|
||||
from utils.audio import AudioProcessor
|
||||
from utils.generic_utils import load_config
|
||||
from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence
|
||||
|
||||
class Synthesizer(object):
|
||||
def __init__(self, config):
|
||||
self.wavernn = None
|
||||
self.config = config
|
||||
self.config = config
|
||||
self.use_cuda = config.use_cuda
|
||||
if self.use_cuda:
|
||||
assert torch.cuda.is_available(), "CUDA is not availabe on this machine."
|
||||
|
@ -52,7 +44,7 @@ class Synthesizer(object):
|
|||
else:
|
||||
self.input_size = len(symbols)
|
||||
self.input_adapter = lambda sen: text_to_sequence(sen, [self.tts_config.text_cleaner])
|
||||
self.tts_model = setup_model(self.input_size, self.tts_config)
|
||||
self.tts_model = setup_model(self.input_size, c=self.tts_config) #FIXME: missing num_speakers argument to setup_model
|
||||
# load model state
|
||||
if use_cuda:
|
||||
cp = torch.load(self.model_file)
|
||||
|
@ -75,18 +67,18 @@ class Synthesizer(object):
|
|||
print(" | > model file: ", model_file)
|
||||
self.wavernn_config = load_config(wavernn_config)
|
||||
self.wavernn = Model(
|
||||
rnn_dims=512,
|
||||
fc_dims=512,
|
||||
mode=self.wavernn_config.mode,
|
||||
pad=2,
|
||||
upsample_factors=self.wavernn_config.upsample_factors, # set this depending on dataset
|
||||
feat_dims=80,
|
||||
compute_dims=128,
|
||||
res_out_dims=128,
|
||||
res_blocks=10,
|
||||
hop_length=self.ap.hop_length,
|
||||
sample_rate=self.ap.sample_rate,
|
||||
).cuda()
|
||||
rnn_dims=512,
|
||||
fc_dims=512,
|
||||
mode=self.wavernn_config.mode,
|
||||
pad=2,
|
||||
upsample_factors=self.wavernn_config.upsample_factors, # set this depending on dataset
|
||||
feat_dims=80,
|
||||
compute_dims=128,
|
||||
res_out_dims=128,
|
||||
res_blocks=10,
|
||||
hop_length=self.ap.hop_length,
|
||||
sample_rate=self.ap.sample_rate,
|
||||
).cuda()
|
||||
|
||||
check = torch.load(model_file)
|
||||
self.wavernn.load_state_dict(check['model'])
|
||||
|
@ -101,25 +93,30 @@ class Synthesizer(object):
|
|||
|
||||
def split_into_sentences(self, text):
|
||||
text = " " + text + " "
|
||||
text = text.replace("\n"," ")
|
||||
text = re.sub(prefixes,"\\1<prd>",text)
|
||||
text = re.sub(websites,"<prd>\\1",text)
|
||||
if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
|
||||
text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
|
||||
text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
|
||||
text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
|
||||
text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
|
||||
text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
|
||||
text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
|
||||
text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
|
||||
if "”" in text: text = text.replace(".”","”.")
|
||||
if "\"" in text: text = text.replace(".\"","\".")
|
||||
if "!" in text: text = text.replace("!\"","\"!")
|
||||
if "?" in text: text = text.replace("?\"","\"?")
|
||||
text = text.replace(".",".<stop>")
|
||||
text = text.replace("?","?<stop>")
|
||||
text = text.replace("!","!<stop>")
|
||||
text = text.replace("<prd>",".")
|
||||
text = text.replace("\n", " ")
|
||||
text = re.sub(prefixes, "\\1<prd>", text)
|
||||
text = re.sub(websites, "<prd>\\1", text)
|
||||
if "Ph.D" in text:
|
||||
text = text.replace("Ph.D.", "Ph<prd>D<prd>")
|
||||
text = re.sub(r"\s" + alphabets + "[.] ", " \\1<prd> ", text)
|
||||
text = re.sub(acronyms+" "+starters, "\\1<stop> \\2", text)
|
||||
text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>\\3<prd>", text)
|
||||
text = re.sub(alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>", text)
|
||||
text = re.sub(" "+suffixes+"[.] "+starters, " \\1<stop> \\2", text)
|
||||
text = re.sub(" "+suffixes+"[.]", " \\1<prd>", text)
|
||||
text = re.sub(" " + alphabets + "[.]", " \\1<prd>", text)
|
||||
if "”" in text:
|
||||
text = text.replace(".”", "”.")
|
||||
if "\"" in text:
|
||||
text = text.replace(".\"", "\".")
|
||||
if "!" in text:
|
||||
text = text.replace("!\"", "\"!")
|
||||
if "?" in text:
|
||||
text = text.replace("?\"", "\"?")
|
||||
text = text.replace(".", ".<stop>")
|
||||
text = text.replace("?", "?<stop>")
|
||||
text = text.replace("!", "!<stop>")
|
||||
text = text.replace("<prd>", ".")
|
||||
sentences = text.split("<stop>")
|
||||
sentences = sentences[:-1]
|
||||
sentences = [s.strip() for s in sentences]
|
||||
|
@ -128,7 +125,7 @@ class Synthesizer(object):
|
|||
def tts(self, text):
|
||||
wavs = []
|
||||
sens = self.split_into_sentences(text)
|
||||
if len(sens) == 0:
|
||||
if not sens:
|
||||
sens = [text+'.']
|
||||
for sen in sens:
|
||||
if len(sen) < 3:
|
||||
|
|
3
setup.py
3
setup.py
|
@ -5,7 +5,6 @@ import setuptools.command.develop
|
|||
import setuptools.command.build_py
|
||||
import os
|
||||
import subprocess
|
||||
from os.path import exists
|
||||
|
||||
version = '0.0.1'
|
||||
|
||||
|
@ -31,7 +30,6 @@ class build_py(setuptools.command.build_py.build_py):
|
|||
|
||||
@staticmethod
|
||||
def create_version_file():
|
||||
global version, cwd
|
||||
print('-- Building version ' + version)
|
||||
version_path = os.path.join(cwd, 'version.py')
|
||||
with open(version_path, 'w') as f:
|
||||
|
@ -45,7 +43,6 @@ class develop(setuptools.command.develop.develop):
|
|||
|
||||
|
||||
def create_readme_rst():
|
||||
global cwd
|
||||
try:
|
||||
subprocess.check_call(
|
||||
[
|
||||
|
|
|
@ -1 +1 @@
|
|||
print("Python is running!!")
|
||||
print("Python is running!!")
|
||||
|
|
|
@ -2,7 +2,7 @@ import unittest
|
|||
import torch as T
|
||||
|
||||
from utils.generic_utils import save_checkpoint, save_best_model
|
||||
from layers.tacotron import Prenet, CBHG, Decoder, Encoder
|
||||
from layers.tacotron import Prenet
|
||||
|
||||
OUT_PATH = '/tmp/test.pth.tar'
|
||||
|
||||
|
@ -11,14 +11,14 @@ class ModelSavingTests(unittest.TestCase):
|
|||
def save_checkpoint_test(self):
|
||||
# create a dummy model
|
||||
model = Prenet(128, out_features=[256, 128])
|
||||
model = T.nn.DataParallel(layer)
|
||||
model = T.nn.DataParallel(layer) #FIXME: undefined variable layer
|
||||
|
||||
# save the model
|
||||
save_checkpoint(model, None, 100, OUTPATH, 1, 1)
|
||||
save_checkpoint(model, None, 100, OUT_PATH, 1, 1)
|
||||
|
||||
# load the model to CPU
|
||||
model_dict = torch.load(
|
||||
MODEL_PATH, map_location=lambda storage, loc: storage)
|
||||
model_dict = T.load(
|
||||
MODEL_PATH, map_location=lambda storage, loc: storage) #FIXME: undefined variable MODEL_PATH
|
||||
model.load_state_dict(model_dict['model'])
|
||||
|
||||
def save_best_model_test(self):
|
||||
|
@ -27,9 +27,9 @@ class ModelSavingTests(unittest.TestCase):
|
|||
model = T.nn.DataParallel(layer)
|
||||
|
||||
# save the model
|
||||
best_loss = save_best_model(model, None, 0, 100, OUT_PATH, 10, 1)
|
||||
save_best_model(model, None, 0, 100, OUT_PATH, 10, 1)
|
||||
|
||||
# load the model to CPU
|
||||
model_dict = torch.load(
|
||||
model_dict = T.load(
|
||||
MODEL_PATH, map_location=lambda storage, loc: storage)
|
||||
model.load_state_dict(model_dict['model'])
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
import os
|
||||
import unittest
|
||||
import numpy as np
|
||||
import torch as T
|
||||
|
||||
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||
from utils.audio import AudioProcessor
|
||||
|
|
|
@ -5,6 +5,7 @@ from layers.tacotron import Prenet, CBHG, Decoder, Encoder
|
|||
from layers.losses import L1LossMasked
|
||||
from utils.generic_utils import sequence_mask
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
class PrenetTests(unittest.TestCase):
|
||||
def test_in_out(self):
|
||||
|
@ -19,6 +20,7 @@ class PrenetTests(unittest.TestCase):
|
|||
|
||||
class CBHGTests(unittest.TestCase):
|
||||
def test_in_out(self):
|
||||
#pylint: disable=attribute-defined-outside-init
|
||||
layer = self.cbhg = CBHG(
|
||||
128,
|
||||
K=8,
|
||||
|
@ -37,7 +39,8 @@ class CBHGTests(unittest.TestCase):
|
|||
|
||||
|
||||
class DecoderTests(unittest.TestCase):
|
||||
def test_in_out(self):
|
||||
@staticmethod
|
||||
def test_in_out():
|
||||
layer = Decoder(
|
||||
in_features=256,
|
||||
memory_dim=80,
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
import unittest
|
||||
import shutil
|
||||
import numpy as np
|
||||
|
||||
from torch.utils.data import DataLoader
|
||||
from utils.generic_utils import load_config
|
||||
|
@ -9,6 +8,8 @@ from utils.audio import AudioProcessor
|
|||
from datasets import TTSDataset
|
||||
from datasets.preprocess import ljspeech
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
file_path = os.path.dirname(os.path.realpath(__file__))
|
||||
OUTPATH = os.path.join(file_path, "outputs/loader_tests/")
|
||||
os.makedirs(OUTPATH, exist_ok=True)
|
||||
|
@ -135,7 +136,7 @@ class TestTTSDataset(unittest.TestCase):
|
|||
self.ap.save_wav(wav, OUTPATH + '/mel_inv_dataloader.wav')
|
||||
shutil.copy(item_idx[0], OUTPATH + '/mel_target_dataloader.wav')
|
||||
|
||||
# check linear-spec
|
||||
# check linear-spec
|
||||
linear_spec = linear_input[0].cpu().numpy()
|
||||
wav = self.ap.inv_spectrogram(linear_spec.T)
|
||||
self.ap.save_wav(wav, OUTPATH + '/linear_inv_dataloader.wav')
|
||||
|
|
|
@ -10,6 +10,8 @@ from utils.generic_utils import load_config
|
|||
from layers.losses import MSELossMasked
|
||||
from models.tacotron2 import Tacotron2
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
torch.manual_seed(1)
|
||||
use_cuda = torch.cuda.is_available()
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
|
|
|
@ -2,7 +2,6 @@ import os
|
|||
import copy
|
||||
import torch
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from torch import optim
|
||||
from torch import nn
|
||||
|
@ -10,6 +9,8 @@ from utils.generic_utils import load_config
|
|||
from layers.losses import L1LossMasked
|
||||
from models.tacotron import Tacotron
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
torch.manual_seed(1)
|
||||
use_cuda = torch.cuda.is_available()
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
|
@ -50,7 +51,7 @@ class TacotronTrainTest(unittest.TestCase):
|
|||
linear_dim=c.audio['num_freq'],
|
||||
mel_dim=c.audio['num_mels'],
|
||||
r=c.r,
|
||||
memory_size=c.memory_size).to(device)
|
||||
memory_size=c.memory_size).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor
|
||||
model.train()
|
||||
print(" > Num parameters for Tacotron model:%s"%(count_parameters(model)))
|
||||
model_ref = copy.deepcopy(model)
|
||||
|
@ -60,7 +61,7 @@ class TacotronTrainTest(unittest.TestCase):
|
|||
assert (param - param_ref).sum() == 0, param
|
||||
count += 1
|
||||
optimizer = optim.Adam(model.parameters(), lr=c.lr)
|
||||
for i in range(5):
|
||||
for _ in range(5):
|
||||
mel_out, linear_out, align, stop_tokens = model.forward(
|
||||
input, input_lengths, mel_spec, speaker_ids)
|
||||
optimizer.zero_grad()
|
||||
|
@ -79,4 +80,4 @@ class TacotronTrainTest(unittest.TestCase):
|
|||
assert (param != param_ref).any(
|
||||
), "param {} with shape {} not updated!! \n{}\n{}".format(
|
||||
count, param.shape, param, param_ref)
|
||||
count += 1
|
||||
count += 1
|
||||
|
|
|
@ -69,7 +69,6 @@ def test_phoneme_to_sequence():
|
|||
|
||||
def test_text2phone():
|
||||
text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
|
||||
text_cleaner = ["phoneme_cleaners"]
|
||||
gt = "ɹ|iː|s|ə|n|t| |ɹ|ɪ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i|| |ɪ|n|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ɪ|s|p|ɑː|n|s|ə|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|uː|l|eɪ|ʃ|ə|n||| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!"
|
||||
lang = "en-us"
|
||||
phonemes = text2phone(text, lang)
|
||||
|
|
72
train.py
72
train.py
|
@ -7,7 +7,6 @@ import traceback
|
|||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from tensorboardX import SummaryWriter
|
||||
from torch import optim
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
@ -18,9 +17,8 @@ from layers.losses import L1LossMasked, MSELossMasked
|
|||
from utils.audio import AudioProcessor
|
||||
from utils.generic_utils import (NoamLR, check_update, count_parameters,
|
||||
create_experiment_folder, get_git_branch,
|
||||
load_config, lr_decay,
|
||||
remove_experiment_folder, save_best_model,
|
||||
save_checkpoint, sequence_mask, weight_decay,
|
||||
load_config, remove_experiment_folder,
|
||||
save_best_model, save_checkpoint, weight_decay,
|
||||
set_init_dict, copy_config_file, setup_model,
|
||||
split_dataset)
|
||||
from utils.logger import Logger
|
||||
|
@ -40,8 +38,7 @@ print(" > Using CUDA: ", use_cuda)
|
|||
print(" > Number of GPUs: ", num_gpus)
|
||||
|
||||
|
||||
def setup_loader(is_val=False, verbose=False):
|
||||
global ap
|
||||
def setup_loader(ap, is_val=False, verbose=False):
|
||||
global meta_data_train
|
||||
global meta_data_eval
|
||||
if "meta_data_train" not in globals():
|
||||
|
@ -86,7 +83,7 @@ def setup_loader(is_val=False, verbose=False):
|
|||
|
||||
def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
||||
ap, epoch):
|
||||
data_loader = setup_loader(is_val=False, verbose=(epoch==0))
|
||||
data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0))
|
||||
if c.use_speaker_embedding:
|
||||
speaker_mapping = load_speaker_mapping(OUT_PATH)
|
||||
model.train()
|
||||
|
@ -130,7 +127,8 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
if c.lr_decay:
|
||||
scheduler.step()
|
||||
optimizer.zero_grad()
|
||||
if optimizer_st: optimizer_st.zero_grad();
|
||||
if optimizer_st:
|
||||
optimizer_st.zero_grad()
|
||||
|
||||
# dispatch data to GPU
|
||||
if use_cuda:
|
||||
|
@ -145,7 +143,7 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
|
||||
# forward pass model
|
||||
decoder_output, postnet_output, alignments, stop_tokens = model(
|
||||
text_input, text_lengths, mel_input, speaker_ids=speaker_ids)
|
||||
text_input, text_lengths, mel_input, speaker_ids=speaker_ids)
|
||||
|
||||
# loss computation
|
||||
stop_loss = criterion_st(stop_tokens, stop_targets) if c.stopnet else torch.zeros(1)
|
||||
|
@ -202,16 +200,16 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
if args.rank == 0:
|
||||
avg_postnet_loss += float(postnet_loss.item())
|
||||
avg_decoder_loss += float(decoder_loss.item())
|
||||
avg_stop_loss += stop_loss if type(stop_loss) is float else float(stop_loss.item())
|
||||
avg_stop_loss += stop_loss if isinstance(stop_loss, float) else float(stop_loss.item())
|
||||
avg_step_time += step_time
|
||||
|
||||
# Plot Training Iter Stats
|
||||
iter_stats = {"loss_posnet": postnet_loss.item(),
|
||||
"loss_decoder": decoder_loss.item(),
|
||||
"lr": current_lr,
|
||||
"grad_norm": grad_norm,
|
||||
"grad_norm_st": grad_norm_st,
|
||||
"step_time": step_time}
|
||||
"loss_decoder": decoder_loss.item(),
|
||||
"lr": current_lr,
|
||||
"grad_norm": grad_norm,
|
||||
"grad_norm_st": grad_norm_st,
|
||||
"step_time": step_time}
|
||||
tb_logger.tb_train_iter_stats(current_step, iter_stats)
|
||||
|
||||
if current_step % c.save_step == 0:
|
||||
|
@ -223,7 +221,7 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
|
||||
# Diagnostic visualizations
|
||||
const_spec = postnet_output[0].data.cpu().numpy()
|
||||
gt_spec = linear_input[0].data.cpu().numpy() if c.model in ["Tacotron", "TacotronGST"] else mel_input[0].data.cpu().numpy()
|
||||
gt_spec = linear_input[0].data.cpu().numpy() if c.model in ["Tacotron", "TacotronGST"] else mel_input[0].data.cpu().numpy()
|
||||
align_img = alignments[0].data.cpu().numpy()
|
||||
|
||||
figures = {
|
||||
|
@ -238,9 +236,9 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
train_audio = ap.inv_spectrogram(const_spec.T)
|
||||
else:
|
||||
train_audio = ap.inv_mel_spectrogram(const_spec.T)
|
||||
tb_logger.tb_train_audios(current_step,
|
||||
{'TrainAudio': train_audio},
|
||||
c.audio["sample_rate"])
|
||||
tb_logger.tb_train_audios(current_step,
|
||||
{'TrainAudio': train_audio},
|
||||
c.audio["sample_rate"])
|
||||
|
||||
avg_postnet_loss /= (num_iter + 1)
|
||||
avg_decoder_loss /= (num_iter + 1)
|
||||
|
@ -262,9 +260,9 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
if args.rank == 0:
|
||||
# Plot Training Epoch Stats
|
||||
epoch_stats = {"loss_postnet": avg_postnet_loss,
|
||||
"loss_decoder": avg_decoder_loss,
|
||||
"stop_loss": avg_stop_loss,
|
||||
"epoch_time": epoch_time}
|
||||
"loss_decoder": avg_decoder_loss,
|
||||
"stop_loss": avg_stop_loss,
|
||||
"epoch_time": epoch_time}
|
||||
tb_logger.tb_train_epoch_stats(current_step, epoch_stats)
|
||||
if c.tb_model_param_stats:
|
||||
tb_logger.tb_model_weights(model, current_step)
|
||||
|
@ -273,7 +271,7 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,
|
|||
|
||||
|
||||
def evaluate(model, criterion, criterion_st, ap, current_step, epoch):
|
||||
data_loader = setup_loader(is_val=True)
|
||||
data_loader = setup_loader(ap, is_val=True)
|
||||
if c.use_speaker_embedding:
|
||||
speaker_mapping = load_speaker_mapping(OUT_PATH)
|
||||
model.eval()
|
||||
|
@ -401,8 +399,8 @@ def evaluate(model, criterion, criterion_st, ap, current_step, epoch):
|
|||
|
||||
# Plot Validation Stats
|
||||
epoch_stats = {"loss_postnet": avg_postnet_loss,
|
||||
"loss_decoder": avg_decoder_loss,
|
||||
"stop_loss": avg_stop_loss}
|
||||
"loss_decoder": avg_decoder_loss,
|
||||
"stop_loss": avg_stop_loss}
|
||||
tb_logger.tb_eval_stats(current_step, epoch_stats)
|
||||
|
||||
if args.rank == 0 and epoch > c.test_delay_epochs:
|
||||
|
@ -419,7 +417,7 @@ def evaluate(model, criterion, criterion_st, ap, current_step, epoch):
|
|||
file_path = os.path.join(AUDIO_PATH, str(current_step))
|
||||
os.makedirs(file_path, exist_ok=True)
|
||||
file_path = os.path.join(file_path,
|
||||
"TestSentence_{}.wav".format(idx))
|
||||
"TestSentence_{}.wav".format(idx))
|
||||
ap.save_wav(wav, file_path)
|
||||
test_audios['{}-audio'.format(idx)] = wav
|
||||
test_figures['{}-prediction'.format(idx)] = plot_spectrogram(postnet_output, ap)
|
||||
|
@ -432,7 +430,11 @@ def evaluate(model, criterion, criterion_st, ap, current_step, epoch):
|
|||
return avg_postnet_loss
|
||||
|
||||
|
||||
def main(args):
|
||||
#FIXME: move args definition/parsing inside of main?
|
||||
def main(args): #pylint: disable=redefined-outer-name
|
||||
# Audio processor
|
||||
ap = AudioProcessor(**c.audio)
|
||||
|
||||
# DISTRUBUTED
|
||||
if num_gpus > 1:
|
||||
init_distributed(args.rank, num_gpus, args.group_id,
|
||||
|
@ -481,12 +483,11 @@ def main(args):
|
|||
# TODO: fix optimizer init, model.cuda() needs to be called before
|
||||
# optimizer restore
|
||||
# optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
if len(c.reinit_layers) > 0:
|
||||
if c.reinit_layers:
|
||||
raise RuntimeError
|
||||
model.load_state_dict(checkpoint['model'])
|
||||
except:
|
||||
print(" > Partial model initialization.")
|
||||
partial_init_flag = True
|
||||
model_dict = model.state_dict()
|
||||
model_dict = set_init_dict(model_dict, checkpoint, c)
|
||||
model.load_state_dict(model_dict)
|
||||
|
@ -495,7 +496,6 @@ def main(args):
|
|||
group['lr'] = c.lr
|
||||
print(
|
||||
" > Model restored from step %d" % checkpoint['step'], flush=True)
|
||||
start_epoch = checkpoint['epoch']
|
||||
args.restore_step = checkpoint['step']
|
||||
else:
|
||||
args.restore_step = 0
|
||||
|
@ -503,7 +503,8 @@ def main(args):
|
|||
if use_cuda:
|
||||
model = model.cuda()
|
||||
criterion.cuda()
|
||||
if criterion_st: criterion_st.cuda();
|
||||
if criterion_st:
|
||||
criterion_st.cuda()
|
||||
|
||||
# DISTRUBUTED
|
||||
if num_gpus > 1:
|
||||
|
@ -614,13 +615,10 @@ if __name__ == '__main__':
|
|||
os.chmod(AUDIO_PATH, 0o775)
|
||||
os.chmod(OUT_PATH, 0o775)
|
||||
|
||||
if args.rank==0:
|
||||
if args.rank == 0:
|
||||
LOG_DIR = OUT_PATH
|
||||
tb_logger = Logger(LOG_DIR)
|
||||
|
||||
# Audio processor
|
||||
ap = AudioProcessor(**c.audio)
|
||||
|
||||
try:
|
||||
main(args)
|
||||
except KeyboardInterrupt:
|
||||
|
@ -628,8 +626,8 @@ if __name__ == '__main__':
|
|||
try:
|
||||
sys.exit(0)
|
||||
except SystemExit:
|
||||
os._exit(0)
|
||||
except Exception:
|
||||
os._exit(0) #pylint: disable=protected-access
|
||||
except Exception: #pylint: disable=broad-except
|
||||
remove_experiment_folder(OUT_PATH)
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
|
|
@ -1,11 +1,8 @@
|
|||
import os
|
||||
import librosa
|
||||
import soundfile as sf
|
||||
import pickle
|
||||
import copy
|
||||
import numpy as np
|
||||
from pprint import pprint
|
||||
from scipy import signal, io
|
||||
import scipy.io
|
||||
import scipy.signal
|
||||
|
||||
|
||||
class AudioProcessor(object):
|
||||
|
@ -27,13 +24,13 @@ class AudioProcessor(object):
|
|||
clip_norm=True,
|
||||
griffin_lim_iters=None,
|
||||
do_trim_silence=False,
|
||||
**kwargs):
|
||||
**_):
|
||||
|
||||
print(" > Setting up Audio Processor...")
|
||||
|
||||
self.sample_rate = sample_rate
|
||||
self.num_mels = num_mels
|
||||
self.min_level_db = min_level_db
|
||||
self.min_level_db = min_level_db or 0
|
||||
self.frame_shift_ms = frame_shift_ms
|
||||
self.frame_length_ms = frame_length_ms
|
||||
self.ref_level_db = ref_level_db
|
||||
|
@ -43,7 +40,7 @@ class AudioProcessor(object):
|
|||
self.griffin_lim_iters = griffin_lim_iters
|
||||
self.signal_norm = signal_norm
|
||||
self.symmetric_norm = symmetric_norm
|
||||
self.mel_fmin = 0 if mel_fmin is None else mel_fmin
|
||||
self.mel_fmin = mel_fmin or 0
|
||||
self.mel_fmax = mel_fmax
|
||||
self.max_norm = 1.0 if max_norm is None else float(max_norm)
|
||||
self.clip_norm = clip_norm
|
||||
|
@ -55,7 +52,7 @@ class AudioProcessor(object):
|
|||
|
||||
def save_wav(self, wav, path):
|
||||
wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
|
||||
io.wavfile.write(path, self.sample_rate, wav_norm.astype(np.int16))
|
||||
scipy.io.wavfile.write(path, self.sample_rate, wav_norm.astype(np.int16))
|
||||
|
||||
def _linear_to_mel(self, spectrogram):
|
||||
_mel_basis = self._build_mel_basis()
|
||||
|
@ -78,11 +75,12 @@ class AudioProcessor(object):
|
|||
|
||||
def _normalize(self, S):
|
||||
"""Put values in [0, self.max_norm] or [-self.max_norm, self.max_norm]"""
|
||||
#pylint: disable=no-else-return
|
||||
if self.signal_norm:
|
||||
S_norm = ((S - self.min_level_db) / - self.min_level_db)
|
||||
if self.symmetric_norm:
|
||||
S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm
|
||||
if self.clip_norm :
|
||||
if self.clip_norm:
|
||||
S_norm = np.clip(S_norm, -self.max_norm, self.max_norm)
|
||||
return S_norm
|
||||
else:
|
||||
|
@ -95,18 +93,19 @@ class AudioProcessor(object):
|
|||
|
||||
def _denormalize(self, S):
|
||||
"""denormalize values"""
|
||||
#pylint: disable=no-else-return
|
||||
S_denorm = S
|
||||
if self.signal_norm:
|
||||
if self.symmetric_norm:
|
||||
if self.clip_norm:
|
||||
S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm)
|
||||
S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm)
|
||||
S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db
|
||||
return S_denorm
|
||||
else:
|
||||
if self.clip_norm:
|
||||
S_denorm = np.clip(S_denorm, 0, self.max_norm)
|
||||
S_denorm = (S_denorm * -self.min_level_db /
|
||||
self.max_norm) + self.min_level_db
|
||||
self.max_norm) + self.min_level_db
|
||||
return S_denorm
|
||||
else:
|
||||
return S
|
||||
|
@ -122,18 +121,19 @@ class AudioProcessor(object):
|
|||
min_level = np.exp(self.min_level_db / 20 * np.log(10))
|
||||
return 20 * np.log10(np.maximum(min_level, x))
|
||||
|
||||
def _db_to_amp(self, x):
|
||||
@staticmethod
|
||||
def _db_to_amp(x):
|
||||
return np.power(10.0, x * 0.05)
|
||||
|
||||
def apply_preemphasis(self, x):
|
||||
if self.preemphasis == 0:
|
||||
raise RuntimeError(" !! Preemphasis is applied with factor 0.0. ")
|
||||
return signal.lfilter([1, -self.preemphasis], [1], x)
|
||||
return scipy.signal.lfilter([1, -self.preemphasis], [1], x)
|
||||
|
||||
def apply_inv_preemphasis(self, x):
|
||||
if self.preemphasis == 0:
|
||||
raise RuntimeError(" !! Preemphasis is applied with factor 0.0. ")
|
||||
return signal.lfilter([1], [1, -self.preemphasis], x)
|
||||
return scipy.signal.lfilter([1], [1, -self.preemphasis], x)
|
||||
|
||||
def spectrogram(self, y):
|
||||
if self.preemphasis != 0:
|
||||
|
@ -158,8 +158,7 @@ class AudioProcessor(object):
|
|||
# Reconstruct phase
|
||||
if self.preemphasis != 0:
|
||||
return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
|
||||
else:
|
||||
return self._griffin_lim(S**self.power)
|
||||
return self._griffin_lim(S**self.power)
|
||||
|
||||
def inv_mel_spectrogram(self, mel_spectrogram):
|
||||
'''Converts mel spectrogram to waveform using librosa'''
|
||||
|
@ -168,12 +167,11 @@ class AudioProcessor(object):
|
|||
S = self._mel_to_linear(S) # Convert back to linear
|
||||
if self.preemphasis != 0:
|
||||
return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
|
||||
else:
|
||||
return self._griffin_lim(S**self.power)
|
||||
return self._griffin_lim(S**self.power)
|
||||
|
||||
def out_linear_to_mel(self, linear_spec):
|
||||
S = self._denormalize(linear_spec)
|
||||
S = self._db_to_amp(S + self.ref_level_db)
|
||||
S = self._db_to_amp(S + self.ref_level_db)
|
||||
S = self._linear_to_mel(np.abs(S))
|
||||
S = self._amp_to_db(S) - self.ref_level_db
|
||||
mel = self._normalize(S)
|
||||
|
@ -183,7 +181,7 @@ class AudioProcessor(object):
|
|||
angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
|
||||
S_complex = np.abs(S).astype(np.complex)
|
||||
y = self._istft(S_complex * angles)
|
||||
for i in range(self.griffin_lim_iters):
|
||||
for _ in range(self.griffin_lim_iters):
|
||||
angles = np.exp(1j * np.angle(self._stft(y)))
|
||||
y = self._istft(S_complex * angles)
|
||||
return y
|
||||
|
@ -240,16 +238,19 @@ class AudioProcessor(object):
|
|||
if self.do_trim_silence:
|
||||
try:
|
||||
x = self.trim_silence(x)
|
||||
except ValueError as e:
|
||||
except ValueError:
|
||||
print(f' [!] File cannot be trimmed for silence - {filename}')
|
||||
assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr)
|
||||
return x
|
||||
|
||||
def encode_16bits(self, x):
|
||||
@staticmethod
|
||||
def encode_16bits(x):
|
||||
return np.clip(x * 2**15, -2**15, 2**15 - 1).astype(np.int16)
|
||||
|
||||
def quantize(self, x, bits):
|
||||
@staticmethod
|
||||
def quantize(x, bits):
|
||||
return (x + 1.) * (2**bits - 1) / 2
|
||||
|
||||
def dequantize(self, x, bits):
|
||||
@staticmethod
|
||||
def dequantize(x, bits):
|
||||
return 2 * x / (2**bits - 1) - 1
|
||||
|
|
|
@ -45,7 +45,6 @@ def prepare_stop_target(inputs, out_steps):
|
|||
|
||||
|
||||
def pad_per_step(inputs, pad_len):
|
||||
timesteps = inputs.shape[-1]
|
||||
return np.pad(
|
||||
inputs, [[0, 0], [0, 0], [0, pad_len]],
|
||||
mode='constant',
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import glob
|
||||
import time
|
||||
import shutil
|
||||
import datetime
|
||||
import json
|
||||
|
@ -11,8 +9,6 @@ import subprocess
|
|||
import importlib
|
||||
import numpy as np
|
||||
from collections import OrderedDict, Counter
|
||||
from torch.autograd import Variable
|
||||
from utils.text import text_to_sequence
|
||||
|
||||
|
||||
class AttrDict(dict):
|
||||
|
@ -78,7 +74,7 @@ def remove_experiment_folder(experiment_path):
|
|||
"""Check folder if there is a checkpoint, otherwise remove the folder"""
|
||||
|
||||
checkpoint_files = glob.glob(experiment_path + "/*.pth.tar")
|
||||
if len(checkpoint_files) < 1:
|
||||
if not checkpoint_files:
|
||||
if os.path.exists(experiment_path):
|
||||
shutil.rmtree(experiment_path)
|
||||
print(" ! Run is removed from {}".format(experiment_path))
|
||||
|
@ -87,7 +83,6 @@ def remove_experiment_folder(experiment_path):
|
|||
|
||||
|
||||
def copy_config_file(config_file, out_path, new_fields):
|
||||
config_name = os.path.basename(config_file)
|
||||
config_lines = open(config_file, "r").readlines()
|
||||
# add extra information fields
|
||||
for key, value in new_fields.items():
|
||||
|
|
|
@ -46,7 +46,7 @@ class Logger(object):
|
|||
|
||||
def tb_train_iter_stats(self, step, stats):
|
||||
self.dict_to_tb_scalar("TrainIterStats", stats, step)
|
||||
|
||||
|
||||
def tb_train_epoch_stats(self, step, stats):
|
||||
self.dict_to_tb_scalar("TrainEpochStats", stats, step)
|
||||
|
||||
|
@ -64,12 +64,9 @@ class Logger(object):
|
|||
|
||||
def tb_eval_audios(self, step, audios, sample_rate):
|
||||
self.dict_to_tb_audios("EvalAudios", audios, step, sample_rate)
|
||||
|
||||
|
||||
def tb_test_audios(self, step, audios, sample_rate):
|
||||
self.dict_to_tb_audios("TestAudios", audios, step, sample_rate)
|
||||
|
||||
def tb_test_figures(self, step, figures):
|
||||
self.dict_to_tb_figure("TestFigures", figures, step)
|
||||
|
||||
|
||||
|
|
@ -1,11 +1,6 @@
|
|||
import io
|
||||
import time
|
||||
import librosa
|
||||
import torch
|
||||
import numpy as np
|
||||
from .text import text_to_sequence, phoneme_to_sequence, sequence_to_phoneme
|
||||
from .visual import visualize
|
||||
from matplotlib import pylab as plt
|
||||
from .text import text_to_sequence, phoneme_to_sequence
|
||||
|
||||
|
||||
def text_to_seqvec(text, CONFIG, use_cuda):
|
||||
|
@ -31,8 +26,7 @@ def compute_style_mel(style_wav, ap, use_cuda):
|
|||
ap.load_wav(style_wav))).unsqueeze(0)
|
||||
if use_cuda:
|
||||
return style_mel.cuda()
|
||||
else:
|
||||
return style_mel
|
||||
return style_mel
|
||||
|
||||
|
||||
def run_model(model, inputs, CONFIG, truncated, speaker_id=None, style_mel=None):
|
||||
|
@ -83,8 +77,8 @@ def synthesis(model,
|
|||
speaker_id=None,
|
||||
style_wav=None,
|
||||
truncated=False,
|
||||
enable_eos_bos_chars=False,
|
||||
trim_silence=False):
|
||||
enable_eos_bos_chars=False, #pylint: disable=unused-argument
|
||||
do_trim_silence=False):
|
||||
"""Synthesize voice for the given text.
|
||||
|
||||
Args:
|
||||
|
@ -99,7 +93,7 @@ def synthesis(model,
|
|||
truncated (bool): keep model states after inference. It can be used
|
||||
for continuous inference at long texts.
|
||||
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.
|
||||
trim_silence (bool): trim silence after synthesis.
|
||||
do_trim_silence (bool): trim silence after synthesis.
|
||||
"""
|
||||
# GST processing
|
||||
style_mel = None
|
||||
|
@ -119,6 +113,6 @@ def synthesis(model,
|
|||
# plot results
|
||||
wav = inv_spectrogram(postnet_output, ap, CONFIG)
|
||||
# trim silence
|
||||
if trim_silence:
|
||||
if do_trim_silence:
|
||||
wav = trim_silence(wav)
|
||||
return wav, alignment, decoder_output, postnet_output, stop_tokens
|
||||
|
|
|
@ -7,17 +7,17 @@ from utils.text import cleaners
|
|||
from utils.text.symbols import symbols, phonemes, _phoneme_punctuations
|
||||
|
||||
# Mappings from symbol to numeric ID and vice versa:
|
||||
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
|
||||
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
|
||||
_SYMBOL_TO_ID = {s: i for i, s in enumerate(symbols)}
|
||||
_ID_TO_SYMBOL = {i: s for i, s in enumerate(symbols)}
|
||||
|
||||
_phonemes_to_id = {s: i for i, s in enumerate(phonemes)}
|
||||
_id_to_phonemes = {i: s for i, s in enumerate(phonemes)}
|
||||
_PHONEMES_TO_ID = {s: i for i, s in enumerate(phonemes)}
|
||||
_ID_TO_PHONEMES = {i: s for i, s in enumerate(phonemes)}
|
||||
|
||||
# Regular expression matching text enclosed in curly braces:
|
||||
_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
|
||||
_CURLY_RE = re.compile(r'(.*?)\{(.+?)\}(.*)')
|
||||
|
||||
# Regular expression matchinf punctuations, ignoring empty space
|
||||
pat = r'['+_phoneme_punctuations+']+'
|
||||
PHONEME_PUNCTUATION_PATTERN = r'['+_phoneme_punctuations+']+'
|
||||
|
||||
|
||||
def text2phone(text, language):
|
||||
|
@ -26,11 +26,11 @@ def text2phone(text, language):
|
|||
'''
|
||||
seperator = phonemizer.separator.Separator(' |', '', '|')
|
||||
#try:
|
||||
punctuations = re.findall(pat, text)
|
||||
punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
|
||||
ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language)
|
||||
ph = ph[:-1].strip() # skip the last empty character
|
||||
# Replace \n with matching punctuations.
|
||||
if len(punctuations) > 0:
|
||||
if punctuations:
|
||||
# if text ends with a punctuation.
|
||||
if text[-1] == punctuations[-1]:
|
||||
for punct in punctuations[:-1]:
|
||||
|
@ -47,20 +47,20 @@ def text2phone(text, language):
|
|||
|
||||
def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False):
|
||||
if enable_eos_bos:
|
||||
sequence = [_phonemes_to_id['^']]
|
||||
sequence = [_PHONEMES_TO_ID['^']]
|
||||
else:
|
||||
sequence = []
|
||||
text = text.replace(":", "")
|
||||
clean_text = _clean_text(text, cleaner_names)
|
||||
phonemes = text2phone(clean_text, language)
|
||||
if phonemes is None:
|
||||
to_phonemes = text2phone(clean_text, language)
|
||||
if to_phonemes is None:
|
||||
print("!! After phoneme conversion the result is None. -- {} ".format(clean_text))
|
||||
# iterate by skipping empty strings - NOTE: might be useful to keep it to have a better intonation.
|
||||
for phoneme in filter(None, phonemes.split('|')):
|
||||
for phoneme in filter(None, to_phonemes.split('|')):
|
||||
sequence += _phoneme_to_sequence(phoneme)
|
||||
# Append EOS char
|
||||
if enable_eos_bos:
|
||||
sequence.append(_phonemes_to_id['~'])
|
||||
sequence.append(_PHONEMES_TO_ID['~'])
|
||||
return sequence
|
||||
|
||||
|
||||
|
@ -68,8 +68,8 @@ def sequence_to_phoneme(sequence):
|
|||
'''Converts a sequence of IDs back to a string'''
|
||||
result = ''
|
||||
for symbol_id in sequence:
|
||||
if symbol_id in _id_to_phonemes:
|
||||
s = _id_to_phonemes[symbol_id]
|
||||
if symbol_id in _ID_TO_PHONEMES:
|
||||
s = _ID_TO_PHONEMES[symbol_id]
|
||||
result += s
|
||||
return result.replace('}{', ' ')
|
||||
|
||||
|
@ -89,8 +89,8 @@ def text_to_sequence(text, cleaner_names):
|
|||
'''
|
||||
sequence = []
|
||||
# Check for curly braces and treat their contents as ARPAbet:
|
||||
while len(text):
|
||||
m = _curly_re.match(text)
|
||||
while text:
|
||||
m = _CURLY_RE.match(text)
|
||||
if not m:
|
||||
sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
|
||||
break
|
||||
|
@ -105,8 +105,8 @@ def sequence_to_text(sequence):
|
|||
'''Converts a sequence of IDs back to a string'''
|
||||
result = ''
|
||||
for symbol_id in sequence:
|
||||
if symbol_id in _id_to_symbol:
|
||||
s = _id_to_symbol[symbol_id]
|
||||
if symbol_id in _ID_TO_SYMBOL:
|
||||
s = _ID_TO_SYMBOL[symbol_id]
|
||||
# Enclose ARPAbet back in curly braces:
|
||||
if len(s) > 1 and s[0] == '@':
|
||||
s = '{%s}' % s[1:]
|
||||
|
@ -123,12 +123,12 @@ def _clean_text(text, cleaner_names):
|
|||
return text
|
||||
|
||||
|
||||
def _symbols_to_sequence(symbols):
|
||||
return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
|
||||
def _symbols_to_sequence(syms):
|
||||
return [_SYMBOL_TO_ID[s] for s in syms if _should_keep_symbol(s)]
|
||||
|
||||
|
||||
def _phoneme_to_sequence(phonemes):
|
||||
return [_phonemes_to_id[s] for s in list(phonemes) if _should_keep_phoneme(s)]
|
||||
def _phoneme_to_sequence(phons):
|
||||
return [_PHONEMES_TO_ID[s] for s in list(phons) if _should_keep_phoneme(s)]
|
||||
|
||||
|
||||
def _arpabet_to_sequence(text):
|
||||
|
@ -136,8 +136,8 @@ def _arpabet_to_sequence(text):
|
|||
|
||||
|
||||
def _should_keep_symbol(s):
|
||||
return s in _symbol_to_id and s not in ['~', '^', '_']
|
||||
return s in _SYMBOL_TO_ID and s not in ['~', '^', '_']
|
||||
|
||||
|
||||
def _should_keep_phoneme(p):
|
||||
return p in _phonemes_to_id and p not in ['~', '^', '_']
|
||||
return p in _PHONEMES_TO_ID and p not in ['~', '^', '_']
|
||||
|
|
|
@ -2,16 +2,16 @@
|
|||
|
||||
import re
|
||||
|
||||
# valid_symbols = [
|
||||
# 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
|
||||
# 'AH2', 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0',
|
||||
# 'AY1', 'AY2', 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0',
|
||||
# 'ER1', 'ER2', 'EY', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0',
|
||||
# 'IH1', 'IH2', 'IY', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG',
|
||||
# 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH',
|
||||
# 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W',
|
||||
# 'Y', 'Z', 'ZH'
|
||||
# ]
|
||||
VALID_SYMBOLS = [
|
||||
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
|
||||
'AH2', 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0',
|
||||
'AY1', 'AY2', 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0',
|
||||
'ER1', 'ER2', 'EY', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0',
|
||||
'IH1', 'IH2', 'IY', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG',
|
||||
'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH',
|
||||
'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W',
|
||||
'Y', 'Z', 'ZH'
|
||||
]
|
||||
|
||||
|
||||
class CMUDict:
|
||||
|
@ -37,19 +37,19 @@ class CMUDict:
|
|||
'''Returns list of ARPAbet pronunciations of the given word.'''
|
||||
return self._entries.get(word.upper())
|
||||
|
||||
def get_arpabet(self, word, cmudict, punctuation_symbols):
|
||||
@staticmethod
|
||||
def get_arpabet(word, cmudict, punctuation_symbols):
|
||||
first_symbol, last_symbol = '', ''
|
||||
if len(word) > 0 and word[0] in punctuation_symbols:
|
||||
if word and word[0] in punctuation_symbols:
|
||||
first_symbol = word[0]
|
||||
word = word[1:]
|
||||
if len(word) > 0 and word[-1] in punctuation_symbols:
|
||||
if word and word[-1] in punctuation_symbols:
|
||||
last_symbol = word[-1]
|
||||
word = word[:-1]
|
||||
arpabet = cmudict.lookup(word)
|
||||
if arpabet is not None:
|
||||
return first_symbol + '{%s}' % arpabet[0] + last_symbol
|
||||
else:
|
||||
return first_symbol + word + last_symbol
|
||||
return first_symbol + word + last_symbol
|
||||
|
||||
|
||||
_alt_re = re.compile(r'\([0-9]+\)')
|
||||
|
@ -58,7 +58,7 @@ _alt_re = re.compile(r'\([0-9]+\)')
|
|||
def _parse_cmudict(file):
|
||||
cmudict = {}
|
||||
for line in file:
|
||||
if len(line) and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"):
|
||||
if line and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"):
|
||||
parts = line.split(' ')
|
||||
word = re.sub(_alt_re, '', parts[0])
|
||||
pronunciation = _get_pronunciation(parts[1])
|
||||
|
@ -73,6 +73,6 @@ def _parse_cmudict(file):
|
|||
def _get_pronunciation(s):
|
||||
parts = s.strip().split(' ')
|
||||
for part in parts:
|
||||
if part not in _valid_symbol_set:
|
||||
if part not in VALID_SYMBOLS:
|
||||
return None
|
||||
return ' '.join(parts)
|
||||
|
|
|
@ -66,14 +66,13 @@ def _expand_dollars(m):
|
|||
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
|
||||
cent_unit = 'cent' if cents == 1 else 'cents'
|
||||
return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
|
||||
elif dollars:
|
||||
if dollars:
|
||||
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
|
||||
return '%s %s' % (dollars, dollar_unit)
|
||||
elif cents:
|
||||
if cents:
|
||||
cent_unit = 'cent' if cents == 1 else 'cents'
|
||||
return '%s %s' % (cents, cent_unit)
|
||||
else:
|
||||
return 'zero dollars'
|
||||
return 'zero dollars'
|
||||
|
||||
|
||||
def _standard_number_to_words(n, digit_group):
|
||||
|
@ -99,12 +98,11 @@ def _number_to_words(n):
|
|||
# Handle special cases first, then go to the standard case:
|
||||
if n >= 1000000000000000000:
|
||||
return str(n) # Too large, just return the digits
|
||||
elif n == 0:
|
||||
if n == 0:
|
||||
return 'zero'
|
||||
elif n % 100 == 0 and n % 1000 != 0 and n < 3000:
|
||||
if n % 100 == 0 and n % 1000 != 0 and n < 3000:
|
||||
return _standard_number_to_words(n // 100, 0) + ' hundred'
|
||||
else:
|
||||
return _standard_number_to_words(n, 0)
|
||||
return _standard_number_to_words(n, 0)
|
||||
|
||||
|
||||
def _expand_number(m):
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import numpy as np
|
||||
import librosa
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
@ -49,7 +48,7 @@ def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CON
|
|||
print(text)
|
||||
plt.yticks(range(len(text)), list(text))
|
||||
plt.colorbar()
|
||||
|
||||
|
||||
stop_tokens = stop_tokens.squeeze().detach().to('cpu').numpy()
|
||||
plt.subplot(num_plot, 1, 2)
|
||||
plt.plot(range(len(stop_tokens)), list(stop_tokens))
|
||||
|
@ -65,12 +64,12 @@ def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CON
|
|||
if spectrogram is not None:
|
||||
plt.subplot(num_plot, 1, 4)
|
||||
librosa.display.specshow(spectrogram.T, sr=CONFIG.audio['sample_rate'],
|
||||
hop_length=hop_length, x_axis="time", y_axis="linear")
|
||||
hop_length=hop_length, x_axis="time", y_axis="linear")
|
||||
plt.xlabel("Time", fontsize=label_fontsize)
|
||||
plt.ylabel("Hz", fontsize=label_fontsize)
|
||||
plt.tight_layout()
|
||||
plt.colorbar()
|
||||
|
||||
|
||||
if output_path:
|
||||
print(output_path)
|
||||
fig.savefig(output_path)
|
||||
|
|
Загрузка…
Ссылка в новой задаче