Fix typos discovered by codespell (#6)

This commit is contained in:
Christian Clauss 2021-09-28 03:20:34 +02:00 committed by GitHub
Parent e63f9cff45
Commit 584a0b272a
No key found matching this signature
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 16 additions and 16 deletions

View File

@@ -85,7 +85,7 @@ To generate by our provided pretrained DeepRapper, first unzip the pretrained De
│   └── config.json
```
-Finaly, run the following command to generate:
+Finally, run the following command to generate:
```bash
bash generate_from_pretrain.sh
```

View File

@@ -258,7 +258,7 @@ def _control_rhymes(node, probs, tokenizer, beater, pinyin_dict, rhyme_words_lis
rhyme_words = node.rhyme[0]
for w in rhyme_words:
probs = _rescale_rhymes(probs, w, tokenizer, beater, pinyin_dict, alpha)
-else: # detect the begining of new sentences
+else: # detect the beginning of new sentences
last_token_id = node.wordid[0][0][-1] # d2:batch size
last_token = tokenizer.convert_ids_to_tokens([last_token_id])[0]

View File

@@ -92,9 +92,9 @@ def main():
parser.add_argument('--reverse', action='store_true', help='whether to use reverse language model')
parser.add_argument('--with_beat', action='store_true', help='whether to generate beats')
parser.add_argument('--beat_mode', default=0, type=int, help='beat mode0.no control2.global3.local', required=False)
-parser.add_argument('--tempo', default=1, type=int, required=False, help='pace of beats:0-local controll; 1-slow; 2-medium; 3-fast')
+parser.add_argument('--tempo', default=1, type=int, required=False, help='pace of beats:0-local control; 1-slow; 2-medium; 3-fast')
-# beam seach param
+# beam search param
parser.add_argument('--beam_width', default=2, type=int, required=False, help='beam width')
parser.add_argument('--beam_samples_num', default=5, type=int, required=False, help='beam searching samples')
parser.add_argument('--beam_sample_select_sg', default='sample', type=str, required=False,
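Note: the flags in this hunk control beat generation and beam search at decode time. As a rough illustration only (not the repository's actual entry point; the sample values and the explicit `parse_args` call are assumptions), the options shown above could be combined like this:
```python
# Minimal sketch of the decode-time flags shown in this hunk; flag names come
# from the diff, while the wiring and sample values are illustrative.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--reverse', action='store_true', help='whether to use reverse language model')
parser.add_argument('--with_beat', action='store_true', help='whether to generate beats')
parser.add_argument('--beat_mode', default=0, type=int, help='beat mode: 0-no control, 2-global, 3-local')
parser.add_argument('--tempo', default=1, type=int, help='pace of beats: 0-local control; 1-slow; 2-medium; 3-fast')
parser.add_argument('--beam_width', default=2, type=int, help='beam width')
parser.add_argument('--beam_samples_num', default=5, type=int, help='beam searching samples')

# e.g. generate with beats at medium tempo and a wider beam
args = parser.parse_args(['--with_beat', '--tempo', '2', '--beam_width', '4'])
print(args.with_beat, args.tempo, args.beam_width)  # True 2 4
```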

View File

@@ -120,7 +120,7 @@ class BertTokenizer(PreTrainedTokenizer):
Only has an effect when do_basic_tokenize=True
**tokenize_chinese_chars**: (`optional`) boolean (default True)
Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
"""
super(BertTokenizer, self).__init__(unk_token=unk_token, sep_token=sep_token,
@@ -215,7 +215,7 @@ class BasicTokenizer(object):
List of token not to split.
**tokenize_chinese_chars**: (`optional`) boolean (default True)
Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
"""
if never_split is None:
@@ -401,7 +401,7 @@ class WordpieceTokenizer(object):
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
-# \t, \n, and \r are technically contorl characters but we treat them
+# \t, \n, and \r are technically control characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True
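Note: the `_is_whitespace` helper touched here follows the pattern common to BERT-style tokenizers: explicit checks for the control-style whitespace characters, then a Unicode-category check. A hedged sketch of that pattern (not necessarily the exact body in this repository):
```python
import unicodedata

def _is_whitespace(char):
    """Checks whether `char` is a whitespace character (sketch of the usual BERT-style helper)."""
    # \t, \n, and \r are technically control characters, but they are treated
    # as whitespace since they are generally considered as such.
    if char in (" ", "\t", "\n", "\r"):
        return True
    # Any other character in the Unicode "Separator, space" category also counts.
    return unicodedata.category(char) == "Zs"
```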

View File

@@ -123,7 +123,7 @@ class BertTokenizer(PreTrainedTokenizer):
Only has an effect when do_basic_tokenize=True
**tokenize_chinese_chars**: (`optional`) boolean (default True)
Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
"""
super(BertTokenizer, self).__init__(unk_token=unk_token, sep_token=sep_token,
@@ -218,7 +218,7 @@ class BasicTokenizer(object):
List of token not to split.
**tokenize_chinese_chars**: (`optional`) boolean (default True)
Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
"""
if never_split is None:
@@ -416,7 +416,7 @@ class WordpieceTokenizer(object):
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
-# \t, \n, and \r are technically contorl characters but we treat them
+# \t, \n, and \r are technically control characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True

View File

@@ -6,7 +6,7 @@ PREFIX=$1
[[ -d "${PREFIX}_data_bin" ]] && { echo "output directory ${PREFIX}_data_bin already exists" ; exit 1; }
for i in {0..4}
do
echo "Proccessing fold $i"
echo "Processing fold $i"
mkdir -p ${PREFIX}_data_bin/$i
fairseq-preprocess \
--only-source \

View File

@@ -132,7 +132,7 @@ metadata_dir # metadata information of dataset, e.g., data/speech/pho
output_duration_dir # output directory of duration-augmented wav, e.g., data/duration
output_pitch_dir # output directory of pitch-augmented wav, e.g., data/pitch
output_pdaugment_dir # output directory of PDAugmented wav, e.g., data/pdaugment
-selected_dir # select directory for training, validing and testing, e.g., ${output_pdaugment_dir}
+selected_dir # select directory for training, validating and testing, e.g., ${output_pdaugment_dir}
```
You can augment the speech dataset with multiple threads by setting:

View File

@@ -13,7 +13,7 @@ import os
def remove_empty_track(midi_file):
'''
1. read pretty midi data
-2. remove emtpy track,
+2. remove empty track,
also remove track with fewer than 10% notes of the track
with most notes
********
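Note: the docstring above describes dropping empty tracks and tracks holding fewer than 10% of the notes of the busiest track. A hedged sketch of that filtering rule with `pretty_midi` (the function name and threshold mirror the docstring; the repository's actual implementation may differ):
```python
import pretty_midi

def remove_sparse_tracks(midi_file, ratio=0.1):
    """Keep only instruments with at least `ratio` of the note count of the busiest track."""
    midi = pretty_midi.PrettyMIDI(midi_file)
    if not midi.instruments:
        return midi
    max_notes = max(len(inst.notes) for inst in midi.instruments)
    midi.instruments = [
        inst for inst in midi.instruments
        if len(inst.notes) > 0 and len(inst.notes) >= ratio * max_notes
    ]
    return midi
```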

View File

@@ -60,7 +60,7 @@ _DEFAULT_TIME_SIGNATURE_CHORDS_PER_BAR = {
def steps_per_bar_in_quantized_sequence(steps_per_quarter=4):
"""Calculates steps per bar in a NoteSequence that has been quantized. assum time signature is 4/4
"""Calculates steps per bar in a NoteSequence that has been quantized. assume time signature is 4/4
Returns:
Steps per bar as a floating point number.
"""

View File

@@ -115,7 +115,7 @@ class SongSequenceGenerator(SequenceGenerator):
raise Exception("expected src_tokens or source in net input")
# bsz: total number of sentences in beam
-# Note that src_tokens may have more than 2 dimenions (i.e. audio features)
+# Note that src_tokens may have more than 2 dimensions (i.e. audio features)
bsz, src_len = src_tokens.size()[:2]
beam_size = self.beam_size
# Get sentence numbers
@@ -176,7 +176,7 @@ class SongSequenceGenerator(SequenceGenerator):
finalized = torch.jit.annotate(
List[List[Dict[str, Tensor]]],
[torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
-) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step
+) # contains lists of dictionaries of information about the hypothesis being finalized at each step
finished = [
False for i in range(bsz)
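Note: the `finalized` container above is essentially one list of hypothesis dictionaries per input sentence, wrapped in `torch.jit.annotate` so the generator can be scripted. A plain-Python sketch of the same bookkeeping shape (illustrative only; the dictionary fields and values are assumptions):
```python
# One empty hypothesis list per input sentence; each finished hypothesis is
# stored as a dict of per-hypothesis data (e.g. tokens, score) for that sentence.
bsz = 3  # illustrative batch size
finalized = [[] for _ in range(bsz)]
finished = [False for _ in range(bsz)]

# When a beam for sentence 0 finishes, something like this gets appended:
finalized[0].append({"tokens": [7, 12, 2], "score": -1.3})  # toy values
finished[0] = len(finalized[0]) >= 1  # e.g. once enough hypotheses are collected
print(finalized, finished)
```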