Mirror of https://github.com/microsoft/muzic.git
Fix typos discovered by codespell (#6)
Parent: e63f9cff45
Commit: 584a0b272a
@@ -85,7 +85,7 @@ To generate by our provided pretrained DeepRapper, first unzip the pretrained De
 │ └── config.json
 ```

-Finaly, run the following command to generate:
+Finally, run the following command to generate:
 ```bash
 bash generate_from_pretrain.sh
 ```
@@ -258,7 +258,7 @@ def _control_rhymes(node, probs, tokenizer, beater, pinyin_dict, rhyme_words_lis
 rhyme_words = node.rhyme[0]
 for w in rhyme_words:
 probs = _rescale_rhymes(probs, w, tokenizer, beater, pinyin_dict, alpha)
-else: # detect the begining of new sentences
+else: # detect the beginning of new sentences
 last_token_id = node.wordid[0][0][-1] # d2:batch size
 last_token = tokenizer.convert_ids_to_tokens([last_token_id])[0]

@@ -92,9 +92,9 @@ def main():
 parser.add_argument('--reverse', action='store_true', help='whether to use reverse language model')
 parser.add_argument('--with_beat', action='store_true', help='whether to generate beats')
 parser.add_argument('--beat_mode', default=0, type=int, help='beat mode:0.no control;2.global;3.local', required=False)
-parser.add_argument('--tempo', default=1, type=int, required=False, help='pace of beats:0-local controll; 1-slow; 2-medium; 3-fast')
+parser.add_argument('--tempo', default=1, type=int, required=False, help='pace of beats:0-local control; 1-slow; 2-medium; 3-fast')

-# beam seach param
+# beam search param
 parser.add_argument('--beam_width', default=2, type=int, required=False, help='beam width')
 parser.add_argument('--beam_samples_num', default=5, type=int, required=False, help='beam searching samples')
 parser.add_argument('--beam_sample_select_sg', default='sample', type=str, required=False,
@@ -120,7 +120,7 @@ class BertTokenizer(PreTrainedTokenizer):
 Only has an effect when do_basic_tokenize=True
 **tokenize_chinese_chars**: (`optional`) boolean (default True)
 Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
 """
 super(BertTokenizer, self).__init__(unk_token=unk_token, sep_token=sep_token,
@@ -215,7 +215,7 @@ class BasicTokenizer(object):
 List of token not to split.
 **tokenize_chinese_chars**: (`optional`) boolean (default True)
 Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
 """
 if never_split is None:
@@ -401,7 +401,7 @@ class WordpieceTokenizer(object):

 def _is_whitespace(char):
 """Checks whether `chars` is a whitespace character."""
-# \t, \n, and \r are technically contorl characters but we treat them
+# \t, \n, and \r are technically control characters but we treat them
 # as whitespace since they are generally considered as such.
 if char == " " or char == "\t" or char == "\n" or char == "\r":
 return True
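For context on the helper touched above: the hunk ends at the first `return True`, and in conventional BERT-style tokenizers this check usually falls through to a Unicode category test. A minimal self-contained sketch of that conventional shape, not taken from this repository's code:

```python
import unicodedata


def _is_whitespace(char):
    """Checks whether `char` is a whitespace character."""
    # \t, \n, and \r are technically control characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == " " or char == "\t" or char == "\n" or char == "\r":
        return True
    # Conventional fall-through: treat any Unicode "space separator" (Zs) as whitespace.
    return unicodedata.category(char) == "Zs"
```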
@@ -123,7 +123,7 @@ class BertTokenizer(PreTrainedTokenizer):
 Only has an effect when do_basic_tokenize=True
 **tokenize_chinese_chars**: (`optional`) boolean (default True)
 Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
 """
 super(BertTokenizer, self).__init__(unk_token=unk_token, sep_token=sep_token,
@@ -218,7 +218,7 @@ class BasicTokenizer(object):
 List of token not to split.
 **tokenize_chinese_chars**: (`optional`) boolean (default True)
 Whether to tokenize Chinese characters.
-This should likely be desactivated for Japanese:
+This should likely be deactivated for Japanese:
 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
 """
 if never_split is None:
@@ -416,7 +416,7 @@ class WordpieceTokenizer(object):

 def _is_whitespace(char):
 """Checks whether `chars` is a whitespace character."""
-# \t, \n, and \r are technically contorl characters but we treat them
+# \t, \n, and \r are technically control characters but we treat them
 # as whitespace since they are generally considered as such.
 if char == " " or char == "\t" or char == "\n" or char == "\r":
 return True
@@ -6,7 +6,7 @@ PREFIX=$1
 [[ -d "${PREFIX}_data_bin" ]] && { echo "output directory ${PREFIX}_data_bin already exists" ; exit 1; }
 for i in {0..4}
 do
-echo "Proccessing fold $i"
+echo "Processing fold $i"
 mkdir -p ${PREFIX}_data_bin/$i
 fairseq-preprocess \
 --only-source \
@@ -132,7 +132,7 @@ metadata_dir # metadata information of dataset, e.g., data/speech/pho
 output_duration_dir # output directory of duration-augmented wav, e.g., data/duration
 output_pitch_dir # output directory of pitch-augmented wav, e.g., data/pitch
 output_pdaugment_dir # output directory of PDAugmented wav, e.g., data/pdaugment
-selected_dir # select directory for training, validing and testing, e.g., ${output_pdaugment_dir}
+selected_dir # select directory for training, validating and testing, e.g., ${output_pdaugment_dir}
 ```

 You can augment the speech dataset with multiple threads by setting:
@@ -13,7 +13,7 @@ import os
 def remove_empty_track(midi_file):
 '''
 1. read pretty midi data
-2. remove emtpy track,
+2. remove empty track,
 also remove track with fewer than 10% notes of the track
 with most notes
 ********
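The corrected docstring above states the filtering rule: drop empty tracks and tracks with fewer than 10% of the notes of the busiest track. A minimal `pretty_midi` sketch of a function with that described behaviour (the repository's actual implementation is not shown in this hunk and may differ):

```python
import pretty_midi


def remove_empty_track(midi_file):
    """Sketch only: drop empty tracks and tracks with < 10% of the notes of the densest track."""
    pm = pretty_midi.PrettyMIDI(midi_file)
    max_notes = max((len(inst.notes) for inst in pm.instruments), default=0)
    pm.instruments = [
        inst for inst in pm.instruments
        if len(inst.notes) > 0 and len(inst.notes) >= 0.1 * max_notes
    ]
    return pm
```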
@@ -60,7 +60,7 @@ _DEFAULT_TIME_SIGNATURE_CHORDS_PER_BAR = {


 def steps_per_bar_in_quantized_sequence(steps_per_quarter=4):
-"""Calculates steps per bar in a NoteSequence that has been quantized. assum time signature is 4/4
+"""Calculates steps per bar in a NoteSequence that has been quantized. assume time signature is 4/4
 Returns:
 Steps per bar as a floating point number.
 """
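Under the 4/4 assumption the corrected docstring states, the value reduces to `steps_per_quarter` times the four quarter notes in a bar (16.0 with the default of 4). An illustrative sketch of that arithmetic only, not the repository's code:

```python
def steps_per_bar_in_quantized_sequence(steps_per_quarter=4):
    """Steps per bar in a quantized NoteSequence, assuming 4/4 time."""
    quarters_per_bar = 4.0  # numerator of the assumed 4/4 time signature
    return steps_per_quarter * quarters_per_bar  # default: 4 * 4.0 = 16.0
```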
@@ -115,7 +115,7 @@ class SongSequenceGenerator(SequenceGenerator):
 raise Exception("expected src_tokens or source in net input")

 # bsz: total number of sentences in beam
-# Note that src_tokens may have more than 2 dimenions (i.e. audio features)
+# Note that src_tokens may have more than 2 dimensions (i.e. audio features)
 bsz, src_len = src_tokens.size()[:2]
 beam_size = self.beam_size
 # Get sentence numbers
@@ -176,7 +176,7 @@ class SongSequenceGenerator(SequenceGenerator):
 finalized = torch.jit.annotate(
 List[List[Dict[str, Tensor]]],
 [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
-) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step
+) # contains lists of dictionaries of information about the hypothesis being finalized at each step

 finished = [
 False for i in range(bsz)