Fix misspelled word "pipleline" and other typos (#749)

co63oc authored on 2024-02-21 14:35:11 +08:00, committed by GitHub
parent aaedf05181
commit 473bf33705
10 changed files with 16 additions and 16 deletions

View File

@@ -16,7 +16,7 @@ Second internal release.
* add palm2.0
* add space model
* add MPLUG model
-* add dialog_intent, dialog_modeling, dialog state tracking pipleline
+* add dialog_intent, dialog_modeling, dialog state tracking pipeline
* add maskedlm model and fill_mask pipeline
* add nli pipeline
* add sentence similarity pipeline
@@ -28,7 +28,7 @@ Second internal release.
#### Audio
* add tts pipeline
-* add kws kwsbp pipline
+* add kws kwsbp pipeline
* add linear aec pipeline
* add ans pipeline

View File

@@ -157,7 +157,7 @@ def whitespace_tokenize(text):
class FullTokenizer(object):
"""Runs end-to-end tokenziation."""
"""Runs end-to-end tokenization."""
def __init__(self, vocab_file, do_lower_case=True):
self.vocab = load_vocab(vocab_file)
@@ -185,7 +185,7 @@ class FullTokenizer(object):
def clean_up_tokenization(out_string):
""" Clean up a list of simple English tokenization artifacts
-like spaces before punctuations and abreviated forms.
+like spaces before punctuations and abbreviated forms.
"""
out_string = (
out_string.replace(' .', '.').replace(' ?', '?').replace(
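The hunk above shows only the start of the replacement chain. As a rough standalone sketch of this kind of detokenization cleanup (the replacement pairs beyond the two visible here are assumed from common English contractions, not taken from this file):

    def clean_up_tokenization(out_string):
        """Remove simple English tokenization artifacts such as spaces
        before punctuation and around abbreviated forms."""
        # Pairs beyond (' .', '.') and (' ?', '?') are illustrative assumptions.
        for old, new in [
            (' .', '.'), (' ?', '?'), (' !', '!'), (' ,', ','),
            (" ' ", "'"), (" n't", "n't"), (" 'm", "'m"),
            (" 's", "'s"), (" 've", "'ve"), (" 're", "'re"),
        ]:
            out_string = out_string.replace(old, new)
        return out_string

    print(clean_up_tokenization("do n't stop , please ."))  # -> "don't stop, please."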
@@ -321,7 +321,7 @@ class BasicTokenizer(object):
class WordpieceTokenizer(object):
"""Runs WordPiece tokenziation."""
"""Runs WordPiece tokenization."""
def __init__(self, vocab, unk_token='[UNK]', max_input_chars_per_word=200):
self.vocab = vocab
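For context on the class being touched here: WordPiece splits a word by greedy longest-match-first lookup against the vocabulary, falling back to the unknown token. A standalone sketch of that algorithm (the function name and the tiny vocabulary below are illustrative, not this file's code):

    def wordpiece_tokenize(token, vocab, unk_token='[UNK]', max_input_chars_per_word=200):
        # Overly long inputs map straight to the unknown token.
        if len(token) > max_input_chars_per_word:
            return [unk_token]
        sub_tokens, start = [], 0
        while start < len(token):
            end, cur = len(token), None
            while start < end:
                piece = token[start:end]
                if start > 0:
                    piece = '##' + piece       # continuation pieces carry the '##' prefix
                if piece in vocab:
                    cur = piece
                    break
                end -= 1
            if cur is None:                    # no prefix of the remainder is in the vocab
                return [unk_token]
            sub_tokens.append(cur)
            start = end
        return sub_tokens

    print(wordpiece_tokenize('unaffable', {'un', '##aff', '##able'}))  # ['un', '##aff', '##able']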
@@ -384,7 +384,7 @@ class WordpieceTokenizer(object):
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
-# \t, \n, and \r are technically contorl characters but we treat them
+# \t, \n, and \r are technically control characters but we treat them
# as whitespace since they are generally considered as such.
if char == ' ' or char == '\t' or char == '\n' or char == '\r':
return True

View File

@@ -37,7 +37,7 @@ class BertConfig(object):
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported.
-hidden_dropout_prob: The dropout probabilitiy for all fully connected
+hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.
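The same hidden_dropout_prob / attention_probs_dropout_prob wording recurs in the other config classes changed below (CrossConfig, SpaceTCnConfig). As a small sketch of how the two values are typically consumed in BERT-style code (the SimpleNamespace config is purely illustrative):

    from types import SimpleNamespace
    import torch.nn as nn

    config = SimpleNamespace(hidden_size=768,
                             hidden_dropout_prob=0.1,
                             attention_probs_dropout_prob=0.1)

    # Applied to fully connected outputs in the embeddings, encoder and pooler.
    hidden_dropout = nn.Dropout(config.hidden_dropout_prob)
    # Applied to the softmaxed attention weights inside self-attention.
    attention_dropout = nn.Dropout(config.attention_probs_dropout_prob)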

View File

@@ -485,7 +485,7 @@ class BertModel(BertPreTrainedModel):
head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(
-1) # We can specify head_mask for each layer
head_mask = head_mask.to(dtype=next(self.parameters(
-)).dtype) # switch to fload if need + fp16 compatibility
+)).dtype) # switch to float if need + fp16 compatibility
else:
head_mask = [None] * self.config.num_hidden_layers
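The broadcasting above is easier to follow with concrete shapes. A minimal sketch of the same pattern, assuming a per-layer mask of shape [num_layers, num_heads] and a half-precision model:

    import torch

    num_layers, num_heads = 12, 12
    head_mask = torch.ones(num_layers, num_heads)

    # [num_layers, num_heads] -> [num_layers, 1, num_heads, 1, 1], so each layer's
    # row broadcasts against attention probabilities of shape [batch, heads, seq, seq].
    head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)

    # Cast to the parameters' dtype (float16 here) for fp16 compatibility.
    head_mask = head_mask.to(dtype=torch.float16)
    print(head_mask.shape)  # torch.Size([12, 1, 12, 1, 1])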

View File

@@ -79,7 +79,7 @@ class BertConfig(object):
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler.
-hidden_dropout_prob: The dropout probabilitiy for all fully connected
+hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.

View File

@@ -51,7 +51,7 @@ class CrossConfig(PreCrossConfig):
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-hidden_dropout_prob: The dropout probabilitiy for all fully connected
+hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.

View File

@@ -203,7 +203,7 @@ class BertConfig(object):
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-hidden_dropout_prob: The dropout probabilitiy for all fully connected
+hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.
@@ -743,7 +743,7 @@ class BertPreTrainingHeads(nn.Module):
class PreTrainedBertModel(nn.Module):
""" An abstract class to handle weights initialization and
-a simple interface for dowloading and loading pretrained models.
+a simple interface for downloading and loading pretrained models.
"""
def __init__(self, config, *inputs, **kwargs):
@@ -799,7 +799,7 @@ class PreTrainedBertModel(nn.Module):
. `bert_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
+state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
*inputs, **kwargs: additional input for the specific Bert class
(ex: num_labels for BertForSequenceClassification)
""" # noqa

View File

@@ -155,7 +155,7 @@ class ParallelSelfAttention(torch.nn.Module):
"""Parallel self-attention layer for GPT2.
Self-attention layer takes input with size [b, s, h] where b is
-the batch size, s is the sequence lenght, and h is the hidden size
+the batch size, s is the sequence length, and h is the hidden size
and creates output of the same size.
Arguments:
hidden_size: total hidden size of the layer (h).
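The shape contract described here, [b, s, h] in and [b, s, h] out, can be illustrated with the stock PyTorch attention module standing in for the parallel implementation:

    import torch
    import torch.nn as nn

    b, s, h, num_heads = 2, 16, 768, 12
    x = torch.randn(b, s, h)

    # Self-attention: query, key and value all come from the same input.
    attn = nn.MultiheadAttention(embed_dim=h, num_heads=num_heads, batch_first=True)
    out, _ = attn(x, x, x)
    print(out.shape)  # torch.Size([2, 16, 768])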

View File

@@ -656,7 +656,7 @@ class PreTrainedBertModel(nn.Module):
. `bert_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-state_dict: an optional state dictionnary (collections.OrderedDict object)
+state_dict: an optional state dictionary (collections.OrderedDict object)
to use instead of Google pre-trained models
*inputs, **kwargs: additional input for the specific Bert class
(ex: num_labels for BertForSequenceClassification)

View File

@@ -52,7 +52,7 @@ class SpaceTCnConfig(object):
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-hidden_dropout_prob: The dropout probabilitiy for all fully connected
+hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.