Fix word "pipleline" (#749)

Author: co63oc
Date: 2024-02-21 14:35:11 +08:00
Committed by: GitHub
Parent: aaedf05181
Commit: 473bf33705

10 changed files with 16 additions and 16 deletions

@@ -16,7 +16,7 @@ Second internal release.
 * add palm2.0
 * add space model
 * add MPLUG model
-* add dialog_intent, dialog_modeling, dialog state tracking pipleline
+* add dialog_intent, dialog_modeling, dialog state tracking pipeline
 * add maskedlm model and fill_mask pipeline
 * add nli pipeline
 * add sentence similarity pipeline
@@ -28,7 +28,7 @@ Second internal release.
 #### Audio
 * add tts pipeline
-* add kws kwsbp pipline
+* add kws kwsbp pipeline
 * add linear aec pipeline
 * add ans pipeline

@@ -157,7 +157,7 @@ def whitespace_tokenize(text):
 class FullTokenizer(object):
-    """Runs end-to-end tokenziation."""
+    """Runs end-to-end tokenization."""
 
     def __init__(self, vocab_file, do_lower_case=True):
         self.vocab = load_vocab(vocab_file)
@@ -185,7 +185,7 @@ class FullTokenizer(object):
 def clean_up_tokenization(out_string):
     """ Clean up a list of simple English tokenization artifacts
-    like spaces before punctuations and abreviated forms.
+    like spaces before punctuations and abbreviated forms.
     """
     out_string = (
         out_string.replace(' .', '.').replace(' ?', '?').replace(
@@ -321,7 +321,7 @@ class BasicTokenizer(object):
 class WordpieceTokenizer(object):
-    """Runs WordPiece tokenziation."""
+    """Runs WordPiece tokenization."""
 
     def __init__(self, vocab, unk_token='[UNK]', max_input_chars_per_word=200):
         self.vocab = vocab
@@ -384,7 +384,7 @@ class WordpieceTokenizer(object):
 def _is_whitespace(char):
     """Checks whether `chars` is a whitespace character."""
-    # \t, \n, and \r are technically contorl characters but we treat them
+    # \t, \n, and \r are technically control characters but we treat them
     # as whitespace since they are generally considered as such.
     if char == ' ' or char == '\t' or char == '\n' or char == '\r':
         return True
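
For reference, the WordPiece scheme named in these docstrings is a greedy longest-match-first loop over each whitespace-split word. The sketch below is written for this note and is not the file's code; the function name, toy vocabulary, and defaults are illustrative only.

def wordpiece_tokenize(word, vocab, unk_token='[UNK]', max_input_chars_per_word=200):
    # Words longer than the cap are mapped straight to the unknown token.
    if len(word) > max_input_chars_per_word:
        return [unk_token]
    tokens, start = [], 0
    while start < len(word):
        end = len(word)
        cur_substr = None
        # Try the longest remaining substring first, shrinking from the right.
        while start < end:
            substr = word[start:end]
            if start > 0:
                substr = '##' + substr  # continuation pieces carry the '##' prefix
            if substr in vocab:
                cur_substr = substr
                break
            end -= 1
        if cur_substr is None:
            return [unk_token]  # no piece matched, so the whole word becomes UNK
        tokens.append(cur_substr)
        start = end
    return tokens

vocab = {'un', '##aff', '##able'}
print(wordpiece_tokenize('unaffable', vocab))  # ['un', '##aff', '##able']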

@@ -37,7 +37,7 @@ class BertConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.
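
As a quick illustration of the two settings documented above (a sketch based only on the docstring, not this file's code): hidden_dropout_prob drives the dropout applied after fully connected layers in the embeddings, encoder, and pooler, while attention_probs_dropout_prob drives the dropout applied to the softmaxed attention probabilities. Both are commonly 0.1 in BERT-style configs.

import torch.nn as nn

hidden_dropout_prob = 0.1             # illustrative value
attention_probs_dropout_prob = 0.1    # illustrative value

output_dropout = nn.Dropout(hidden_dropout_prob)              # after dense projections
attention_dropout = nn.Dropout(attention_probs_dropout_prob)  # on the attention probability matrix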

@@ -485,7 +485,7 @@ class BertModel(BertPreTrainedModel):
                 head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(
                     -1)  # We can specify head_mask for each layer
             head_mask = head_mask.to(dtype=next(self.parameters(
-            )).dtype)  # switch to fload if need + fp16 compatibility
+            )).dtype)  # switch to float if need + fp16 compatibility
         else:
             head_mask = [None] * self.config.num_hidden_layers
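
The corrected comment is about dtypes: when the model runs in half precision, the attention probabilities are float16, so the head mask is cast to the parameters' dtype before it multiplies them, avoiding an accidental upcast. A rough, self-contained illustration follows; the shapes and names are assumptions for this note, not this file's code.

import torch

batch, num_heads, seq_len = 2, 12, 8
# Pretend fp16 attention probabilities, as produced inside a half-precision model.
attention_probs = torch.rand(batch, num_heads, seq_len, seq_len).half()

head_mask = torch.ones(num_heads)                 # 1.0 keeps a head, 0.0 silences it
head_mask = head_mask.view(1, num_heads, 1, 1)    # broadcastable over batch and positions
head_mask = head_mask.to(dtype=attention_probs.dtype)  # the cast the comment refers to

attention_probs = attention_probs * head_mask     # stays float16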

@@ -79,7 +79,7 @@ class BertConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.

@@ -51,7 +51,7 @@ class CrossConfig(PreCrossConfig):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.

@@ -203,7 +203,7 @@ class BertConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.
@@ -743,7 +743,7 @@ class BertPreTrainingHeads(nn.Module):
 class PreTrainedBertModel(nn.Module):
     """ An abstract class to handle weights initialization and
-        a simple interface for dowloading and loading pretrained models.
+        a simple interface for downloading and loading pretrained models.
     """
     def __init__(self, config, *inputs, **kwargs):
@@ -799,7 +799,7 @@ class PreTrainedBertModel(nn.Module):
                 . `bert_config.json` a configuration file for the model
                 . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
             cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
+            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
             *inputs, **kwargs: additional input for the specific Bert class
                 (ex: num_labels for BertForSequenceClassification)
         """  # noqa

@@ -155,7 +155,7 @@ class ParallelSelfAttention(torch.nn.Module):
     """Parallel self-attention layer for GPT2.
 
     Self-attention layer takes input with size [b, s, h] where b is
-    the batch size, s is the sequence lenght, and h is the hidden size
+    the batch size, s is the sequence length, and h is the hidden size
     and creates output of the same size.
     Arguments:
         hidden_size: total hidden size of the layer (h).
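
To make the docstring's shape convention concrete (an illustration for this note, not this file's code):

import torch

# [b, s, h] = [batch size, sequence length, hidden size], as described above.
b, s, h = 2, 16, 1024
hidden_states = torch.randn(b, s, h)  # input to the self-attention layer
# The layer returns a tensor of the same [b, s, h] shape; under tensor model
# parallelism the attention heads are typically partitioned across ranks inside the layer.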

@@ -656,7 +656,7 @@ class PreTrainedBertModel(nn.Module):
                 . `bert_config.json` a configuration file for the model
                 . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
             cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionnary (collections.OrderedDict object)
+            state_dict: an optional state dictionary (collections.OrderedDict object)
                 to use instead of Google pre-trained models
             *inputs, **kwargs: additional input for the specific Bert class
                 (ex: num_labels for BertForSequenceClassification)

@@ -52,7 +52,7 @@ class SpaceTCnConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.