Mirror of https://github.com/modelscope/modelscope.git
Fix word pipleline (#749)
@@ -16,7 +16,7 @@ Second internal release.
 * add palm2.0
 * add space model
 * add MPLUG model
-* add dialog_intent, dialog_modeling, dialog state tracking pipleline
+* add dialog_intent, dialog_modeling, dialog state tracking pipeline
 * add maskedlm model and fill_mask pipeline
 * add nli pipeline
 * add sentence similarity pipeline
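All of the NLP pipelines listed in this changelog hunk are reached through the same `pipeline()` entry point; the audio pipelines in the next hunk are invoked the same way with their respective task names. A minimal sketch for the fill_mask case, assuming the standard `modelscope.pipelines.pipeline` factory and a placeholder model id (the concrete model name is not part of this commit):

from modelscope.pipelines import pipeline

# '<fill-mask-model-id>' is a placeholder, not a model shipped by this commit.
fill_mask = pipeline(task='fill-mask', model='<fill-mask-model-id>')
print(fill_mask('Life is like a box of [MASK].'))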
@@ -28,7 +28,7 @@ Second internal release.
 
 #### Audio
 * add tts pipeline
-* add kws kwsbp pipline
+* add kws kwsbp pipeline
 * add linear aec pipeline
 * add ans pipeline
 
@@ -157,7 +157,7 @@ def whitespace_tokenize(text):
 
 
 class FullTokenizer(object):
-    """Runs end-to-end tokenziation."""
+    """Runs end-to-end tokenization."""
 
     def __init__(self, vocab_file, do_lower_case=True):
         self.vocab = load_vocab(vocab_file)
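For context, `FullTokenizer` is the usual BERT-style wrapper that chains basic whitespace/punctuation splitting with WordPiece. A hedged usage sketch, assuming the class is imported from the module touched in the hunk above and keeps the conventional `tokenize`/`convert_tokens_to_ids` methods; `vocab.txt` is a hypothetical path:

# Hypothetical vocab path; method names assumed to follow the reference
# BERT tokenizer on which this module is based.
tokenizer = FullTokenizer(vocab_file='vocab.txt', do_lower_case=True)
tokens = tokenizer.tokenize('unaffable behaviour')
ids = tokenizer.convert_tokens_to_ids(tokens)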
@@ -185,7 +185,7 @@ class FullTokenizer(object):
 
 def clean_up_tokenization(out_string):
     """ Clean up a list of simple English tokenization artifacts
-    like spaces before punctuations and abreviated forms.
+    like spaces before punctuations and abbreviated forms.
     """
     out_string = (
         out_string.replace(' .', '.').replace(' ?', '?').replace(
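`clean_up_tokenization` undoes detokenization artifacts in decoded text; a minimal sketch restricted to the two replacements visible in this hunk (the real function chains several more, including the abbreviated forms its docstring mentions):

def clean_up_tokenization_sketch(out_string):
    # Only the replacements shown above; the full function also handles
    # other punctuation and abbreviated forms.
    return out_string.replace(' .', '.').replace(' ?', '?')

print(clean_up_tokenization_sketch('is it over ? yes , it is .'))
# -> 'is it over? yes , it is.'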
@@ -321,7 +321,7 @@ class BasicTokenizer(object):
 
 
 class WordpieceTokenizer(object):
-    """Runs WordPiece tokenziation."""
+    """Runs WordPiece tokenization."""
 
     def __init__(self, vocab, unk_token='[UNK]', max_input_chars_per_word=200):
         self.vocab = vocab
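`WordpieceTokenizer.tokenize` performs the standard greedy longest-match-first lookup against a fixed vocabulary; the sketch below illustrates that algorithm on a single word (it is not a copy of this file's implementation and omits the `max_input_chars_per_word` guard):

def wordpiece_sketch(word, vocab, unk_token='[UNK]'):
    # Greedy longest-match-first: repeatedly take the longest prefix
    # (or '##'-prefixed continuation) present in the vocabulary.
    tokens, start = [], 0
    while start < len(word):
        end, cur = len(word), None
        while start < end:
            piece = word[start:end] if start == 0 else '##' + word[start:end]
            if piece in vocab:
                cur = piece
                break
            end -= 1
        if cur is None:        # no vocabulary piece matches -> unknown token
            return [unk_token]
        tokens.append(cur)
        start = end
    return tokens

print(wordpiece_sketch('unaffable', {'un', '##aff', '##able'}))
# ['un', '##aff', '##able']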
@@ -384,7 +384,7 @@ class WordpieceTokenizer(object):
 
 def _is_whitespace(char):
     """Checks whether `chars` is a whitespace character."""
-    # \t, \n, and \r are technically contorl characters but we treat them
+    # \t, \n, and \r are technically control characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == ' ' or char == '\t' or char == '\n' or char == '\r':
        return True
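In the reference BERT tokenizer this helper continues past the lines above with a Unicode-category check; a hedged sketch of the complete behaviour (the tail is assumed, not shown in this hunk):

import unicodedata

def is_whitespace_sketch(char):
    # Explicit ASCII whitespace first, then anything Unicode classifies
    # as a space separator ('Zs'), e.g. the no-break space U+00A0.
    if char == ' ' or char == '\t' or char == '\n' or char == '\r':
        return True
    return unicodedata.category(char) == 'Zs'

print(is_whitespace_sketch('\t'), is_whitespace_sketch('\u00A0'), is_whitespace_sketch('a'))
# True True False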
@@ -37,7 +37,7 @@ class BertConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.
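The `hidden_act` strings in this docstring are normally resolved through a small name-to-function table; a sketch of such a mapping (illustrative only; the table name `ACT2FN` and the exact `gelu_new` formula are assumptions, not taken from this file):

import math

import torch
import torch.nn.functional as F

def gelu_new(x):
    # Tanh-based GELU approximation commonly used for the 'gelu_new' option
    # (assumed variant; the formula used in this repo may differ).
    return 0.5 * x * (1.0 + torch.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

ACT2FN = {
    'gelu': F.gelu,
    'relu': F.relu,
    'swish': F.silu,       # swish(x) = x * sigmoid(x)
    'gelu_new': gelu_new,
}

hidden_act = 'gelu'  # value read from the config
act_fn = ACT2FN[hidden_act] if isinstance(hidden_act, str) else hidden_act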
@@ -485,7 +485,7 @@ class BertModel(BertPreTrainedModel):
            head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(
                -1)  # We can specify head_mask for each layer
            head_mask = head_mask.to(dtype=next(self.parameters(
-            )).dtype)  # switch to fload if need + fp16 compatibility
+            )).dtype)  # switch to float if need + fp16 compatibility
        else:
            head_mask = [None] * self.config.num_hidden_layers
 
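The `.to(dtype=next(self.parameters()).dtype)` call that this hunk touches is the usual way to keep a user-supplied mask in the model's precision (fp16/fp32); a stand-alone sketch of the same pattern (shapes are illustrative, not the exact ones used by `BertModel`):

import torch

model = torch.nn.Linear(8, 8).half()   # pretend the model's weights are fp16
head_mask = torch.ones(12)             # e.g. one entry per attention head, fp32 by default

# Broadcast and then match the parameter dtype, mirroring the hunk above.
head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
head_mask = head_mask.to(dtype=next(model.parameters()).dtype)
print(head_mask.shape, head_mask.dtype)  # torch.Size([12, 1, 1, 1]) torch.float16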
@@ -79,7 +79,7 @@ class BertConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.
@@ -51,7 +51,7 @@ class CrossConfig(PreCrossConfig):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.
@@ -203,7 +203,7 @@ class BertConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.
@@ -743,7 +743,7 @@ class BertPreTrainingHeads(nn.Module):
 
 class PreTrainedBertModel(nn.Module):
     """ An abstract class to handle weights initialization and
-        a simple interface for dowloading and loading pretrained models.
+        a simple interface for downloading and loading pretrained models.
     """
 
     def __init__(self, config, *inputs, **kwargs):
@@ -799,7 +799,7 @@ class PreTrainedBertModel(nn.Module):
                 . `bert_config.json` a configuration file for the model
                 . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
             cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
+            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
             *inputs, **kwargs: additional input for the specific Bert class
                 (ex: num_labels for BertForSequenceClassification)
         """ # noqa
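Per the docstring above, a model can be loaded from a local directory holding `bert_config.json` and `pytorch_model.bin`; a hedged sketch of such a call (the directory, cache path and label count are placeholders, and `BertForSequenceClassification` is the example class the docstring itself names):

# './my_bert_dir' and './cache' are placeholders; the directory must contain
# bert_config.json and pytorch_model.bin as described above.
model = BertForSequenceClassification.from_pretrained(
    './my_bert_dir',
    cache_dir='./cache',  # optional cache folder, per the docstring
    num_labels=2)         # extra kwarg consumed by the specific Bert class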
@@ -155,7 +155,7 @@ class ParallelSelfAttention(torch.nn.Module):
     """Parallel self-attention layer for GPT2.
 
     Self-attention layer takes input with size [b, s, h] where b is
-    the batch size, s is the sequence lenght, and h is the hidden size
+    the batch size, s is the sequence length, and h is the hidden size
     and creates output of the same size.
     Arguments:
         hidden_size: total hidden size of the layer (h).
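As this docstring says, the layer maps a `[b, s, h]` tensor to a `[b, s, h]` tensor; a shape-only sketch using PyTorch's stock (non-parallel) attention to illustrate that contract, not Megatron-style tensor parallelism:

import torch

b, s, h, heads = 2, 16, 64, 4   # batch, sequence length, hidden size, attention heads
attn = torch.nn.MultiheadAttention(embed_dim=h, num_heads=heads, batch_first=True)

x = torch.randn(b, s, h)        # input of size [b, s, h]
out, _ = attn(x, x, x)          # self-attention: query = key = value = x
print(out.shape)                # torch.Size([2, 16, 64]) -- same [b, s, h]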
@@ -656,7 +656,7 @@ class PreTrainedBertModel(nn.Module):
                 . `bert_config.json` a configuration file for the model
                 . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
             cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionnary (collections.OrderedDict object)
+            state_dict: an optional state dictionary (collections.OrderedDict object)
                 to use instead of Google pre-trained models
             *inputs, **kwargs: additional input for the specific Bert class
                 (ex: num_labels for BertForSequenceClassification)
@@ -52,7 +52,7 @@ class SpaceTCnConfig(object):
             layer in the Transformer encoder.
         hidden_act: The non-linear activation function (function or string) in the
             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
-        hidden_dropout_prob: The dropout probabilitiy for all fully connected
+        hidden_dropout_prob: The dropout probability for all fully connected
             layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob: The dropout ratio for the attention
             probabilities.