# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" ElasticBERT model configuration """

from fastNLP.core.log import logger
from fastNLP.transformers.torch.configuration_utils import PretrainedConfig

__all__ = [
    "ELASTICBERT_PRETRAINED_CONFIG_ARCHIVE_MAP",
    "ElasticBertConfig",
]

ELASTICBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "elasticbert-base": "https://huggingface.co/fnlp/elasticbert-base/resolve/main/config.json",
    "elasticbert-large": "https://huggingface.co/fnlp/elasticbert-large/resolve/main/config.json",
    "elasticbert-base-chinese": "https://huggingface.co/fnlp/elasticbert-chinese-base/resolve/main/config.json",
}
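
# The shortcut names above map to hosted config files. A config can presumably be
# loaded either via such a shortcut or directly from the Hub repo name, e.g.
# ``ElasticBertConfig.from_pretrained("fnlp/elasticbert-base")``; ``from_pretrained``
# is inherited from ``PretrainedConfig``.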


class ElasticBertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a :class:`ElasticBertModel`.

    Args:
        max_output_layers (:obj:`int`, defaults to 12):
            The maximum number of classification layers.
        num_output_layers (:obj:`int`, defaults to 12):
            The number of classification layers, i.e. how many classification heads are
            attached to the encoder. It is 1 in static usage, and equal to
            ``num_hidden_layers`` in dynamic usage.
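
    Examples (a minimal sketch; the exact import path is an assumption based on this
    module living under ``fastNLP.transformers.torch``)::

        >>> from fastNLP.transformers.torch import ElasticBertConfig
        >>> # Static usage: a single classification head on top of the encoder.
        >>> static_config = ElasticBertConfig(num_output_layers=1)
        >>> # Dynamic usage: one classification head per hidden layer.
        >>> dynamic_config = ElasticBertConfig(num_hidden_layers=12, num_output_layers=12)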
    """

    model_type = "elasticbert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        max_output_layers=12,
        num_output_layers=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        gradient_checkpointing=False,
        position_embedding_type="absolute",
        use_cache=True,
        **kwargs
    ):
        # Forward pad_token_id and any remaining kwargs to the base PretrainedConfig.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.max_output_layers = max_output_layers
        self.num_output_layers = num_output_layers
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.gradient_checkpointing = gradient_checkpointing
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache