pass-lin
diff --git a/bert4keras3/__pycache__/models.cpython-310.pyc b/bert4keras3/__pycache__/models.cpython-310.pyc
31 Bytes
diff --git a/bert4keras3/__pycache__/transformers.cpython-310.pyc b/bert4keras3/__pycache__/transformers.cpython-310.pyc
30 Bytes
diff --git a/bert4keras3/models.py b/bert4keras3/models.py
Lines changed: 2 additions & 1 deletion
diff --git a/bert4keras3/transformers.py b/bert4keras3/transformers.py
Lines changed: 9 additions & 5 deletions
@@ -49,6 +49,7 @@ def build_transformer_model(
     application='encoder',
     return_keras_model=True,
     keras_weights_path=None,
+    initial=True,
     **kwargs
 ):
     """根据配置文件构建模型，可选加载checkpoint权重
@@ -126,7 +127,7 @@ def build_transformer_model(
 
     transformer = MODEL(**configs)
     transformer.build(**configs)
-    if keras.__version__>'3.0' and backlib=='torch':
+    if keras.__version__>'3.0' and initial:
         #keras3不知道为什么attention需要走一次前向才能初始化
         inputs=[]
         for modelin in transformer.model.inputs: 
 
@@ -444,7 +444,7 @@ def get_cache_inputs(self,lengths:list):
     def get_custom_position_ids(self):
         return self.custom_position_ids
     def build_cache_model(self,input_lengths:list,end_token,
-                          search_mode='greedy',k=1,progress_print=False,index_bias=0):
+                          search_mode='greedy',k=1,progress_print=False,index_bias=0,initial = True):
         if backlib=='torch':
             progress_print=False
         inputs=self.get_cache_inputs(input_lengths)
@@ -458,11 +458,15 @@ def build_cache_model(self,input_lengths:list,end_token,
             shape=keras.ops.shape(modelin)
             shape=[1 if t==None else t for t in shape]
             inputs.append(ops.convert_to_tensor(np.ones(shape),modelin.dtype))
-        if backlib=='torch':
-            import torch
-            with torch.no_grad():  
+        if initial:
+            if backlib=='torch':
+                import torch
+                with torch.no_grad():  
+                    self.cache_call(inputs=inputs,input_lengths=input_lengths,end_token=end_token,
+                        search_mode=search_mode,k=k,progress_print=progress_print,index_bias=index_bias)
+            else:
                 self.cache_call(inputs=inputs,input_lengths=input_lengths,end_token=end_token,
-                       search_mode=search_mode,k=k,progress_print=progress_print,index_bias=index_bias)
+                        search_mode=search_mode,k=k,progress_print=progress_print,index_bias=index_bias)
 
         return model
 class LM_Mask(object):