Skip to content

Commit bb1cfc1

Browse files
committed
Fix conversion of models with shared embeddings (#120)
Variables should be aliased after checking the vocabulary sizes.
1 parent cd14ed3 commit bb1cfc1

File tree

3 files changed: +78 −5 lines changed

python/ctranslate2/converters/converter.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,7 @@ def convert(self, output_dir, model_spec, vmap=None, quantization=None, force=Fa
5959
model_spec.validate()
6060
self._check_vocabulary_size("source", src_vocab, model_spec.source_vocabulary_size)
6161
self._check_vocabulary_size("target", tgt_vocab, model_spec.target_vocabulary_size)
62-
if quantization is not None:
63-
model_spec.quantize(quantization)
62+
model_spec.optimize(quantization=quantization)
6463
model_spec.serialize(os.path.join(output_dir, "model.bin"))
6564
if vmap is not None:
6665
shutil.copy(vmap, os.path.join(output_dir, "vmap.txt"))

python/ctranslate2/specs/model_spec.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,6 @@ def _check(spec, name, value):
6464
# Convert bool to an integer type.
6565
setattr(spec, attr_name, np.dtype("int8").type(value))
6666
self.visit(_check)
67-
self._alias_variables()
6867

6968
def variables(self, prefix="", ordered=False):
7069
"""Returns a dict mapping variables name to value. If ordered is True,
@@ -98,7 +97,7 @@ def _alias_variables(self):
9897
setattr(spec, attr_name, other_name)
9998
break
10099

101-
def quantize(self, quantization):
100+
def _quantize(self, quantization):
102101
"""Possibly quantizes the variable of the layer."""
103102
def _quantize(spec, name, value):
104103
if "weight" in name and isinstance(value, np.ndarray):
@@ -117,6 +116,12 @@ def _quantize(spec, name, value):
117116
setattr(spec, "weight", value)
118117
self.visit(_quantize)
119118

119+
def optimize(self, quantization=None):
120+
"""Applies some optimizations on this layer."""
121+
self._alias_variables()
122+
if quantization is not None:
123+
self._quantize(quantization)
124+
120125
def visit(self, fn):
121126
"""Recursively visits this layer and its children."""
122127
visit_spec(self, fn)

python/tests/test.py

+70-1
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,21 @@ def test_opennmt_tf_model_conversion(tmpdir, model_path, src_vocab, tgt_vocab, m
130130
output = translator.translate_batch([["آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"]])
131131
assert output[0][0]["tokens"] == ["a", "t", "z", "m", "o", "n"]
132132

133+
@pytest.mark.skipif(not _FRAMEWORK_DATA_EXIST, reason="Data files are not available")
134+
@pytest.mark.parametrize("quantization", ["int16", "int8"])
135+
def test_opennmt_tf_model_quantization(tmpdir, quantization):
136+
model_path = os.path.join(
137+
_TEST_DATA_DIR, "models", "transliteration-aren-all", "opennmt_tf", "v2", "checkpoint")
138+
converter = ctranslate2.converters.OpenNMTTFConverter(
139+
model_path,
140+
src_vocab=os.path.join(model_path, "ar.vocab"),
141+
tgt_vocab=os.path.join(model_path, "en.vocab"))
142+
output_dir = str(tmpdir.join("ctranslate2_model"))
143+
converter.convert(output_dir, ctranslate2.specs.TransformerBase(), quantization=quantization)
144+
translator = ctranslate2.Translator(output_dir)
145+
output = translator.translate_batch([["آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"]])
146+
assert output[0][0]["tokens"] == ["a", "t", "z", "m", "o", "n"]
147+
133148
@pytest.mark.skipif(not _FRAMEWORK_DATA_EXIST, reason="Data files are not available")
134149
def test_opennmt_tf_variables_conversion(tmpdir):
135150
model_path = os.path.join(
@@ -159,6 +174,43 @@ def test_opennmt_tf_model_conversion_invalid_vocab(tmpdir):
159174
with pytest.raises(ValueError):
160175
converter.convert(output_dir, ctranslate2.specs.TransformerBase())
161176

177+
def test_opennmt_tf_shared_embeddings_conversion(tmpdir):
178+
# Issue https://github.com/OpenNMT/CTranslate2/issues/118
179+
import tensorflow as tf
180+
import opennmt
181+
182+
vocab = opennmt.data.Vocab()
183+
for i in range(10):
184+
vocab.add(str(i))
185+
vocab_path = str(tmpdir.join("vocab.txt"))
186+
vocab.serialize(vocab_path)
187+
188+
num_layers = 3
189+
num_heads = 4
190+
model = opennmt.models.Transformer(
191+
opennmt.inputters.WordEmbedder(32),
192+
opennmt.inputters.WordEmbedder(32),
193+
num_layers,
194+
num_units=32,
195+
num_heads=num_heads,
196+
ffn_inner_dim=64,
197+
share_embeddings=opennmt.models.EmbeddingsSharingLevel.ALL)
198+
model.initialize({"source_vocabulary": vocab_path, "target_vocabulary": vocab_path})
199+
model.create_variables()
200+
201+
checkpoint_prefix = str(tmpdir.join("ckpt"))
202+
checkpoint = tf.train.Checkpoint(model=model)
203+
checkpoint.write(checkpoint_prefix)
204+
205+
converter = ctranslate2.converters.OpenNMTTFConverter(
206+
model_path=checkpoint_prefix, src_vocab=vocab_path, tgt_vocab=vocab_path)
207+
output_dir = str(tmpdir.join("ctranslate2_model"))
208+
converter.convert(output_dir, ctranslate2.specs.TransformerSpec(num_layers, num_heads))
209+
210+
# Check that the translation runs.
211+
translator = ctranslate2.Translator(output_dir)
212+
translator.translate_batch([["1", "2", "3"]], max_decoding_length=10)
213+
162214
@pytest.mark.skipif(not _FRAMEWORK_DATA_EXIST, reason="Data files are not available")
163215
def test_opennmt_py_model_conversion(tmpdir):
164216
model_path = os.path.join(
@@ -203,12 +255,29 @@ def __init__(self):
203255
spec = Spec()
204256
spec.validate()
205257
assert spec.a.dtype == np.float32
206-
assert spec.b == "a"
258+
assert spec.b.dtype == np.float32
207259
assert spec.c.dtype == np.int32
208260
assert spec.d == OPTIONAL
209261
assert spec.e.a.dtype == np.float32
210262
assert spec.f.dtype == np.int8
211263

264+
def test_layer_spec_optimize():
265+
266+
class Spec(ctranslate2.specs.LayerSpec):
267+
def __init__(self):
268+
self.a = np.ones([5], dtype=np.float32)
269+
self.b = np.ones([5], dtype=np.float32)
270+
self.c = np.zeros([5], dtype=np.int32)
271+
self.weight = np.ones([5, 4], dtype=np.float32)
272+
273+
spec = Spec()
274+
spec.optimize(quantization="int16")
275+
assert spec.a.dtype == np.float32
276+
assert spec.b == "a"
277+
assert spec.c.dtype == np.int32
278+
assert spec.weight.dtype == np.int16
279+
assert spec.weight_scale.dtype == np.float32
280+
212281
def test_index_spec():
213282
spec = ctranslate2.specs.TransformerBase()
214283
assert isinstance(

Comments (0)