diff --git a/.buildinfo b/.buildinfo index d8fc416f6..bfb060702 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 2a4bd9441600d6f87ec45cdb9c8c85e0 +config: 67a75a999b8281c3988fcac7056d30e5 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index e9d426296..eada649c8 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/.doctrees/python/ctranslate2.GenerationResult.doctree b/.doctrees/python/ctranslate2.GenerationResult.doctree index 42c0b3e6b..55c31bdc8 100644 Binary files a/.doctrees/python/ctranslate2.GenerationResult.doctree and b/.doctrees/python/ctranslate2.GenerationResult.doctree differ diff --git a/.doctrees/python/ctranslate2.TranslationResult.doctree b/.doctrees/python/ctranslate2.TranslationResult.doctree index 2b1f49038..b72689762 100644 Binary files a/.doctrees/python/ctranslate2.TranslationResult.doctree and b/.doctrees/python/ctranslate2.TranslationResult.doctree differ diff --git a/.doctrees/python/ctranslate2.models.Wav2Vec2Bert.doctree b/.doctrees/python/ctranslate2.models.Wav2Vec2Bert.doctree new file mode 100644 index 000000000..bbeb2069e Binary files /dev/null and b/.doctrees/python/ctranslate2.models.Wav2Vec2Bert.doctree differ diff --git a/.doctrees/python/ctranslate2.models.WhisperGenerationResult.doctree b/.doctrees/python/ctranslate2.models.WhisperGenerationResult.doctree index 06c0b62a1..3f1645e5a 100644 Binary files a/.doctrees/python/ctranslate2.models.WhisperGenerationResult.doctree and b/.doctrees/python/ctranslate2.models.WhisperGenerationResult.doctree differ diff --git a/.doctrees/python/ctranslate2.models.doctree b/.doctrees/python/ctranslate2.models.doctree index 1fda6cc7f..41f623e39 100644 Binary files a/.doctrees/python/ctranslate2.models.doctree and 
b/.doctrees/python/ctranslate2.models.doctree differ diff --git a/.doctrees/python/ctranslate2.specs.Activation.doctree b/.doctrees/python/ctranslate2.specs.Activation.doctree index 8f18fcb88..684e5767f 100644 Binary files a/.doctrees/python/ctranslate2.specs.Activation.doctree and b/.doctrees/python/ctranslate2.specs.Activation.doctree differ diff --git a/.doctrees/python/ctranslate2.specs.LanguageModelSpec.doctree b/.doctrees/python/ctranslate2.specs.LanguageModelSpec.doctree index 557b29e88..5db472253 100644 Binary files a/.doctrees/python/ctranslate2.specs.LanguageModelSpec.doctree and b/.doctrees/python/ctranslate2.specs.LanguageModelSpec.doctree differ diff --git a/.doctrees/python/ctranslate2.specs.Wav2Vec2BertSpec.doctree b/.doctrees/python/ctranslate2.specs.Wav2Vec2BertSpec.doctree new file mode 100644 index 000000000..2fcfa8e2f Binary files /dev/null and b/.doctrees/python/ctranslate2.specs.Wav2Vec2BertSpec.doctree differ diff --git a/.doctrees/python/ctranslate2.specs.doctree b/.doctrees/python/ctranslate2.specs.doctree index 243eb5426..eba78acee 100644 Binary files a/.doctrees/python/ctranslate2.specs.doctree and b/.doctrees/python/ctranslate2.specs.doctree differ diff --git a/.doctrees/quantization.doctree b/.doctrees/quantization.doctree index 9ba8ce31b..606e223d6 100644 Binary files a/.doctrees/quantization.doctree and b/.doctrees/quantization.doctree differ diff --git a/_sources/python/ctranslate2.models.Wav2Vec2Bert.rst.txt b/_sources/python/ctranslate2.models.Wav2Vec2Bert.rst.txt new file mode 100644 index 000000000..c339f4389 --- /dev/null +++ b/_sources/python/ctranslate2.models.Wav2Vec2Bert.rst.txt @@ -0,0 +1,26 @@ +Wav2Vec2Bert +============ + +.. 
autoclass:: ctranslate2.models.Wav2Vec2Bert + :members: + :undoc-members: + :inherited-members: + + **Inherits from:** :class:`pybind11_builtins.pybind11_object` + + **Attributes:** + + - :obj:`~ctranslate2.models.Wav2Vec2Bert.compute_type` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.device` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.device_index` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.model_is_loaded` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.num_active_batches` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.num_queued_batches` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.num_workers` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.tensor_parallel` + + **Methods:** + + - :obj:`~ctranslate2.models.Wav2Vec2Bert.encode` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.load_model` + - :obj:`~ctranslate2.models.Wav2Vec2Bert.unload_model` diff --git a/_sources/python/ctranslate2.models.WhisperGenerationResult.rst.txt b/_sources/python/ctranslate2.models.WhisperGenerationResult.rst.txt index 000d7c44e..2b18aa21b 100644 --- a/_sources/python/ctranslate2.models.WhisperGenerationResult.rst.txt +++ b/_sources/python/ctranslate2.models.WhisperGenerationResult.rst.txt @@ -10,6 +10,7 @@ WhisperGenerationResult **Attributes:** + - :obj:`~ctranslate2.models.WhisperGenerationResult.logits` - :obj:`~ctranslate2.models.WhisperGenerationResult.no_speech_prob` - :obj:`~ctranslate2.models.WhisperGenerationResult.scores` - :obj:`~ctranslate2.models.WhisperGenerationResult.sequences` diff --git a/_sources/python/ctranslate2.models.rst.txt b/_sources/python/ctranslate2.models.rst.txt index 8a9eb0b96..d876402e3 100644 --- a/_sources/python/ctranslate2.models.rst.txt +++ b/_sources/python/ctranslate2.models.rst.txt @@ -6,6 +6,7 @@ ctranslate2.models .. 
toctree:: ctranslate2.models.Wav2Vec2 + ctranslate2.models.Wav2Vec2Bert ctranslate2.models.Whisper ctranslate2.models.WhisperGenerationResult ctranslate2.models.WhisperGenerationResultAsync diff --git a/_sources/python/ctranslate2.specs.LanguageModelSpec.rst.txt b/_sources/python/ctranslate2.specs.LanguageModelSpec.rst.txt index d997060b9..d33c4c9cd 100644 --- a/_sources/python/ctranslate2.specs.LanguageModelSpec.rst.txt +++ b/_sources/python/ctranslate2.specs.LanguageModelSpec.rst.txt @@ -12,6 +12,7 @@ LanguageModelSpec - :class:`ctranslate2.specs.TransformerDecoderModelSpec` - :class:`ctranslate2.specs.TransformerEncoderModelSpec` + - :class:`ctranslate2.specs.Wav2Vec2BertSpec` - :class:`ctranslate2.specs.Wav2Vec2Spec` - :class:`ctranslate2.specs.WhisperSpec` diff --git a/_sources/python/ctranslate2.specs.Wav2Vec2BertSpec.rst.txt b/_sources/python/ctranslate2.specs.Wav2Vec2BertSpec.rst.txt new file mode 100644 index 000000000..3688b9c00 --- /dev/null +++ b/_sources/python/ctranslate2.specs.Wav2Vec2BertSpec.rst.txt @@ -0,0 +1,26 @@ +Wav2Vec2BertSpec +================ + +.. 
autoclass:: ctranslate2.specs.Wav2Vec2BertSpec + :members: + :undoc-members: + :inherited-members: + + **Inherits from:** :class:`ctranslate2.specs.LanguageModelSpec` + + **Attributes:** + + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.config` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.name` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.revision` + + **Methods:** + + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.get_default_config` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.get_vocabulary_size` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.optimize` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.register_file` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.register_vocabulary` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.save` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.validate` + - :obj:`~ctranslate2.specs.Wav2Vec2BertSpec.variables` diff --git a/_sources/python/ctranslate2.specs.rst.txt b/_sources/python/ctranslate2.specs.rst.txt index a2d177d03..4015d9b45 100644 --- a/_sources/python/ctranslate2.specs.rst.txt +++ b/_sources/python/ctranslate2.specs.rst.txt @@ -17,5 +17,6 @@ ctranslate2.specs ctranslate2.specs.TransformerEncoderModelSpec ctranslate2.specs.TransformerEncoderSpec ctranslate2.specs.TransformerSpec + ctranslate2.specs.Wav2Vec2BertSpec ctranslate2.specs.Wav2Vec2Spec ctranslate2.specs.WhisperSpec diff --git a/_sources/quantization.md.txt b/_sources/quantization.md.txt index 296c57000..ae79ee6b9 100644 --- a/_sources/quantization.md.txt +++ b/_sources/quantization.md.txt @@ -165,18 +165,26 @@ In this mode, all model weights are stored in BF16 and all layers are run with t ### 4-bit AWQ -The compute type would be `int32_float16` - **Supported on:** * NVIDIA GPU with Compute Capability >= 7.5 +CTranslate2 internally handles the compute type for AWQ quantization. In this mode, all model weights are stored in half precision and all layers are run in half precision. Other parameters like scale and zero are stored in ``int32``. 
-For example, +**Steps to use AWQ Quantization:** + +* Download an AWQ quantized model from Hugging Face, for example [TheBloke/Llama-2-7B-AWQ](https://huggingface.co/TheBloke/Llama-2-7B-AWQ), or quantize your own model using this [AutoAWQ example](https://casper-hansen.github.io/AutoAWQ/examples/). +* Convert the AWQ quantized model to a CTranslate2 model: ```bash ct2-transformers-converter --model TheBloke/Llama-2-7B-AWQ --copy_files tokenizer.model --output_dir ct2_model ``` -We have to quantize the model with AWQ first, then convert it to CT2 format. \ No newline at end of file +* Run inference as usual with CTranslate2: +```python +model = ctranslate2.Generator('ct2_model', device='cuda') +outputs = model.generate_batch([tokens]) +``` + +Currently, CTranslate2 only supports the GEMM and GEMV kernels for AWQ quantization. \ No newline at end of file diff --git a/_static/documentation_options.js b/_static/documentation_options.js index b6ce93da8..2a1a60ed2 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '4.4.0', + VERSION: '4.5.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/conversion.html b/conversion.html index 9fb4df1bc..61b9e1092 100644 --- a/conversion.html +++ b/conversion.html @@ -4,7 +4,7 @@ -