
Commit a95d55a

Merge remote-tracking branch 'origin/feature/damian/v2/factor_out_transformation_utils' into feature/damian/no_kv_cache

2 parents: fa96efb + 71f4c6d
File tree: 8 files changed (+29, −41 lines)

src/deepsparse/transformers/helpers.py (+11 −1)

@@ -76,6 +76,13 @@ def setup_transformers_pipeline(
     tokenizer.pad_token = tokenizer.eos_token

     engine_kwargs = engine_kwargs or {}
+    if engine_kwargs.get("model_path"):
+        raise ValueError(
+            "The engine kwargs already specify "
+            f"a model path: {engine_kwargs['model_path']}, "
+            f"but a model path was also provided: {model_path}. "
+            "Please only provide one."
+        )
     engine_kwargs["model_path"] = model_path
     return model_path, config, tokenizer, engine_kwargs

@@ -84,6 +91,7 @@ def setup_onnx_file_path(
     model_path: str,
     sequence_length: int,
     onnx_model_name: Optional[str] = None,
+    task: Optional[str] = None,
 ) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
     """
     Parses ONNX model from the `model_path` provided. It additionally

@@ -102,7 +110,9 @@ def setup_onnx_file_path(
     hf_logger_level = hf_logger.level
     hf_logger.setLevel(logging.ERROR)

-    config = transformers.PretrainedConfig.from_pretrained(deployment_path)
+    config = transformers.PretrainedConfig.from_pretrained(
+        deployment_path, finetuning_task=task
+    )
     hf_logger.setLevel(hf_logger_level)

     trust_remote_code = False
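
To illustrate the new guard, here is a minimal sketch (not part of this commit) of the failure it introduces; the placeholder paths are assumptions:

    from deepsparse.transformers.helpers import setup_transformers_pipeline

    # Passing a model path both directly and inside engine_kwargs now
    # raises instead of silently overwriting the engine_kwargs entry.
    try:
        setup_transformers_pipeline(
            model_path="deployment/model.onnx",          # placeholder path
            engine_kwargs={"model_path": "other.onnx"},  # conflicting entry
        )
    except ValueError as err:
        print(err)  # "The engine kwargs already specify a model path: ..."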

src/deepsparse/transformers/pipelines/pipeline.py (+11 −27)

@@ -16,19 +16,18 @@
 Base Pipeline class for transformers inference pipeline
 """

-import logging
+
 import warnings
 from pathlib import Path
 from typing import Any, Dict, List, Mapping, Optional, Union

 import numpy
 import transformers
-from transformers.models.auto import AutoTokenizer

 from deepsparse import Bucketable, Pipeline
+from deepsparse.transformers.helpers import overwrite_transformer_onnx_model_inputs
 from deepsparse.transformers.helpers import (
-    get_deployment_path,
-    overwrite_transformer_onnx_model_inputs,
+    setup_onnx_file_path as setup_onnx_file_path_v2,
 )


@@ -124,24 +123,15 @@ def setup_onnx_file_path(self) -> str:

         :return: file path to the processed ONNX file for the engine to compile
         """
-        deployment_path, onnx_path = get_deployment_path(self.model_path)
-
-        # temporarily set transformers logger to ERROR to avoid
-        # printing misleading warnings
-        hf_logger = logging.getLogger("transformers")
-        hf_logger_level = hf_logger.level
-        hf_logger.setLevel(logging.ERROR)
-        self.config = transformers.PretrainedConfig.from_pretrained(
-            deployment_path,
-            finetuning_task=self.task if hasattr(self, "task") else None,
-        )
-        hf_logger.setLevel(hf_logger_level)
-
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            deployment_path,
-            trust_remote_code=self._trust_remote_code,
-            model_max_length=self.sequence_length,
+        # we will be soon retiring V1 pipelines. This is why I am deciding
+        # to reuse the functions from V2 pipelines in the (soon) legacy pipelines
+        onnx_path, config, tokenizer = setup_onnx_file_path_v2(
+            model_path=self.model_path,
+            sequence_length=self.sequence_length,
+            task=self.task if hasattr(self, "task") else None,
         )
+        self.config = config
+        self.tokenizer = tokenizer

         if not self._delay_overwriting_inputs:
             # overwrite onnx graph to given required input shape

@@ -153,12 +143,6 @@ def setup_onnx_file_path(self) -> str:
                 onnx_path, max_length=self.sequence_length
             )

-        if not self.config or not self.tokenizer:
-            raise RuntimeError(
-                "Invalid config or tokenizer provided. Please provide "
-                "paths to the files or ensure they exist in the `model_path` provided. "
-                "See `tokenizer` and `config` arguments for details."
-            )
         return onnx_path

     def tokens_to_engine_input(
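
For reference, a hedged sketch of calling the shared helper that the V1 setup_onnx_file_path now delegates to; the signature follows the diff above, while the concrete values are placeholders:

    from deepsparse.transformers.helpers import (
        setup_onnx_file_path as setup_onnx_file_path_v2,
    )

    # one helper now produces the ONNX path, config, and tokenizer for
    # both the V2 pipelines and the (soon legacy) V1 pipelines
    onnx_path, config, tokenizer = setup_onnx_file_path_v2(
        model_path="deployment/",   # placeholder deployment directory
        sequence_length=128,        # placeholder
        task="text-generation",     # optional; forwarded as finetuning_task
    )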

src/deepsparse/transformers/utils/token_generator.py (+5 −5)

@@ -77,16 +77,16 @@ def generate(self, logits: numpy.ndarray) -> numpy.ndarray:
         :param logits: the logits from the model with shape (vocab_size,)
         :return: the sampled token
         """
-        if self.deterministic:
-            token = numpy.argmax(logits)
-            self.tokens.append(token)
-            return token
-
         if self.top_k:
             logits = self.apply_top_k(logits)
         if self.top_p:
             logits = self.apply_top_p(logits)

+        if self.deterministic:
+            token = numpy.argmax(logits)
+            self.tokens.append(token)
+            return token
+
         if self.sampling_temperature != 1.0:
             logits /= self.sampling_temperature

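The net effect is that top-k/top-p filtering now runs before the deterministic short-circuit. A self-contained sketch of the reordered flow (a simplified stand-in with an inline top-k mask, not the library class):

    import numpy

    def sample_token(logits: numpy.ndarray, deterministic: bool, top_k: int = 0) -> int:
        if top_k:
            # keep the top_k logits, mask the rest to -inf
            cutoff = numpy.sort(logits)[-top_k]
            logits = numpy.where(logits < cutoff, -numpy.inf, logits)
        # the deterministic branch now sees the filtered logits
        if deterministic:
            return int(numpy.argmax(logits))
        # otherwise sample from the softmax over the remaining logits
        probs = numpy.exp(logits - logits.max())
        probs /= probs.sum()
        return int(numpy.random.choice(len(logits), p=probs))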
src/deepsparse/v2/text_generation/join_output.py (−3)

@@ -33,9 +33,6 @@ def __init__(self, tokenizer):
         self.tokenizer = tokenizer

     def run(self, inp: List[CompileGenerationsOutput], **kwargs):
-
-        if not isinstance(inp, list):
-            inp = [[inp]]
         batch_outputs = [x for x in inp[0]]
         generated_tokens = [x.generated_tokens for x in batch_outputs]
         generated_logits = [x.generated_logits for x in batch_outputs]
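
With the defensive coercion removed, run assumes its input is already a nested list. A minimal sketch of that contract, using a hypothetical stand-in for CompileGenerationsOutput:

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class FakeGenerationsOutput:  # hypothetical stand-in, not the real class
        generated_tokens: list
        generated_logits: list

    def run(inp: List[List[FakeGenerationsOutput]]) -> list:
        # inp must already be a nested list; bare outputs are no longer
        # wrapped by the removed isinstance check
        batch_outputs = [x for x in inp[0]]
        return [x.generated_tokens for x in batch_outputs]

    print(run([[FakeGenerationsOutput([1, 2], [0.1, 0.2])]]))  # -> [[1, 2]]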

src/deepsparse/v2/text_generation/prep_for_generation.py (−1)

@@ -91,7 +91,6 @@ def run(
             "token_generator": token_generator,
         }
         output = {
-            "logits": prompt_logits,
             "tokens": token_generator.tokens,
             "kv_cache": kv_cache,
             "in_generation": True,

tests/deepsparse/v2/unit/text_generation/conftest.py (+2 −2)

@@ -25,7 +25,7 @@
 from deepsparse.v2 import InferenceState, PipelineState
 from deepsparse.v2.text_generation import (
     GenerationDefaults,
-    NlEngineOperator,
+    NLEngineOperator,
     TokenGeneratorOperator,
 )

@@ -61,7 +61,7 @@ def single_token_engine_no_internal_cache(text_generation_attributes, model_attr
     seq_length, _ = text_generation_attributes
     _, model_path = model_attributes

-    nl_engine_operator = NlEngineOperator(
+    nl_engine_operator = NLEngineOperator(
         sequence_length=seq_length, input_ids_length=1, model_path=model_path
     )
     return nl_engine_operator

tests/testdata/gsm8k-v0-greedy_until (−1)

This file was deleted.

tests/testdata/gsm8k-v0-res.json (−1)

This file was deleted.
