From d57744f889c7b6d8356a8751f6c5974809491813 Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Fri, 6 Dec 2024 14:29:18 +0100
Subject: [PATCH 1/8] add system.security and system.security.use_safetensors entries

---
 garak/resources/garak.core.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml
index 72f7caa8..d5f42441 100644
--- a/garak/resources/garak.core.yaml
+++ b/garak/resources/garak.core.yaml
@@ -7,6 +7,8 @@ system:
   lite: true
   show_z: false
   enable_experimental: false
+  security:
+    use_safetensors: true
 
 run:
   seed:

From 1824a9f9ecea0f14a5df8898a0ca9ffafb5a8673 Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 9 Dec 2024 17:37:05 +0100
Subject: [PATCH 2/8] add use_safetensors consumption in HFCompatible, Pipeline

---
 garak/generators/huggingface.py    | 2 +-
 garak/resources/api/huggingface.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index abfddc9c..881e464d 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -78,7 +78,7 @@ def _load_client(self):
         if _config.run.seed is not None:
             set_seed(_config.run.seed)
 
-        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline, use_safetensors=_config.system.security["use_safetensors"])
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if self.generator.tokenizer is None:
             # account for possible model without a stored tokenizer

diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py
index 67802c21..bd35967d 100644
--- a/garak/resources/api/huggingface.py
+++ b/garak/resources/api/huggingface.py
@@ -17,7 +17,7 @@ def _set_hf_context_len(self, config):
         if isinstance(config.n_ctx, int):
             self.context_len = config.n_ctx
 
-    def _gather_hf_params(self, hf_constructor: Callable):
+    def _gather_hf_params(self, hf_constructor: Callable, use_safetensors=True):
         """ "Identify arguments that impact huggingface transformers resources and behavior"""
 
         import torch
@@ -85,6 +85,9 @@ def _gather_hf_params(self, hf_constructor: Callable):
             ):
                 args["trust_remote_code"] = False
 
+        if "use_safetensors" in params_to_process and "use_safetensors" not in params:
+            args["use_safetensors"] = use_safetensors
+
         return args
 
     def _select_hf_device(self):

From b2a8af2ff78d531c553a112f48fffce01ec655af Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 9 Dec 2024 17:37:23 +0100
Subject: [PATCH 3/8] black

---
 garak/generators/huggingface.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index 881e464d..1d13669f 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -78,7 +78,10 @@ def _load_client(self):
         if _config.run.seed is not None:
             set_seed(_config.run.seed)
 
-        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline, use_safetensors=_config.system.security["use_safetensors"])
+        pipeline_kwargs = self._gather_hf_params(
+            hf_constructor=pipeline,
+            use_safetensors=_config.system.security["use_safetensors"],
+        )
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if self.generator.tokenizer is None:
             # account for possible model without a stored tokenizer
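Patches 1-3 establish the plumbing: a `system.security.use_safetensors` default in the core YAML, read at each HF call site and forwarded into `_gather_hf_params`. A minimal sketch of how such a nested YAML default surfaces as the `_config.system.security["use_safetensors"]` lookup seen in the diffs (the loader scaffolding below is assumed for illustration, not garak's actual config machinery):

```python
# Sketch only: stands in for garak's config loader to show the access
# pattern the diffs above rely on.
import yaml
from types import SimpleNamespace

core = yaml.safe_load(
    """
system:
  lite: true
  security:
    use_safetensors: true
"""
)

system = SimpleNamespace(**core["system"])
# `security` stays a plain dict here, matching the subscript access in the diff
use_safetensors = system.security["use_safetensors"]
print(use_safetensors)  # True
```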
From e63b3d87453a5c22e0a25bf6d5335592328ccaef Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 9 Dec 2024 17:50:02 +0100
Subject: [PATCH 4/8] add safetensors option to param gathering

---
 garak/generators/huggingface.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index 1d13669f..40558ea2 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -171,8 +171,11 @@ def _load_client(self):
             if "use_fp8" in _config.plugins.generators.OptimumPipeline:
                 self.use_fp8 = True
 
-        pipline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
-        self.generator = pipeline("text-generation", **pipline_kwargs)
+        pipeline_kwargs = self._gather_hf_params(
+            hf_constructor=pipeline,
+            use_safetensors=_config.system.security["use_safetensors"],
+        )
+        self.generator = pipeline("text-generation", **pipeline_kwargs)
         if not hasattr(self, "deprefix_prompt"):
             self.deprefix_prompt = self.name in models_to_deprefix
         if _config.loaded:
@@ -199,8 +202,11 @@ def _load_client(self):
         # Note that with pipeline, in order to access the tokenizer, model, or device, you must get the attribute
         # directly from self.generator instead of from the ConversationalPipeline object itself.
-        pipline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
-        self.generator = pipeline("conversational", **pipline_kwargs)
+        pipeline_kwargs = self._gather_hf_params(
+            hf_constructor=pipeline,
+            use_safetensors=_config.system.security["use_safetensors"],
+        )
+        self.generator = pipeline("conversational", **pipeline_kwargs)
         self.conversation = Conversation()
         if not hasattr(self, "deprefix_prompt"):
             self.deprefix_prompt = self.name in models_to_deprefix
@@ -447,7 +453,8 @@ def _load_client(self):
             transformers.set_seed(_config.run.seed)
 
         model_kwargs = self._gather_hf_params(
-            hf_constructor=transformers.AutoConfig.from_pretrained
+            hf_constructor=transformers.AutoConfig.from_pretrained,
+            use_safetensors=_config.system.security["use_safetensors"],
         )  # will defer to device_map if device map was `auto` may not match self.device
         self.config = transformers.AutoConfig.from_pretrained(self.name, **model_kwargs)
@@ -566,7 +573,8 @@ def __init__(self, name="", config_root=_config):
         self.device = self._select_hf_device()
         model_kwargs = self._gather_hf_params(
-            hf_constructor=LlavaNextForConditionalGeneration.from_pretrained
+            hf_constructor=LlavaNextForConditionalGeneration.from_pretrained,
+            use_safetensors=_config.system.security["use_safetensors"],
         )  # will defer to device_map if device map was `auto` may not match self.device
         self.processor = LlavaNextProcessor.from_pretrained(self.name)
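Patch 4 repeats the same config lookup at every constructor call site. Downstream, the flag maps onto the `use_safetensors` argument that transformers' `from_pretrained` methods accept: set to `True`, loading is restricted to `.safetensors` weights rather than pickle-based checkpoints. A hedged usage sketch (the model name is illustrative):

```python
from transformers import AutoModelForCausalLM

# With use_safetensors=True, loading will not fall back to pickled .bin
# checkpoints, which can execute arbitrary code when deserialized -- the
# risk this patch series is defending against.
model = AutoModelForCausalLM.from_pretrained("gpt2", use_safetensors=True)
```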
From c9dd5ad3d94ca1a05f5b24c618ba6381f50d388b Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 16 Dec 2024 13:35:44 +0100
Subject: [PATCH 5/8] put use_safetensors at same level as trust_remote_code

---
 garak/generators/huggingface.py    | 7 ++-----
 garak/resources/api/huggingface.py | 2 +-
 garak/resources/garak.core.yaml    | 2 --
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index 40558ea2..3c72310a 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -80,7 +80,6 @@ def _load_client(self):
 
         pipeline_kwargs = self._gather_hf_params(
             hf_constructor=pipeline,
-            use_safetensors=_config.system.security["use_safetensors"],
         )
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if self.generator.tokenizer is None:
@@ -173,7 +172,6 @@ def _load_client(self):
 
         pipeline_kwargs = self._gather_hf_params(
             hf_constructor=pipeline,
-            use_safetensors=_config.system.security["use_safetensors"],
        )
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if not hasattr(self, "deprefix_prompt"):
@@ -204,7 +202,6 @@ def _load_client(self):
         # directly from self.generator instead of from the ConversationalPipeline object itself.
         pipeline_kwargs = self._gather_hf_params(
             hf_constructor=pipeline,
-            use_safetensors=_config.system.security["use_safetensors"],
         )
         self.generator = pipeline("conversational", **pipeline_kwargs)
         self.conversation = Conversation()
@@ -452,9 +449,10 @@ def _load_client(self):
         if _config.run.seed is not None:
             transformers.set_seed(_config.run.seed)
 
+        print(dir(_config.system))
+
         model_kwargs = self._gather_hf_params(
             hf_constructor=transformers.AutoConfig.from_pretrained,
-            use_safetensors=_config.system.security["use_safetensors"],
         )  # will defer to device_map if device map was `auto` may not match self.device
         self.config = transformers.AutoConfig.from_pretrained(self.name, **model_kwargs)
@@ -574,7 +572,6 @@ def __init__(self, name="", config_root=_config):
         self.device = self._select_hf_device()
         model_kwargs = self._gather_hf_params(
             hf_constructor=LlavaNextForConditionalGeneration.from_pretrained,
-            use_safetensors=_config.system.security["use_safetensors"],
         )  # will defer to device_map if device map was `auto` may not match self.device
         self.processor = LlavaNextProcessor.from_pretrained(self.name)

diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py
index bd35967d..435cdb44 100644
--- a/garak/resources/api/huggingface.py
+++ b/garak/resources/api/huggingface.py
@@ -86,7 +86,7 @@ def _gather_hf_params(self, hf_constructor: Callable, use_safetensors=True):
             args["trust_remote_code"] = False
 
         if "use_safetensors" in params_to_process and "use_safetensors" not in params:
-            args["use_safetensors"] = use_safetensors
+            args["use_safetensors"] = True
 
         return args

diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml
index d5f42441..72f7caa8 100644
--- a/garak/resources/garak.core.yaml
+++ b/garak/resources/garak.core.yaml
@@ -7,8 +7,6 @@ system:
   lite: true
   show_z: false
   enable_experimental: false
-  security:
-    use_safetensors: true
 
 run:
   seed:
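With patch 5, the default is owned by `_gather_hf_params` itself and the per-call-site config lookups disappear, mirroring how `trust_remote_code` is already handled. Reduced to its essentials, the guard behaves like this sketch (only the `if` test is verbatim from the diff; the surrounding scaffolding is assumed):

```python
import inspect

def gather_hf_params_sketch(hf_constructor, params: dict) -> dict:
    # keep only kwargs the constructor's signature actually declares
    params_to_process = inspect.signature(hf_constructor).parameters
    args = {k: v for k, v in params.items() if k in params_to_process}
    # the helper owns the safe default; callers can still override it
    if "use_safetensors" in params_to_process and "use_safetensors" not in params:
        args["use_safetensors"] = True
    return args

def fake_from_pretrained(name, use_safetensors=False, revision="main"):
    """Stand-in for an HF constructor that declares the flag."""
    return use_safetensors

print(gather_hf_params_sketch(fake_from_pretrained, {"revision": "main"}))
# {'revision': 'main', 'use_safetensors': True}
```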
From 23a7ad6dab02b61c7e56e594068d046b1ecd1b34 Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 16 Dec 2024 13:38:21 +0100
Subject: [PATCH 6/8] unformat

---
 garak/generators/huggingface.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index 3c72310a..1204992f 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -78,9 +78,7 @@ def _load_client(self):
         if _config.run.seed is not None:
             set_seed(_config.run.seed)
 
-        pipeline_kwargs = self._gather_hf_params(
-            hf_constructor=pipeline,
-        )
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if self.generator.tokenizer is None:
             # account for possible model without a stored tokenizer
@@ -170,9 +168,7 @@ def _load_client(self):
             if "use_fp8" in _config.plugins.generators.OptimumPipeline:
                 self.use_fp8 = True
 
-        pipeline_kwargs = self._gather_hf_params(
-            hf_constructor=pipeline,
-        )
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if not hasattr(self, "deprefix_prompt"):
             self.deprefix_prompt = self.name in models_to_deprefix
@@ -200,9 +196,7 @@ def _load_client(self):
         # Note that with pipeline, in order to access the tokenizer, model, or device, you must get the attribute
         # directly from self.generator instead of from the ConversationalPipeline object itself.
-        pipeline_kwargs = self._gather_hf_params(
-            hf_constructor=pipeline,
-        )
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
         self.generator = pipeline("conversational", **pipeline_kwargs)
         self.conversation = Conversation()
         if not hasattr(self, "deprefix_prompt"):
@@ -452,7 +446,7 @@ def _load_client(self):
 
         print(dir(_config.system))
 
         model_kwargs = self._gather_hf_params(
-            hf_constructor=transformers.AutoConfig.from_pretrained,
+            hf_constructor=transformers.AutoConfig.from_pretrained
         )  # will defer to device_map if device map was `auto` may not match self.device
         self.config = transformers.AutoConfig.from_pretrained(self.name, **model_kwargs)
@@ -571,7 +565,7 @@ def __init__(self, name="", config_root=_config):
         self.device = self._select_hf_device()
         model_kwargs = self._gather_hf_params(
-            hf_constructor=LlavaNextForConditionalGeneration.from_pretrained,
+            hf_constructor=LlavaNextForConditionalGeneration.from_pretrained
         )  # will defer to device_map if device map was `auto` may not match self.device
         self.processor = LlavaNextProcessor.from_pretrained(self.name)

From b04818cac6e18d70c4dcbc603d716e35d86d2bed Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 16 Dec 2024 13:39:08 +0100
Subject: [PATCH 7/8] unadd _gather_hf_params option

---
 garak/resources/api/huggingface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py
index 435cdb44..a9c6445c 100644
--- a/garak/resources/api/huggingface.py
+++ b/garak/resources/api/huggingface.py
@@ -17,7 +17,7 @@ def _set_hf_context_len(self, config):
         if isinstance(config.n_ctx, int):
             self.context_len = config.n_ctx
 
-    def _gather_hf_params(self, hf_constructor: Callable, use_safetensors=True):
+    def _gather_hf_params(self, hf_constructor: Callable):
         """ "Identify arguments that impact huggingface transformers resources and behavior"""
 
         import torch

From 58e54c424d9b4a555bc7f15248cbec78ff2585fd Mon Sep 17 00:00:00 2001
From: Leon Derczynski
Date: Mon, 16 Dec 2024 14:15:01 +0100
Subject: [PATCH 8/8] use_safetensors not always expressed in params observable
 in inspect

---
 garak/resources/api/huggingface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py
index a9c6445c..aeb47b43 100644
--- a/garak/resources/api/huggingface.py
+++ b/garak/resources/api/huggingface.py
@@ -85,7 +85,7 @@ def _gather_hf_params(self, hf_constructor: Callable):
             ):
                 args["trust_remote_code"] = False
 
-        if "use_safetensors" in params_to_process and "use_safetensors" not in params:
+        if "use_safetensors" not in params:
             args["use_safetensors"] = True
 
         return args
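The final commit drops the signature check because, as the subject notes, `use_safetensors` is not always visible to `inspect`: constructors such as `transformers.pipeline` absorb it through `**kwargs` rather than declaring it as a named parameter, so the old guard silently skipped them. A small demonstration of the pitfall (the constructor here is a stand-in, not the transformers function):

```python
import inspect

def constructor(task, device=None, **kwargs):
    """Stand-in for a loader that forwards extras via **kwargs."""
    return task, device, kwargs

params = inspect.signature(constructor).parameters
print("use_safetensors" in params)  # False: hidden behind **kwargs
print(constructor("text-generation", use_safetensors=True))
# ('text-generation', None, {'use_safetensors': True})
```

Setting the kwarg unconditionally, as patch 8 does, presumably trades on the expectation that every constructor routed through this helper tolerates the flag, in exchange for guaranteed coverage of the ones that swallow it via `**kwargs`.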