-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmodel_loader.py
41 lines (37 loc) · 1.15 KB
/
model_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
from transformers import (
AutoModelForCausalLM,
AutoConfig,
AutoTokenizer,
BitsAndBytesConfig,
)
import torch
from dotenv import load_dotenv
# Runs at import time: python-dotenv loads variables from a .env file (when it
# finds one) into the process environment. The HUGGINGFACE_TOKEN value read via
# os.getenv further down may be supplied that way.
load_dotenv()
class ModelLoader:
    """Load a causal language model in 4-bit NF4 form, plus its config and tokenizer.

    Everything is loaded eagerly in ``__init__`` from the same ``model_path``,
    authenticating with the ``HUGGINGFACE_TOKEN`` environment variable when it
    is set.

    Attributes:
        model_path: The identifier/path handed to every ``from_pretrained`` call.
        config: Object returned by ``AutoConfig.from_pretrained``.
        model: Object returned by ``AutoModelForCausalLM.from_pretrained``
            (see ``_load_model``).
        tokenizer: Object returned by ``AutoTokenizer.from_pretrained``.
    """

    def __init__(self, model_path: str):
        """Load config, model and tokenizer for ``model_path`` (in that order).

        Args:
            model_path: Model identifier or local path accepted by the
                ``from_pretrained`` loaders.
        """
        self.model_path = model_path
        # NOTE(review): recent transformers releases deprecate `use_auth_token`
        # in favour of `token`; the original keyword is kept so the installed
        # version keeps working -- confirm the pinned version before switching.
        self.config = AutoConfig.from_pretrained(
            self.model_path,
            trust_remote_code=True,
            use_auth_token=self._auth_token(),
        )
        self.model = self._load_model()
        # trust_remote_code mirrors the config/model loads: a repo that ships
        # custom modelling code may also ship a custom tokenizer class, which
        # would otherwise be refused even though the same repo's code is
        # already trusted for the model.
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_path,
            trust_remote_code=True,
            use_auth_token=self._auth_token(),
        )

    @staticmethod
    def _auth_token():
        """Return the ``HUGGINGFACE_TOKEN`` environment value, or None if unset."""
        return os.getenv("HUGGINGFACE_TOKEN")

    def _load_model(self):
        """Load the causal LM from ``self.model_path`` with 4-bit NF4 quantization.

        The quantization config enables double quantization and uses
        ``torch.bfloat16`` as the compute dtype. Device placement is delegated
        to the loader through ``device_map="auto"``.

        Returns:
            The object returned by ``AutoModelForCausalLM.from_pretrained``.
        """
        nf4_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        return AutoModelForCausalLM.from_pretrained(
            self.model_path,
            quantization_config=nf4_config,
            trust_remote_code=True,
            device_map="auto",
            use_auth_token=self._auth_token(),
        )