-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathll_module.py
More file actions
124 lines (94 loc) · 4.97 KB
/
ll_module.py
File metadata and controls
124 lines (94 loc) · 4.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from langchain.chains import LLMChain, SequentialChain
from langchain.memory import ConversationBufferMemory
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoModel
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
import textwrap
from transformers import pipeline
# Tokenizer and 4-bit quantised causal LM, both loaded from the same checkpoint.
# NOTE: this runs at import time, so importing the module loads the weights.
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    # Placement / dtype.
    device_map='auto',
    torch_dtype=torch.float16,
    # bitsandbytes 4-bit quantisation options.
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Sampling text-generation pipeline built on the model and tokenizer above;
# talk_2_llama() wraps it in a LangChain HuggingFacePipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    # Generation settings.
    max_new_tokens=512,
    do_sample=True,
    top_k=30,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
# Llama-2 chat prompt delimiters. A user turn is wrapped in [INST] ... [/INST];
# the system prompt sits inside it between <<SYS>> and <</SYS>>. The SYS tags
# had been reduced to "<>", which is not a delimiter the chat format defines.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
# System prompt used by get_prompt() when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = """\
You are a member of the English committee of the Korean Scholastic Ability Test.
Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.Just say you don't know and you are sorry!"""
def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT, citation=None):
    """Assemble a single-turn chat prompt string.

    The result is B_INST, then the system prompt wrapped in B_SYS/E_SYS,
    then ``instruction``, then E_INST.

    Args:
        instruction: User instruction placed before the closing E_INST.
        new_system_prompt: System prompt text; defaults to
            DEFAULT_SYSTEM_PROMPT.
        citation: Optional text. When truthy, a "Citation: ..." paragraph is
            appended after the closing E_INST.

    Returns:
        The assembled prompt string.
    """
    system_block = "".join((B_SYS, new_system_prompt, E_SYS))
    pieces = [B_INST, system_block, instruction, E_INST]
    if citation:
        # The citation goes after the closing instruction tag, not inside it.
        pieces.append(f"\n\nCitation: {citation}")
    return "".join(pieces)
def cut_off_text(text, prompt):
    """Return ``text`` truncated just before the first occurrence of ``prompt``.

    Args:
        text: The string to truncate.
        prompt: The cutoff phrase to search for.

    Returns:
        Everything in ``text`` before the first ``prompt``; ``text`` unchanged
        when ``prompt`` does not occur or is empty/None.
    """
    # str.find("") is always 0, which would truncate every input to "".
    # An empty cutoff phrase therefore means "nothing to cut".
    if not prompt:
        return text
    index = text.find(prompt)
    return text if index == -1 else text[:index]
def remove_substring(string, substring):
    """Return ``string`` with every occurrence of ``substring`` deleted."""
    stripped = string.replace(substring, "")
    return stripped
def generate(text, citation=None):
    """Generate a reply to ``text`` by calling the module-level model directly.

    Builds the prompt with get_prompt() (default system prompt), runs
    ``model.generate`` without gradient tracking, decodes the first returned
    sequence and strips the prompt text from it.

    Args:
        text: The user instruction.
        citation: Optional citation text forwarded to get_prompt().

    Returns:
        The decoded output with any end-of-sequence marker tail and the
        prompt text removed.
    """
    prompt = get_prompt(text, citation=citation)
    # Keep the input tensors on the same device as the model; the tokenizer
    # returns CPU tensors while the model is placed by device_map='auto'.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=512,  # counts prompt tokens as well as generated ones
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
    final_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Cut at the end-of-sequence marker. The cutoff phrase used to be '',
    # which matches at index 0 and reduced every reply to an empty string.
    eos_marker = tokenizer.eos_token
    if eos_marker:
        final_outputs = cut_off_text(final_outputs, eos_marker)
    final_outputs = remove_substring(final_outputs, prompt)
    return final_outputs
def parse_text(text):
    """Print ``text`` wrapped to 100 columns, followed by two blank lines."""
    # end supplies the two extra newlines plus print's usual trailing one.
    print(textwrap.fill(text, width=100), end='\n\n\n')
def _parse_llm_json(raw):
    """Parse the ``{...}`` object embedded in a model reply without executing it.

    The span from the first ``{`` to the last ``}`` is parsed as JSON; if that
    fails, ``ast.literal_eval`` is tried so Python-literal dicts (for example
    single-quoted keys) are still accepted.

    Raises:
        ValueError: if the reply has no ``{...}`` span or the span is neither
            valid JSON nor a Python literal.
    """
    import ast  # local import: only this helper needs it

    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"no JSON object found in model response: {raw!r}")
    payload = raw[start:end + 1]
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        try:
            return ast.literal_eval(payload)
        except (ValueError, SyntaxError) as err:
            raise ValueError(
                f"model response is not a parseable object: {payload!r}"
            ) from err


def talk_2_llama():
    """Ask the model for one blank-inference question and return it parsed.

    Wraps the module-level ``pipe`` in a LangChain ``LLMChain``, sends the
    four-step instructions together with the expected JSON layout, and parses
    the reply.

    Returns:
        The object parsed from the model's reply (a dict when the model
        follows the requested schema).

    Raises:
        ValueError: if the reply contains no parseable ``{...}`` object.
    """
    llm = HuggingFacePipeline(
        pipeline=pipe,
        model_kwargs={'temperature': 0.7, 'max_length': 256, 'top_k': 50},
    )
    system_prompt = "You are a member of the English committee of the Korean Scholastic Ability Test."
    # The placeholders must match the keyword arguments given to run() below.
    # The template previously declared only {text}, which run() never
    # received, so the chain failed on the missing input.
    instruction = "You should ask blank inference questions. I'll give you step-by-step instructions to help you get the right answer. Please provide only the answers to step 1, step 2, step 3, and step 4.:\n\n {step}\n{json_schema}"
    template = get_prompt(instruction, system_prompt)
    prompt = PromptTemplate(template=template, input_variables=["step", "json_schema"])
    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=False)
    response = llm_chain.run(step="""Step 1: In this step, you must provide a sentence of at least 800 characters.
Step 2: Replace only one specific word or grammar in the passage with "___" and combine the sentence from step 1 with the passage from step 2 to display the full sentence.
Step 3: Create a multiple-choice question to find replacement words based on the passage with five options. One of the options is the replaced answer from step 2, and the other four options are distractions.
Step 4: Please provide the correct answers (example: 1, 2, 3, 4, 5) obtained through step 3.
""",
json_schema="""{
"class": "blank",
"content" : "{step 1}",
"main": "{step 2}",
"view1": "{step 3[0]}",
"view2": "{step 3[1]}",
"view3": "{step 3[2]}",
"view4": "{step 3[3]}",
"view5": "{step 3[4]}",
"answer": "{step 4}"
}
""")
    # SECURITY: the reply is untrusted model output. It used to go through
    # eval(), which would execute arbitrary code; it is now only parsed.
    return _parse_llm_json(response)