modelrevolver.py
import sys
import os
import random
import shutil
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import subprocess
from colorama import init, Fore, Style
import numpy as np
from datasets import load_dataset
from math import exp
import flask
import requests
import itertools
import gc
# [Currently working on a script variant that assembles a model based on the user's choice of
# best prompt responses: it runs x merge cycles, reads a prompt from a text file, generates
# five responses per cycle, and saves the cycle number, merge ratios, and responses to
# individual text files. When the cycles complete, it plays a tone so the user knows the
# outputs are ready for review; after reviewing all x cycles of five responses each, the user
# chooses the cycle number that best matches the model they want assembled.]
# Set default values for optional arguments
DEFAULT_FP16 = False
DEFAULT_MAX_SHARD_SIZE = "18000MiB"
DEFAULT_NUM_CYCLES = 3
# Set up the command-line argument parser
parser = argparse.ArgumentParser(description='Merge two models.')
parser.add_argument('--firstmodel', type=str, required=True, help='Path to the first model.')
parser.add_argument('--secondmodel', type=str, required=True, help='Path to the second model.')
parser.add_argument('--mergedpath', type=str, required=True, help='Path to save the merged model.')
parser.add_argument('--num_cycles', type=int, default=DEFAULT_NUM_CYCLES, help='Number of merge-eval-delete cycles to perform.')
parser.add_argument('--fp16', action='store_true', default=DEFAULT_FP16, help='Save the merged model in half precision (fp16).')
parser.add_argument('--shardsize', type=str, default=DEFAULT_MAX_SHARD_SIZE, help='Max shard size for the merged model.')
args = parser.parse_args()
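# A hypothetical example invocation (the paths and shard size are placeholders, not from this repo):
#   python modelrevolver.py --firstmodel /models/donor-13b --secondmodel /models/recipient-13b \
#       --mergedpath /models/offspring --num_cycles 5 --shardsize 2000MiB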
# Fetch values from command-line arguments
first_model_path = os.path.abspath(args.firstmodel)
second_model_path = os.path.abspath(args.secondmodel)
merged_model_path = os.path.abspath(args.mergedpath)
num_cycles = args.num_cycles + 1  # one extra pass is reserved for the final human-review cycle
fp16 = args.fp16
max_shard_size = args.shardsize
# Initialize settings
always_output_fp16 = False
verbose_info = True
force_cpu = True
newline = '\n'
def clear_console():
    if os.name == "nt":  # For Windows
        subprocess.call("cls", shell=True)
    else:  # For Linux and macOS
        subprocess.call("clear", shell=True)
with torch.no_grad():
    if fp16:
        torch.set_default_dtype(torch.float16)
    else:
        torch.set_default_dtype(torch.float32)
    device = torch.device('cuda')
    print(device)
    init()  # colorama is imported above; calling init() makes the ANSI colors render on Windows
    # Ensure output directory exists
    os.makedirs(merged_model_path, exist_ok=True)
    model_nameX = os.path.basename(first_model_path)
    model_nameY = os.path.basename(second_model_path)
    model_nameZ = os.path.basename(merged_model_path)
    clear_console()
    print("Initial Setup...")
    def recreate_model(best_cycle, first_model):
        # Read the merge ratios from the corresponding text file
        with open(os.path.join(os.getcwd(), f"{model_nameZ}_mergecycle{best_cycle}.txt"), "r", encoding='utf-8') as file:
            lines = file.readlines()
        # Parse the merge ratios
        merge_ratios = list(map(float, lines[3].strip().strip('[]').split(',')))
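        # The cycle file is written by main() with four header lines, so lines[3] above holds the
        # bracketed ratio list, e.g. (illustrative values only):
        #   First Model: /path/to/donor
        #   Second Model: /path/to/recipient
        #   Merge Ratios:
        #   [0.42, 0.87, 0.13, ...]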
        # Load the second model
        print(f"\nLoading Transient Recipient Parent Model {model_nameY} for recreation of Model {model_nameZ} from cycle {best_cycle} to RAM...")
        second_model = AutoModelForCausalLM.from_pretrained(second_model_path).to('cpu')
        second_model.eval()
        print("Recipient Loaded. Dtype: " + str(second_model.dtype))
        num_layers = first_model.config.num_hidden_layers
        print("Number of Layers:", num_layers)
        print("Merge Ratios:", merge_ratios)
        # Merge the models according to the stored merge ratios
        for i in range(num_layers):
            first_ratio = merge_ratios[i]
            second_ratio = 1 - first_ratio
            merged_layer = (first_model.model.layers[i].state_dict(), second_model.model.layers[i].state_dict())
            for key in merged_layer[0].keys():
                merged_layer[0][key] = first_ratio * merged_layer[0][key] + second_ratio * merged_layer[1][key]
            second_model.model.layers[i].load_state_dict(merged_layer[0])
            print("Merging Layer " + str(i))
        # Save the merged model
        print(f"{Fore.YELLOW}\nSaving User Preference Cycle {best_cycle} to disk and copying files.{Style.RESET_ALL}")
        second_model.save_pretrained(merged_model_path, max_shard_size=max_shard_size)
        # List of files to copy to merged model dir
        files_to_copy = ["special_tokens_map.json", "tokenizer_config.json", "vocab.json", "tokenizer.model", "generation_config.json", "added_tokens.json", "merges.txt"]
        # Check for the existence of 'special_tokens_map.json' in both directories
        first_model_has_special_tokens = os.path.exists(os.path.join(first_model_path, "special_tokens_map.json"))
        second_model_has_special_tokens = os.path.exists(os.path.join(second_model_path, "special_tokens_map.json"))
        # Use the first model's directory as the copy source only when it alone has 'special_tokens_map.json'
        if first_model_has_special_tokens and not second_model_has_special_tokens:
            src_dir = first_model_path
        else:
            src_dir = second_model_path
        # Copy each file to the new folder
        for filename in files_to_copy:
            src_path = os.path.join(src_dir, filename)
            dst_path = os.path.join(merged_model_path, filename)
            print(f"\nCopying file: {src_path}")
            print(f"To: {dst_path}")
            try:
                shutil.copy2(src_path, dst_path)
            except FileNotFoundError:
                print("\nFile " + filename + " not found in " + src_dir + ". Skipping (likely not important).")
        del second_model
    def review_files(first_model):
        files = sorted([f for f in os.listdir(os.getcwd()) if f.startswith(f'{model_nameZ}_mergecycle') and f.endswith('.txt')])
        for file in files:
            # Extract the cycle number from the filename
            cycle_num = int(file.replace(f'{model_nameZ}_mergecycle', '').replace('.txt', ''))
            print(f"\nCycle Number: {cycle_num}\n")
            with open(file, "r", encoding='utf-8') as f:
                print(f.read())
            print("\n")
        best_cycle = int(input("Please enter the cycle number with the best responses: "))
        # Recreate the model based on the user's choice
        recreate_model(best_cycle, first_model)
    # Loads the prompt from a file called prompts.txt, generates five sampled completions,
    # and appends the prompt and completions to the current cycle's text file.
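    # A minimal, purely illustrative prompts.txt (any plain-text prompt the user wants to test works):
    #   ### Instruction: Write two sentences about a lighthouse keeper.
    #   ### Response: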
    def generate_responses(second_model, tokenizer, cycle):
        with open('prompts.txt', 'r', encoding='utf-8') as file:
            prompt = file.read()
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        input_ids = input_ids.to('cuda')
        gen_tokens = second_model.generate(input_ids, max_length=300, num_return_sequences=5, do_sample=True)
        model_completion = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)
        with open(os.path.join(os.getcwd(), f'{model_nameZ}_mergecycle{cycle + 1}.txt'), "a", encoding='utf-8') as file:
            print(f'Prompt: {prompt}')
            file.write(f'Prompt: {prompt}')
            for i, completion in enumerate(model_completion):
                print(f'Completion {i+1}: {completion}\n')
                file.write(f'Completion {i+1}: {completion}\n')
    def main():
        # Check if paths exist
        if not os.path.exists(first_model_path) or not os.path.exists(second_model_path):
            print("\nYou must select two directories containing models to merge and one output directory. Exiting.")
            exit()
        clear_console()
        print(f"{Fore.YELLOW}[Model REVOLVER: Rapid Evolution Via Optimized-List Viewer Evaluated Response] is working with\nmodels: {Fore.GREEN}{model_nameX}{Fore.YELLOW} and {Fore.GREEN}{model_nameY}{Fore.YELLOW} for {Fore.GREEN}{num_cycles}{Fore.YELLOW} cycles.{Style.RESET_ALL}\n")
        sys.setrecursionlimit(2000)
        # Load the first model before the cycles begin, so it never gets reloaded.
        print(f"Loading Resident Donor Parent Model {model_nameX} to RAM...")
        first_model = AutoModelForCausalLM.from_pretrained(first_model_path).to('cpu')
        #first_model = first_model.to(device)
        first_model.eval()
        print("Donor Loaded. Dtype: " + str(first_model.dtype))
        # ------------------------ cycles begin ------------------------
        for cycle in range(num_cycles):
            if cycle == num_cycles - 1:  # If it's the last cycle
                print("Initiating Human Review Process...")
                review_files(first_model)
            else:
                random.seed()
                print(f"\nLoading Transient Recipient Parent Model {model_nameY} to RAM for cycle {cycle + 1} of {num_cycles}...")
                second_model = AutoModelForCausalLM.from_pretrained(second_model_path).to('cpu')
                second_model.eval()
                print("Recipient Loaded. Dtype: " + str(second_model.dtype))
                num_layers = first_model.config.num_hidden_layers
                print("Number of Layers:", num_layers)
                merge_ratios = [round(random.uniform(0.0, 1.0), 2) for _ in range(num_layers)]
                print("Merge Ratios:", merge_ratios)
                with open(os.path.join(os.getcwd(), f'{model_nameZ}_mergecycle{cycle + 1}.txt'), "w", encoding='utf-8') as file:
                    file.write(f"First Model: {first_model_path}\n")
                    file.write(f"Second Model: {second_model_path}\n")
                    file.write("Merge Ratios:\n")
                    file.write(f"{merge_ratios}\n")
                for i in range(num_layers):
                    first_ratio = merge_ratios[i]
                    second_ratio = 1 - first_ratio
                    merged_layer = (first_model.model.layers[i].state_dict(), second_model.model.layers[i].state_dict())
                    for key in merged_layer[0].keys():
                        merged_layer[0][key] = first_ratio * merged_layer[0][key] + second_ratio * merged_layer[1][key]
                    second_model.model.layers[i].load_state_dict(merged_layer[0])
                    print("Merging Layer " + str(i))
print(f"{Fore.YELLOW}\nSaving Offspring {cycle + 1} to disk and copying files.{Style.RESET_ALL}")
second_model.save_pretrained(merged_model_path, max_shard_size=max_shard_size)
# List of files to copy to merged model dir
files_to_copy = ["special_tokens_map.json", "tokenizer_config.json", "vocab.json", "tokenizer.model", "generation_config.json", "added_tokens.json", "merges.txt"]
# Check for the existence of 'special_tokens_map.json' in both directories
first_model_has_special_tokens = os.path.exists(os.path.join(first_model_path, "special_tokens_map.json"))
second_model_has_special_tokens = os.path.exists(os.path.join(second_model_path, "special_tokens_map.json"))
# Decide the source directory based on the presence of 'special_tokens_map.json'
if first_model_has_special_tokens and not second_model_has_special_tokens:
src_dir = first_model_path
elif second_model_has_special_tokens or not first_model_has_special_tokens:
src_dir = second_model_path
# Copy each file to the new folder
for filename in files_to_copy:
src_path = os.path.join(src_dir, filename)
dst_path = os.path.join(merged_model_path, filename)
print(f"\nCopying files from dir: {src_path}")
print(f"To dir: {dst_path}")
try:
shutil.copy2(src_path, dst_path)
except FileNotFoundError:
print("\nFile " + filename + " not found in " + src_dir + ". Skipping (likely not important).")
del second_model
print(f"{Fore.YELLOW}\nLoading Offspring {cycle + 1} to VRAM in 4bit mode for evaluation.{Style.RESET_ALL}")
# loads second_model to GPU in 4 bit for inference (this method was introduced to HuggingFace after September 2022, functions exactly the same as a regular model after it is loaded)
gc.collect()
torch.cuda.empty_cache()
second_model = AutoModelForCausalLM.from_pretrained(merged_model_path, device_map="auto", load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=True)
tokenizer = AutoTokenizer.from_pretrained(second_model_path)
print(f"{Fore.YELLOW}\nInitiating Prompt Responses...{Style.RESET_ALL}")
generate_responses(second_model, tokenizer, cycle)
del second_model
gc.collect()
torch.cuda.empty_cache()
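                # Delete the offspring from disk so the next cycle starts from a clean output
                # directory; only the cycle the user finally picks is rebuilt by recreate_model().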
                shutil.rmtree(merged_model_path)
                clear_console()
    if __name__ == "__main__":
        main()