|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": null, |
| 5 | + "execution_count": 1, |
6 | 6 | "id": "ae5c6dc9-d246-4f47-a4c0-c1c07da0b901",
|
7 | 7 | "metadata": {},
|
8 | 8 | "outputs": [],
|
|
12 | 12 | },
|
13 | 13 | {
|
14 | 14 | "cell_type": "code",
|
15 |
| - "execution_count": null, |
| 15 | + "execution_count": 2, |
16 | 16 | "id": "b0ab721c-85f3-49b8-ac06-e8c2efce5d69",
|
17 | 17 | "metadata": {},
|
18 | 18 | "outputs": [],
|
|
22 | 22 | },
|
23 | 23 | {
|
24 | 24 | "cell_type": "markdown",
|
| 25 | + "id": "7bbf2852", |
25 | 26 | "metadata": {},
|
26 | 27 | "source": [
|
27 | 28 | "### Test CUDA"
|
28 | 29 | ]
|
29 | 30 | },
|
30 | 31 | {
|
31 | 32 | "cell_type": "code",
|
32 |
| - "execution_count": null, |
| 33 | + "execution_count": 3, |
33 | 34 | "id": "e1d9dc6d-bf38-41b5-a44a-77aea0a355cf",
|
34 | 35 | "metadata": {},
|
35 |
| - "outputs": [], |
| 36 | + "outputs": [ |
| 37 | + { |
| 38 | + "name": "stdout", |
| 39 | + "output_type": "stream", |
| 40 | + "text": [ |
| 41 | + "Using device: cuda\n" |
| 42 | + ] |
| 43 | + } |
| 44 | + ], |
36 | 45 | "source": [
|
37 | 46 | "import torch\n",
|
38 | 47 | "use_cuda = torch.cuda.is_available()\n",
|
|
42 | 51 | },
|
43 | 52 | {
|
44 | 53 | "cell_type": "markdown",
|
| 54 | + "id": "e9252eac", |
45 | 55 | "metadata": {},
|
46 | 56 | "source": [
|
47 | 57 | "## Imports"
|
48 | 58 | ]
|
49 | 59 | },
|
50 | 60 | {
|
51 | 61 | "cell_type": "code",
|
52 |
| - "execution_count": null, |
| 62 | + "execution_count": 4, |
53 | 63 | "id": "c1afd2e2-297e-4f67-ac7c-24b1473d5e06",
|
54 | 64 | "metadata": {},
|
55 | 65 | "outputs": [],
|
|
69 | 79 | },
|
70 | 80 | {
|
71 | 81 | "cell_type": "markdown",
|
| 82 | + "id": "06512f0f", |
72 | 83 | "metadata": {},
|
73 | 84 | "source": [
|
74 | 85 | "## Login to huggingface\n",
|
|
77 | 88 | },
|
78 | 89 | {
|
79 | 90 | "cell_type": "code",
|
80 |
| - "execution_count": null, |
| 91 | + "execution_count": 5, |
81 | 92 | "id": "3a1cc9bd-17ca-4754-b36f-1d67e194d205",
|
82 | 93 | "metadata": {},
|
83 |
| - "outputs": [], |
| 94 | + "outputs": [ |
| 95 | + { |
| 96 | + "name": "stdout", |
| 97 | + "output_type": "stream", |
| 98 | + "text": [ |
| 99 | + "\n", |
| 100 | + " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", |
| 101 | + " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", |
| 102 | + " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", |
| 103 | + " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", |
| 104 | + " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", |
| 105 | + "\n", |
| 106 | + " A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.\n", |
| 107 | + " Setting a new token will erase the existing one.\n", |
| 108 | + " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n" |
| 109 | + ] |
| 110 | + }, |
| 111 | + { |
| 112 | + "name": "stdin", |
| 113 | + "output_type": "stream", |
| 114 | + "text": [ |
| 115 | + "Token: ········\n", |
| 116 | + "Add token as git credential? (Y/n) n\n" |
| 117 | + ] |
| 118 | + }, |
| 119 | + { |
| 120 | + "name": "stdout", |
| 121 | + "output_type": "stream", |
| 122 | + "text": [ |
| 123 | + "Token is valid (permission: write).\n", |
| 124 | + "Your token has been saved to /pc2/users/a/ashwin/.cache/huggingface/token\n", |
| 125 | + "Login successful\n" |
| 126 | + ] |
| 127 | + } |
| 128 | + ], |
84 | 129 | "source": [
|
85 | 130 | "from huggingface_hub import interpreter_login\n",
|
86 | 131 | "interpreter_login()"
|
87 | 132 | ]
|
88 | 133 | },
|
89 | 134 | {
|
90 | 135 | "cell_type": "markdown",
|
| 136 | + "id": "e27647e5", |
91 | 137 | "metadata": {},
|
92 | 138 | "source": [
|
93 | 139 | "## Load dataset "
|
94 | 140 | ]
|
95 | 141 | },
|
96 | 142 | {
|
97 | 143 | "cell_type": "code",
|
98 |
| - "execution_count": null, |
| 144 | + "execution_count": 6, |
99 | 145 | "id": "c70ed7a5-0b8c-41d9-8ad6-80c36d6a10da",
|
100 | 146 | "metadata": {},
|
101 |
| - "outputs": [], |
| 147 | + "outputs": [ |
| 148 | + { |
| 149 | + "name": "stdout", |
| 150 | + "output_type": "stream", |
| 151 | + "text": [ |
| 152 | + "Number of prompts: 15\n", |
| 153 | + "Column names are: ['text']\n" |
| 154 | + ] |
| 155 | + } |
| 156 | + ], |
102 | 157 | "source": [
|
103 | 158 | "dataset_name = \"ashwinprasadme/typeevalpy_finetuning\"\n",
|
104 | 159 | "dataset = load_dataset(dataset_name, split=\"train\", token=True)\n",
|
|
109 | 164 | },
|
110 | 165 | {
|
111 | 166 | "cell_type": "markdown",
|
| 167 | + "id": "d4ad7e49", |
112 | 168 | "metadata": {},
|
113 | 169 | "source": [
|
114 | 170 | "## Dataset preparation functions"
|
115 | 171 | ]
|
116 | 172 | },
|
117 | 173 | {
|
118 | 174 | "cell_type": "code",
|
119 |
| - "execution_count": null, |
| 175 | + "execution_count": 7, |
120 | 176 | "id": "6a4f3f26-6c38-4074-804b-caa4e66aa1d5",
|
121 | 177 | "metadata": {},
|
122 | 178 | "outputs": [],
|
|
175 | 231 | },
|
176 | 232 | {
|
177 | 233 | "cell_type": "markdown",
|
| 234 | + "id": "dec144c8", |
178 | 235 | "metadata": {},
|
179 | 236 | "source": [
|
180 | 237 | "## Training functions"
|
181 | 238 | ]
|
182 | 239 | },
|
183 | 240 | {
|
184 | 241 | "cell_type": "code",
|
185 |
| - "execution_count": null, |
| 242 | + "execution_count": 8, |
186 | 243 | "id": "e7a80306-3bcc-4b18-837f-08b883bf98e7",
|
187 | 244 | "metadata": {},
|
188 | 245 | "outputs": [],
|
|
270 | 327 | },
|
271 | 328 | {
|
272 | 329 | "cell_type": "code",
|
273 |
| - "execution_count": null, |
| 330 | + "execution_count": 9, |
| 331 | + "id": "427c7dea", |
274 | 332 | "metadata": {},
|
275 | 333 | "outputs": [],
|
276 | 334 | "source": [
|
|
353 | 411 | },
|
354 | 412 | {
|
355 | 413 | "cell_type": "markdown",
|
| 414 | + "id": "001b7997", |
356 | 415 | "metadata": {},
|
357 | 416 | "source": [
|
358 | 417 | "## Load model"
|
359 | 418 | ]
|
360 | 419 | },
|
361 | 420 | {
|
362 | 421 | "cell_type": "code",
|
363 |
| - "execution_count": null, |
| 422 | + "execution_count": 10, |
| 423 | + "id": "1eb2cd1a", |
364 | 424 | "metadata": {},
|
365 | 425 | "outputs": [],
|
366 | 426 | "source": [
|
|
392 | 452 | "execution_count": null,
|
393 | 453 | "id": "fbd5d2d4-8c64-4266-a9cf-2fdeac5df0d4",
|
394 | 454 | "metadata": {},
|
395 |
| - "outputs": [], |
| 455 | + "outputs": [ |
| 456 | + { |
| 457 | + "name": "stdout", |
| 458 | + "output_type": "stream", |
| 459 | + "text": [ |
| 460 | + "Processing Model: codellama/CodeLlama-7b-Python-hf\n" |
| 461 | + ] |
| 462 | + }, |
| 463 | + { |
| 464 | + "data": { |
| 465 | + "application/vnd.jupyter.widget-view+json": { |
| 466 | + "model_id": "ff429292453744558773a62add62d543", |
| 467 | + "version_major": 2, |
| 468 | + "version_minor": 0 |
| 469 | + }, |
| 470 | + "text/plain": [ |
| 471 | + "config.json: 0%| | 0.00/644 [00:00<?, ?B/s]" |
| 472 | + ] |
| 473 | + }, |
| 474 | + "metadata": {}, |
| 475 | + "output_type": "display_data" |
| 476 | + }, |
| 477 | + { |
| 478 | + "data": { |
| 479 | + "application/vnd.jupyter.widget-view+json": { |
| 480 | + "model_id": "9f6cbe7009a644489cbc1d387c41c4d9", |
| 481 | + "version_major": 2, |
| 482 | + "version_minor": 0 |
| 483 | + }, |
| 484 | + "text/plain": [ |
| 485 | + "model.safetensors.index.json: 0%| | 0.00/25.1k [00:00<?, ?B/s]" |
| 486 | + ] |
| 487 | + }, |
| 488 | + "metadata": {}, |
| 489 | + "output_type": "display_data" |
| 490 | + }, |
| 491 | + { |
| 492 | + "data": { |
| 493 | + "application/vnd.jupyter.widget-view+json": { |
| 494 | + "model_id": "627db9bae4134e45858c3ea37652089b", |
| 495 | + "version_major": 2, |
| 496 | + "version_minor": 0 |
| 497 | + }, |
| 498 | + "text/plain": [ |
| 499 | + "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]" |
| 500 | + ] |
| 501 | + }, |
| 502 | + "metadata": {}, |
| 503 | + "output_type": "display_data" |
| 504 | + }, |
| 505 | + { |
| 506 | + "data": { |
| 507 | + "application/vnd.jupyter.widget-view+json": { |
| 508 | + "model_id": "9d3a2cba3b3b42e0b723f63392766fae", |
| 509 | + "version_major": 2, |
| 510 | + "version_minor": 0 |
| 511 | + }, |
| 512 | + "text/plain": [ |
| 513 | + "model-00001-of-00002.safetensors: 0%| | 0.00/9.98G [00:00<?, ?B/s]" |
| 514 | + ] |
| 515 | + }, |
| 516 | + "metadata": {}, |
| 517 | + "output_type": "display_data" |
| 518 | + } |
| 519 | + ], |
396 | 520 | "source": [
|
397 | 521 | "# Load model from HF with user's token and with bitsandbytes config\n",
|
398 |
| - "output_dir_str = \"/scratch/hpc-prf-hdgen/ashwin/finetuned_models/ft_{model_name}\"\n", |
399 |
| - "output_dir_merged_str = \"/scratch/hpc-prf-hdgen/ashwin/finetuned_models/ft_{model_name}_merged\"\n", |
| 522 | + "output_dir_str = \"/scratch/hpc-prf-hdgen/ashwin/finetuned_models/ft_v1_{model_name}\"\n", |
| 523 | + "output_dir_merged_str = \"/scratch/hpc-prf-hdgen/ashwin/finetuned_models/ft_v1_{model_name}_merged\"\n", |
400 | 524 | "\n",
|
401 | 525 | "start_time = time.time()\n",
|
402 | 526 | "for model_name in models_list:\n",
|
403 |
| - " print(f\"Processing Model: {model_name}\")\n", |
404 |
| - " # model_name = \"meta-llama/Llama-2-7b-hf\" \n", |
405 |
| - " bnb_config = create_bnb_config()\n", |
406 |
| - " model, tokenizer = load_model(model_name, bnb_config)\n", |
407 |
| - "\n", |
408 |
| - " # Preprocess dataset\n", |
409 |
| - " print(\"Preprocess dataset\")\n", |
410 |
| - " max_length = get_max_length(model)\n", |
411 |
| - " dataset = preprocess_dataset(tokenizer, max_length, 0, dataset)\n", |
412 |
| - "\n", |
413 |
| - " # Start training\n", |
414 |
| - " print(\"Start training\")\n", |
415 |
| - " output_dir = output_dir_str.format(model_name=model_name)\n", |
416 |
| - " train(model, tokenizer, dataset, output_dir)\n", |
417 |
| - "\n", |
418 |
| - " # Save and Merge Model\n", |
419 |
| - " print(\"Save and Merge Model\")\n", |
420 |
| - " model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map=\"auto\", torch_dtype=torch.bfloat16)\n", |
421 |
| - " model = model.merge_and_unload()\n", |
422 |
| - "\n", |
423 |
| - " output_merged_dir = output_dir_merged_str.format(model_name=model_name)\n", |
424 |
| - " os.makedirs(output_merged_dir, exist_ok=True)\n", |
425 |
| - " model.save_pretrained(output_merged_dir, safe_serialization=True)\n", |
426 |
| - "\n", |
427 |
| - " # save tokenizer for easy inference\n", |
428 |
| - " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", |
429 |
| - " tokenizer.save_pretrained(output_merged_dir)\n", |
| 527 | + " try:\n", |
| 528 | + " print(f\"Processing Model: {model_name}\")\n", |
| 529 | + " # model_name = \"meta-llama/Llama-2-7b-hf\" \n", |
| 530 | + " bnb_config = create_bnb_config()\n", |
| 531 | + " model, tokenizer = load_model(model_name, bnb_config)\n", |
| 532 | + " \n", |
| 533 | + " # Preprocess dataset\n", |
| 534 | + " print(\"Preprocess dataset\")\n", |
| 535 | + " max_length = get_max_length(model)\n", |
| 536 | + " dataset = preprocess_dataset(tokenizer, max_length, 0, dataset)\n", |
| 537 | + " \n", |
| 538 | + " # Start training\n", |
| 539 | + " print(\"Start training\")\n", |
| 540 | + " output_dir = output_dir_str.format(model_name=model_name.split(\"/\")[1])\n", |
| 541 | + " train(model, tokenizer, dataset, output_dir)\n", |
| 542 | + " \n", |
| 543 | + " # Save and Merge Model\n", |
| 544 | + " print(\"Save and Merge Model\")\n", |
| 545 | + " model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map=\"auto\", torch_dtype=torch.bfloat16)\n", |
| 546 | + " model = model.merge_and_unload()\n", |
| 547 | + " \n", |
| 548 | + " output_merged_dir = output_dir_merged_str.format(model_name=model_name.split(\"/\")[1])\n", |
| 549 | + " os.makedirs(output_merged_dir, exist_ok=True)\n", |
| 550 | + " model.save_pretrained(output_merged_dir, safe_serialization=True)\n", |
| 551 | + " \n", |
| 552 | + " # save tokenizer for easy inference\n", |
| 553 | + " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", |
| 554 | + " tokenizer.save_pretrained(output_merged_dir)\n", |
| 555 | + " except Exception as e:\n", |
| 556 | + " print(f\"Error training: {model_name}\")\n", |
| 557 | + " print(e)\n", |
430 | 558 | "\n",
|
431 | 559 | "print(f\"DONE! Took{time.time()-start_time}\")"
|
432 | 560 | ]
|
| 561 | + }, |
| 562 | + { |
| 563 | + "cell_type": "code", |
| 564 | + "execution_count": null, |
| 565 | + "id": "cfedb7a2-9a60-498a-bfed-86690ea6eb6b", |
| 566 | + "metadata": {}, |
| 567 | + "outputs": [], |
| 568 | + "source": [] |
433 | 569 | }
|
434 | 570 | ],
|
435 | 571 | "metadata": {
|
|
0 commit comments