"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [60/60 01:41, Epoch 0/1]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 1.818600 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2.305800 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1.703400 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2.014700 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1.735100 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1.670600 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1.259200 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1.308900 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1.165100 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 1.225300 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 0.952900 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 0.975100 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 0.927200 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 1.053400 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 0.893100 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 0.907500 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 1.010400 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 1.262300 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 1.024500 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 0.884400 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 0.945600 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 1.019000 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 0.899700 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 0.994800 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 1.078800 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 1.021300 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 1.048400 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 0.883900 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 0.849500 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 0.894900 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 0.857800 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 0.866900 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 0.987500 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 0.860600 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 0.959300 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 0.860900 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 0.882400 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 0.756700 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 1.090100 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 1.174900 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 0.893200 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 0.981300 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 0.954800 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 0.911400 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " 0.918000 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 0.973800 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 0.872000 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 1.198100 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " 0.909500 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " 1.032100 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " 1.017500 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " 0.909200 | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " 0.976500 | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " 1.154300 | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " 0.781000 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " 1.012800 | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " 0.885200 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " 0.828500 | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " 0.853500 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " 0.898100 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "exp.end()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8II72I92Bm9s",
+ "outputId": "6518c199-dff2-4c4a-f821-aefdf595fa2c"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m ---------------------------------------------------------------------------------------\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m Comet.ml Experiment Summary\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m ---------------------------------------------------------------------------------------\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m Data:\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m display_summary_level : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m name : apparent_pagoda_1033\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m url : https://www.comet.com/examples/comet-example-unsloth/cb280e1a2ac942cbab2343349d80282f\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m Metrics [count] (min, max):\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m epoch [61] : (0.0001545595054095827, 0.00927357032457496)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m grad_norm [60] : (0.2999851107597351, 2.063112258911133)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m learning_rate [60] : (0.0, 0.0002)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m loss [60] : (0.7567, 2.3058)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m total_flos : 5726714157219840.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/epoch [61] : (0.0001545595054095827, 0.00927357032457496)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/grad_norm [60] : (0.2999851107597351, 2.063112258911133)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/learning_rate [60] : (0.0, 0.0002)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/loss [60] : (0.7567, 2.3058)\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/total_flos : 5726714157219840.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/train_loss : 1.0665242771307628\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/train_runtime : 109.8838\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/train_samples_per_second : 4.368\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train/train_steps_per_second : 0.546\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train_loss : 1.0665242771307628\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train_runtime : 109.8838\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train_samples_per_second : 4.368\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m train_steps_per_second : 0.546\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m Others:\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m hasNestedParams : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m Parameters:\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|accelerator_config|dispatch_batches : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|accelerator_config|even_batches : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|accelerator_config|gradient_accumulation_kwargs : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|accelerator_config|non_blocking : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|accelerator_config|split_batches : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|accelerator_config|use_seedable_sampler : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|adafactor : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|adam_beta1 : 0.9\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|adam_beta2 : 0.999\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|adam_epsilon : 1e-08\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|auto_find_batch_size : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|batch_eval_metrics : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|bf16 : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|bf16_full_eval : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|chars_per_token : \n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|data_seed : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataloader_drop_last : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataloader_num_workers : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataloader_persistent_workers : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataloader_pin_memory : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataloader_prefetch_factor : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataset_batch_size : 1000\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataset_kwargs : {}\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataset_num_proc : 2\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dataset_text_field : text\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ddp_backend : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ddp_broadcast_buffers : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ddp_bucket_cap_mb : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ddp_find_unused_parameters : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ddp_timeout : 1800\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|debug : []\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|deepspeed : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|disable_tqdm : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|dispatch_batches : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|do_eval : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|do_predict : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|do_train : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_accumulation_steps : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_delay : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_do_concat_batches : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_on_start : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_packing : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_steps : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_strategy : no\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|eval_use_gather_object : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|evaluation_strategy : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fp16 : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fp16_backend : auto\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fp16_full_eval : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fp16_opt_level : O1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp : []\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp_config|min_num_params : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp_config|xla : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp_config|xla_fsdp_grad_ckpt : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp_config|xla_fsdp_v2 : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp_min_num_params : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|fsdp_transformer_layer_cls_to_wrap : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|full_determinism : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|gradient_accumulation_steps : 4\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|gradient_checkpointing : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|gradient_checkpointing_kwargs : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|greater_is_better : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|group_by_length : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|half_precision_backend : auto\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|hub_always_push : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|hub_model_id : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|hub_private_repo : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|hub_strategy : every_save\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|hub_token : \n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ignore_data_skip : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|include_inputs_for_metrics : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|include_num_input_tokens_seen : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|include_tokens_per_second : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|jit_mode_eval : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|label_names : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|label_smoothing_factor : 0.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|learning_rate : 0.0002\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|length_column_name : length\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|load_best_model_at_end : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|local_rank : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|log_level : passive\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|log_level_replica : warning\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|log_on_each_node : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|logging_dir : outputs/runs/Aug30_23-55-50_eba2d1e0a1d9\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|logging_first_step : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|logging_nan_inf_filter : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|logging_steps : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|logging_strategy : steps\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|lr_scheduler_kwargs : {}\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|lr_scheduler_type : linear\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|max_grad_norm : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|max_seq_length : 2048\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|max_steps : 60\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|metric_for_best_model : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|model_init_kwargs : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|mp_parameters : \n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|neftune_noise_alpha : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|no_cuda : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|num_of_sequences : 1024\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|num_train_epochs : 3.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|optim : adamw_8bit\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|optim_args : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|optim_target_modules : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|output_dir : outputs\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|overwrite_output_dir : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|packing : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|past_index : -1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|per_device_eval_batch_size : 8\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|per_device_train_batch_size : 2\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|per_gpu_eval_batch_size : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|per_gpu_train_batch_size : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|prediction_loss_only : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|push_to_hub : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|push_to_hub_model_id : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|push_to_hub_organization : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|push_to_hub_token : \n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|ray_scope : last\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|remove_unused_columns : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|report_to : ['comet_ml', 'tensorboard']\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|restore_callback_states_from_checkpoint : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|resume_from_checkpoint : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|run_name : outputs\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|save_on_each_node : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|save_only_model : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|save_safetensors : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|save_steps : 500\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|save_strategy : steps\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|save_total_limit : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|seed : 3407\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|skip_memory_metrics : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|split_batches : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|tf32 : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|torch_compile : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|torch_compile_backend : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|torch_compile_mode : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|torch_empty_cache_steps : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|torchdynamo : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|tpu_metrics_debug : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|tpu_num_cores : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|use_cpu : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|use_ipex : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|use_legacy_prediction_loop : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|use_liger : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|use_mps_device : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|warmup_ratio : 0.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|warmup_steps : 5\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m args|weight_decay : 0.01\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|_name_or_path : unsloth/meta-llama-3.1-8b-bnb-4bit\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|add_cross_attention : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|architectures : ['LlamaForCausalLM']\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|attention_bias : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|attention_dropout : 0.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|bad_words_ids : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|begin_suppress_tokens : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|bos_token_id : 128000\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|chunk_size_feed_forward : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|cross_attention_hidden_size : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|decoder_start_token_id : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|diversity_penalty : 0.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|do_sample : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|early_stopping : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|encoder_no_repeat_ngram_size : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|eos_token_id : 128001\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|exponential_decay_length_penalty : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|finetuning_task : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|forced_bos_token_id : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|forced_eos_token_id : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|hidden_act : silu\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|hidden_size : 4096\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|id2label|0 : LABEL_0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|id2label|1 : LABEL_1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|initializer_range : 0.02\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|intermediate_size : 14336\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|is_decoder : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|is_encoder_decoder : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|label2id|LABEL_0 : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|label2id|LABEL_1 : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|length_penalty : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|max_length : 20\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|max_position_embeddings : 131072\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|min_length : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|mlp_bias : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|model_type : llama\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|no_repeat_ngram_size : 0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|num_attention_heads : 32\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|num_beam_groups : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|num_beams : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|num_hidden_layers : 32\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|num_key_value_heads : 8\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|num_return_sequences : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|output_attentions : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|output_hidden_states : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|output_scores : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|pad_token_id : 128004\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|prefix : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|pretraining_tp : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|problem_type : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|pruned_heads : {}\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|bnb_4bit_compute_dtype : bfloat16\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|bnb_4bit_quant_type : nf4\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|bnb_4bit_use_double_quant : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|llm_int8_enable_fp32_cpu_offload : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|llm_int8_has_fp16_weight : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|llm_int8_skip_modules : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|llm_int8_threshold : 6.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|load_in_4bit : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|load_in_8bit : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|quantization_config|quant_method : bitsandbytes\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|remove_invalid_values : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|repetition_penalty : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|return_dict : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|return_dict_in_generate : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rms_norm_eps : 1e-05\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rope_scaling|factor : 8.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rope_scaling|high_freq_factor : 4.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rope_scaling|low_freq_factor : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rope_scaling|original_max_position_embeddings : 8192\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rope_scaling|rope_type : llama3\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|rope_theta : 500000.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|sep_token_id : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|suppress_tokens : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|task_specific_params : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|temperature : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|tf_legacy_loss : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|tie_encoder_decoder : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|tie_word_embeddings : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|tokenizer_class : None\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|top_k : 50\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|top_p : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|torch_dtype : bfloat16\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|torchscript : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|transformers_version : 4.44.2\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|typical_p : 1.0\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|unsloth_version : 2024.8\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|use_bfloat16 : False\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|use_cache : True\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m config|vocab_size : 128256\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m peft_config|default : LoraConfig(peft_type=, auto_mapping=None, base_model_name_or_path='unsloth/meta-llama-3.1-8b-bnb-4bit', revision=None, task_type=, inference_mode=False, r=16, target_modules={'k_proj', 'gate_proj', 'v_proj', 'o_proj', 'q_proj', 'down_proj', 'up_proj'}, lora_alpha=16, lora_dropout=0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False))\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m Uploads:\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m environment details : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m filename : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m installed packages : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m model graph : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m notebook : 2\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m os packages : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m source_code : 1\n",
+ "\u001b[1;38;5;39mCOMET INFO:\u001b[0m \n",
+ "\u001b[1;38;5;214mCOMET WARNING:\u001b[0m To get all data logged automatically, import comet_ml before the following modules: torch.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## ⚙ Inference"
+ ],
+ "metadata": {
+ "id": "0IdjUa_0pzPQ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# alpaca_prompt = Copied from above\n",
+ "FastLanguageModel.for_inference(model)\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ " \"Continue the fibonnaci sequence.\", # instruction\n",
+ " \"1, 1, 2, 3, 5, 8\", # input\n",
+ " \"\", # output - leave this blank for generation\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)\n",
+ "tokenizer.batch_decode(outputs)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "76su-yv9Aad0",
+ "outputId": "8b9c2ccd-4597-41fc-d9ef-e665272fae05"
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nContinue the fibonnaci sequence.\\n\\n### Input:\\n1, 1, 2, 3, 5, 8\\n\\n### Response:\\n13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file