
[QEff Finetune]: Use logger in place of print statements in finetuning scripts #371


Draft · wants to merge 7 commits into main
30 changes: 17 additions & 13 deletions QEfficient/cloud/finetune.py
@@ -5,6 +5,7 @@
#
# -----------------------------------------------------------------------------

import logging
import random
import warnings
from typing import Any, Dict, Optional, Union
@@ -18,7 +19,7 @@
import torch.utils.data
from peft import PeftModel, get_peft_model
from torch.optim.lr_scheduler import StepLR
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.finetune.utils.config_utils import (
@@ -33,15 +34,16 @@
)
from QEfficient.finetune.utils.train_utils import get_longest_seq_length, print_model_size, train
from QEfficient.utils._utils import login_and_download_hf_lm
from QEfficient.utils.logging_utils import logger

# Try importing QAIC-specific module, proceed without it if unavailable
try:
import torch_qaic # noqa: F401
except ImportError as e:
print(f"Warning: {e}. Proceeding without QAIC modules.")
logger.log_rank_zero(f"{e}. Moving ahead without these qaic modules.")

logger.setLevel(logging.INFO)

from transformers import AutoModelForSequenceClassification

# Suppress all warnings
warnings.filterwarnings("ignore")
@@ -119,7 +121,7 @@ def load_model_and_tokenizer(
)

if not hasattr(model, "base_model_prefix"):
raise RuntimeError("Given huggingface model does not have 'base_model_prefix' attribute.")
logger.raise_runtimeerror("Given huggingface model does not have 'base_model_prefix' attribute.")

for param in getattr(model, model.base_model_prefix).parameters():
param.requires_grad = False
@@ -144,7 +146,7 @@ def load_model_and_tokenizer(
# If there is a mismatch between tokenizer vocab size and embedding matrix,
# throw a warning and then expand the embedding matrix
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
print("WARNING: Resizing embedding matrix to match tokenizer vocab size.")
logger.log_rank_zero("Resizing the embedding matrix to match the tokenizer vocab size.", logger.WARNING)
model.resize_token_embeddings(len(tokenizer))

# FIXME (Meet): Cover below line inside the logger once it is implemented.
@@ -160,7 +162,9 @@ def load_model_and_tokenizer(
if hasattr(model, "supports_gradient_checkpointing") and model.supports_gradient_checkpointing:
model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"preserve_rng_state": False})
else:
raise RuntimeError("Given model doesn't support gradient checkpointing. Please disable it and run it.")
logger.raise_runtimeerror(
"Given model doesn't support gradient checkpointing. Please disable it and run it."
)

model = apply_peft(model, train_config, peft_config_file, **kwargs)

@@ -220,7 +224,7 @@ def setup_dataloaders(
- Length of longest sequence in the dataset.

Raises:
ValueError: If validation is enabled but the validation set is too small.
RuntimeError: If validation is enabled but the validation set is too small.

Notes:
- Applies a custom data collator if provided by get_custom_data_collator.
@@ -244,12 +248,12 @@ def setup_dataloaders(
# )
##
train_dl_kwargs = get_dataloader_kwargs(train_config, dataset_train, dataset_processer, "train")
print("length of dataset_train", len(dataset_train))
logger.log_rank_zero(f"Length of dataset_train = {len(dataset_train)}")

# FIXME (Meet): Add custom data collator registration from the outside by the user.
custom_data_collator = get_custom_data_collator(dataset_processer, dataset_config)
if custom_data_collator:
print("custom_data_collator is used")
logger.log_rank_zero("Custom_data_collator is used")
train_dl_kwargs["collate_fn"] = custom_data_collator

# Create DataLoaders for the training and validation dataset
@@ -259,7 +263,7 @@ def setup_dataloaders(
pin_memory=True,
**train_dl_kwargs,
)
print(f"--> Num of Training Set Batches loaded = {len(train_dataloader)}")
logger.log_rank_zero(f"Number of Training Set Batches loaded = {len(train_dataloader)}")

eval_dataloader = None
if train_config.run_validation:
@@ -279,11 +283,11 @@ def setup_dataloaders(
**val_dl_kwargs,
)
if len(eval_dataloader) == 0:
raise ValueError(
logger.raise_runtimeerror(
f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})"
)
else:
print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
logger.log_rank_zero(f"Number of Validation Set Batches loaded = {len(eval_dataloader)}")

longest_seq_length, _ = get_longest_seq_length(
torch.utils.data.ConcatDataset([train_dataloader.dataset, eval_dataloader.dataset])
@@ -327,7 +331,7 @@ def main(peft_config_file: str = None, **kwargs) -> None:

# Create DataLoaders for the training and validation dataset
train_dataloader, eval_dataloader, longest_seq_length = setup_dataloaders(train_config, dataset_config, tokenizer)
print(
logger.log_rank_zero(
f"The longest sequence length in the train data is {longest_seq_length}, "
f"passed context length is {train_config.context_length} and overall model's context length is "
f"{model.config.max_position_embeddings}"
8 changes: 5 additions & 3 deletions QEfficient/finetune/dataset/custom_dataset.py
@@ -8,6 +8,8 @@
import importlib
from pathlib import Path

from QEfficient.utils.logging_utils import logger


def load_module_from_py_file(py_file: str) -> object:
"""
@@ -40,7 +42,7 @@ def get_custom_dataset(dataset_config, tokenizer, split: str):
try:
return getattr(module, func_name)(dataset_config, tokenizer, split)
except AttributeError as e:
print(
logger.error(
f"It seems like the given method name ({func_name}) is not present in the dataset .py file ({module_path.as_posix()})."
)
raise e
@@ -63,6 +65,6 @@ def get_data_collator(dataset_processer, dataset_config):
try:
return getattr(module, func_name)(dataset_processer)
except AttributeError:
Contributor

This applies to all the cases like L62.

Whenever we raise an exception, it is good to log it as well; otherwise the dumped log file would paint a different picture than the console output.

We can override the logger with a custom log-and-raise method, as below.

 import logging

 class CustomLogger(logging.Logger):
     def raise_runtimeerror(self, message):
         self.error(message)
         raise RuntimeError(message)

 logging.setLoggerClass(CustomLogger)
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG, filename='app.log', filemode='a',
                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

What do you say?
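
For illustration only (this snippet is not part of the PR), building on the CustomLogger sketched above, a call site would then log and raise in one step, keeping the log file and the console consistent:

import logging

logging.setLoggerClass(CustomLogger)
logger = logging.getLogger("demo")
logging.basicConfig(level=logging.INFO)

try:
    # Logs the message via self.error() and then raises it as a RuntimeError.
    logger.raise_runtimeerror("Given model doesn't support gradient checkpointing. Please disable it and run it.")
except RuntimeError as err:
    # The message already reached the configured handlers (console and/or
    # app.log) before the exception propagated.
    logger.info(f"caught and continuing: {err}")
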

print(f"Can not find the custom data_collator in the dataset.py file ({module_path.as_posix()}).")
print("Using the default data_collator instead.")
logger.info(f"Can not find the custom data_collator in the dataset.py file ({module_path.as_posix()}).")
logger.info("Using the default data_collator instead.")
return None
12 changes: 7 additions & 5 deletions QEfficient/finetune/dataset/grammar_dataset.py
@@ -10,6 +10,8 @@
from datasets import load_dataset
from torch.utils.data import Dataset

from QEfficient.utils.logging_utils import logger


class grammar(Dataset):
def __init__(self, tokenizer, csv_name=None, context_length=None):
@@ -20,8 +22,8 @@ def __init__(self, tokenizer, csv_name=None, context_length=None):
delimiter=",",
)
except Exception as e:
print(
"Loading of grammar dataset failed! Please see [here](https://github.com/meta-llama/llama-recipes/blob/main/src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) for details on how to download the dataset."
logger.error(
"Loading of grammar dataset failed! Please check (https://github.com/meta-llama/llama-recipes/blob/main/src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) for details on how to download the dataset."
)
raise e

@@ -36,7 +38,7 @@ def convert_to_features(self, example_batch):
# Create prompt and tokenize contexts and questions

if self.print_text:
print("Input Text: ", self.clean_text(example_batch["text"]))
logger.info("Input Text: ", self.clean_text(example_batch["text"]))

input_ = example_batch["input"]
target_ = example_batch["target"]
@@ -71,9 +73,9 @@ def get_dataset(dataset_config, tokenizer, csv_name=None, context_length=None):
"""cover function for handling loading the working dataset"""
"""dataset loading"""
currPath = Path.cwd() / "datasets_grammar" / "grammar_train.csv"
print(f"Loading dataset {currPath}")
logger.info(f"Loading dataset {currPath}")
csv_name = str(currPath)
print(csv_name)
logger.info(csv_name)
dataset = grammar(tokenizer=tokenizer, csv_name=csv_name, context_length=context_length)

return dataset
9 changes: 5 additions & 4 deletions QEfficient/finetune/eval.py
@@ -25,13 +25,14 @@
from utils.train_utils import evaluation, print_model_size

from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.utils.logging_utils import logger

try:
import torch_qaic # noqa: F401

device = "qaic:0"
except ImportError as e:
print(f"Warning: {e}. Moving ahead without these qaic modules.")
logger.warning(f"{e}. Moving ahead without these qaic modules.")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Suppress all warnings
@@ -77,7 +78,7 @@ def main(**kwargs):
# If there is a mismatch between tokenizer vocab size and embedding matrix,
# throw a warning and then expand the embedding matrix
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
print("WARNING: Resizing the embedding matrix to match the tokenizer vocab size.")
logger.warning("Resizing the embedding matrix to match the tokenizer vocab size.")
model.resize_token_embeddings(len(tokenizer))

print_model_size(model, train_config)
@@ -108,13 +109,13 @@
pin_memory=True,
**val_dl_kwargs,
)
print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
logger.info(f"Num of Validation Set Batches loaded = {len(eval_dataloader)}")
if len(eval_dataloader) == 0:
raise ValueError(
f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})"
)
else:
print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
logger.info(f"Num of Validation Set Batches loaded = {len(eval_dataloader)}")

model.to(device)
_ = evaluation(model, train_config, eval_dataloader, None, tokenizer, device)
4 changes: 2 additions & 2 deletions QEfficient/finetune/utils/config_utils.py
@@ -25,6 +25,7 @@
from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.finetune.data.sampler import DistributedLengthBasedBatchSampler
from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC
from QEfficient.utils.logging_utils import logger


def update_config(config, **kwargs):
@@ -53,8 +54,7 @@ def update_config(config, **kwargs):
raise ValueError(f"Config '{config_name}' does not have parameter: '{param_name}'")
else:
config_type = type(config).__name__
# FIXME (Meet): Once logger is available put this in debug level.
print(f"[WARNING]: Unknown parameter '{k}' for config type '{config_type}'")
logger.debug(f"Unknown parameter '{k}' for config type '{config_type}'")


def generate_peft_config(train_config: TrainConfig, peft_config_file: str = None, **kwargs) -> Any:
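
One behavioral note on the hunk above: the unknown-parameter message moves from an unconditional console WARNING to logger.debug, so at the INFO level set in finetune.py it will no longer appear by default. A minimal sketch of how a caller could surface it again, assuming the shared logger object is the one exported by QEfficient.utils.logging_utils:

import logging

from QEfficient.utils.logging_utils import logger

# Lower the threshold before update_config() runs so debug-level records,
# including the unknown-parameter message, are emitted again.
logger.setLevel(logging.DEBUG)
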
6 changes: 4 additions & 2 deletions QEfficient/finetune/utils/plot_metrics.py
@@ -11,6 +11,8 @@

import matplotlib.pyplot as plt

from QEfficient.utils.logging_utils import logger


def plot_metric(data, metric_name, x_label, y_label, title, colors):
plt.figure(figsize=(7, 6))
@@ -67,14 +69,14 @@ def plot_metrics_by_step(data, metric_name, x_label, y_label, colors):

def plot_metrics(file_path):
if not os.path.exists(file_path):
print(f"File {file_path} does not exist.")
logger.error(f"File {file_path} does not exist.")
return

with open(file_path, "r") as f:
try:
data = json.load(f)
except json.JSONDecodeError:
print("Invalid JSON file.")
logger.error("Invalid JSON file.")
return

directory = os.path.dirname(file_path)