Implementation of generate and integration of lm_eval (evaluation harness) #222

Draft · wants to merge 23 commits into base: main
400 changes: 400 additions & 0 deletions check_logits_hidden_layers.ipynb

Large diffs are not rendered by default.

Binary file added classes_fast_llm.jpg
77 changes: 77 additions & 0 deletions examples/qwen_evaluate.yaml
@@ -0,0 +1,77 @@
training:
train_iters: 100_000
logs:
interval: 10
evaluations:
gsm8k:
type: lm_eval
cli_args:
- --tasks
- gsm8k
- --output_path
- /mnt/checkpoints/test/denis/qwen_eval_experiment/lm_eval
# stack_3b:
# iterations: 10
# interval: 10
# fineweb:
# iterations: 10
# interval: 10
checkpoint:
interval: 1000
keep: 5
test_iters: 0
export: # (1)!
format: llama
interval: 20_000
batch:
micro_batch_size: 16
sequence_length: 4096
batch_size: 16
data:
tokenizer:
path: /mnt/checkpoints/pretrained_models/Qwen2-1.5B-Instruct
bos_token: "<|endoftext|>"
datasets:
# Note: these datasets were tokenized with a different tokenizer than Llama
training:
type: file
path: /mnt/datasets/test/denis/fineweb_the_stack_3b.yaml
stack_3b:
type: memmap
path: /mnt/datasets/data_collections/the_stack_3b/tokens/stack_3b/default/train/99
fineweb:
type: memmap
path: /mnt/datasets/data_collections/standalone_datasets/tokens/HuggingFaceFW/fineweb/default/train/9_1000
optimizer:
weight_decay: 0.1
beta_1: 0.9
beta_2: 0.95
learning_rate:
base: 1.0e-04 # (3)!
minimum: 1.0e-05
decay_style: cosine
decay_iterations: 100_000
warmup_iterations: 2000
pretrained: # (4)!
format: qwen2
path: /mnt/checkpoints/pretrained_models/Qwen2-1.5B-Instruct
model_weights: yes # (5)!
model:
base_model:
transformer:
use_flash_attention: yes
cross_entropy_impl: fused
multi_stage:
zero_stage: 2
distributed:
training_dtype: bf16

run:
experiment_dir: "/mnt/checkpoints/test/denis/qwen_eval_experiment"

# training:
# logs:
# interval: 10
# wandb:
# project_name: ${job.project_name}
# group_name: ${job.project_version}
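
For reference, the `cli_args` under `evaluations.gsm8k` above mirror the flags of the lm-evaluation-harness CLI. A rough sketch of an equivalent programmatic call is below; the model identifier and output file name are illustrative assumptions, not taken from this PR.

```python
# Hedged sketch: roughly what the gsm8k entry above asks the harness to do.
# The model identifier and output file name are illustrative assumptions.
import json

import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                                       # built-in Hugging Face adapter
    model_args="pretrained=Qwen/Qwen2-1.5B-Instruct",
    tasks=["gsm8k"],
)

# Persist metrics, similar in spirit to the --output_path flag above.
with open("lm_eval_results.json", "w") as f:
    json.dump(results["results"], f, indent=2)
```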
77 changes: 77 additions & 0 deletions examples/smol_evaluate.yaml
@@ -0,0 +1,77 @@
training:
train_iters: 100_000
logs:
interval: 10
evaluations:
gsm8k:
type: lm_eval
cli_args:
- --tasks
- gsm8k
- --output_path
- /mnt/checkpoints/test/denis/smol_eval_experiment/lm_eval
# stack_3b:
# type: loss
# iterations: 10
# interval: 10
# fineweb:
# iterations: 10
# interval: 10
checkpoint:
interval: 1000
keep: 5
test_iters: 0
export: # (1)!
format: llama
interval: 20_000
batch:
micro_batch_size: 16
sequence_length: 4096
batch_size: 16
data:
tokenizer:
path: /mnt/checkpoints/pretrained_models/SmolLM2-135M-Instruct
datasets:
# Note: these datasets were tokenized with a different tokenizer than Llama
training:
type: file
path: /mnt/datasets/test/denis/fineweb_the_stack_3b.yaml
stack_3b:
type: memmap
path: /mnt/datasets/data_collections/the_stack_3b/tokens/stack_3b/default/train/99
fineweb:
type: memmap
path: /mnt/datasets/data_collections/standalone_datasets/tokens/HuggingFaceFW/fineweb/default/train/9_1000
optimizer:
weight_decay: 0.1
beta_1: 0.9
beta_2: 0.95
learning_rate:
base: 1.0e-04 # (3)!
minimum: 1.0e-05
decay_style: cosine
decay_iterations: 100_000
warmup_iterations: 2000
pretrained: # (4)!
format: llama
path: /mnt/checkpoints/pretrained_models/SmolLM2-135M-Instruct/
model_weights: yes # (5)!
model:
base_model:
transformer:
use_flash_attention: yes
cross_entropy_impl: fused
multi_stage:
zero_stage: 2
distributed:
training_dtype: bf16

run:
experiment_dir: "/mnt/checkpoints/test/denis/smol_eval_experiment"

# training:
# logs:
# interval: 10
# wandb:
# project_name: ${job.project_name}
# group_name: ${job.project_version}
5 changes: 5 additions & 0 deletions fast_llm/data/config.py
@@ -34,3 +34,8 @@ class TokenizerConfig(Config):
desc="Path to the tokenizer file.",
hint=FieldHint.core,
)
bos_token: str | None = Field(
default=None,
desc="BOS token to use if the tokenizer doesn't define one; must be an existing token.",
hint=FieldHint.core,
)
15 changes: 12 additions & 3 deletions fast_llm/data/tokenizer.py
@@ -1,6 +1,6 @@
import numpy as np
import torch
from transformers import PreTrainedTokenizerFast
from transformers import PreTrainedTokenizerFast, AutoTokenizer

from fast_llm.data.config import TokenizerConfig
from fast_llm.engine.config_utils.run import log_main_rank
@@ -13,9 +13,18 @@ class Tokenizer:

def __init__(self, config: TokenizerConfig):
log_main_rank(f"> loading tokenizer from {config.path} ...")
self.tokenizer: PreTrainedTokenizerFast = PreTrainedTokenizerFast.from_pretrained(
pretrained_model_name_or_path=config.path, errors="replace", max_len=None
# self.tokenizer: PreTrainedTokenizerFast = PreTrainedTokenizerFast.from_pretrained(
# pretrained_model_name_or_path=config.path, errors="replace", max_len=None
# )
self.tokenizer = AutoTokenizer.from_pretrained(
pretrained_model_name_or_path=config.path,
errors="replace",
max_len=None,
trust_remote_code=True,
use_fast=True,  # prefer the fast (Rust-backed) tokenizer implementation
)
if config.bos_token is not None:
self.tokenizer.bos_token = config.bos_token
if self.tokenizer.eos_token_id is None:
raise ValueError("Tokenizer does not have an EOS token.")
if self.tokenizer.bos_token_id is None:
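
A standalone sketch of what the new `bos_token` override does, mirroring the Qwen2 example config above. This is not Fast-LLM code, and the model identifier is illustrative.

```python
# Hedged sketch: assign an existing vocabulary token as BOS when the pretrained
# tokenizer does not define one, mirroring TokenizerConfig.bos_token above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2-1.5B-Instruct",  # illustrative model id
    trust_remote_code=True,
    use_fast=True,
)
if tokenizer.bos_token is None:
    tokenizer.bos_token = "<|endoftext|>"  # must already exist in the vocabulary

assert tokenizer.eos_token_id is not None
assert tokenizer.bos_token_id is not None
```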
3 changes: 2 additions & 1 deletion fast_llm/engine/inference/config.py
@@ -91,7 +91,8 @@ def __eq__(self, other) -> bool:

def to_dict(self) -> dict[str, typing.Any]:
out = super().to_dict()
out["fast_llm_config"] = self.fast_llm_config.to_dict(verbose=FieldVerboseLevel.everything)
if self.fast_llm_config is not None:
out["fast_llm_config"] = self.fast_llm_config.to_dict(verbose=FieldVerboseLevel.everything)
return out

def to_diff_dict(self) -> dict[str, typing.Any]:
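
The `None` guard matters because, as the constructor change in huggingface.py below explains, `fast_llm_config` is temporarily detached while the HF base constructor deep-copies the config. A simplified, assumed sketch of that interplay; the names are invented for illustration.

```python
# Simplified, hypothetical sketch: serialization must tolerate the window in
# which fast_llm_config is detached so that deepcopy stays picklable.
import copy


class ToyConfig:
    def __init__(self, fast_llm_config):
        # In the real config this may hold non-picklable handles (process groups).
        self.fast_llm_config = fast_llm_config

    def to_dict(self):
        out = {"model_type": "fast_llm"}
        if self.fast_llm_config is not None:
            out["fast_llm_config"] = dict(self.fast_llm_config)
        return out


config = ToyConfig({"hidden_size": 2048})
saved, config.fast_llm_config = config.fast_llm_config, None  # detach before copying
copied = copy.deepcopy(config)                                # safe: nothing non-picklable left
config.fast_llm_config = saved                                # restore afterward
assert copied.to_dict() == {"model_type": "fast_llm"}
```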
96 changes: 80 additions & 16 deletions fast_llm/engine/inference/huggingface.py
@@ -2,16 +2,22 @@
import pathlib
import typing

import torch
import transformers.modeling_outputs
import transformers.generation.utils

from fast_llm.engine.checkpoint.config import CheckpointLoadConfig, FastLLMCheckpointFormat
from fast_llm.engine.inference.config import HuggingfaceModelConfig
from fast_llm.engine.inference.runner import InferenceRunner
from fast_llm.engine.multi_stage.config import StageMode
from fast_llm.engine.multi_stage.fast_llm_model import FastLLMModel
from fast_llm.engine.schedule.config import BatchConfig, ScheduleConfig
from fast_llm.engine.schedule.runner import ScheduleRunner
from fast_llm.engine.schedule.schedule import Schedule
from fast_llm.engine.training.config import TrainerConfig


class HuggingfacePreTrainedModel(transformers.PreTrainedModel):
class HuggingfaceBaseModelForCausalLM(transformers.PreTrainedModel, transformers.generation.utils.GenerationMixin):
config_class: typing.ClassVar[type[HuggingfaceModelConfig]] = HuggingfaceModelConfig
runner_class: typing.ClassVar[type[InferenceRunner]] = InferenceRunner
config: HuggingfaceModelConfig
@@ -20,31 +26,84 @@ class HuggingfacePreTrainedModel(transformers.PreTrainedModel):
# _supports_cache_class = False
# _tied_weights_keys = []

def __init__(self, config: HuggingfaceModelConfig, fast_llm_model: FastLLMModel, **kwargs):
def __init__(
self,
config: HuggingfaceModelConfig,
fast_llm_model: FastLLMModel,
trainer_config: TrainerConfig | None = None,
runner: ScheduleRunner | None = None,
**kwargs,
):
"""
Initializes the HuggingfaceBaseModelForCausalLM either in standalone mode (single-GPU inference)
or in integrated training mode (with the runner from the training loop).

- If `trainer_config` and `runner` are both provided → training mode is assumed.
- If both are omitted → standalone mode with default configs is assumed.
- Any other combination raises an error.
"""
assert self.runner_class.model_class.config_class is config.model_config_class
assert config.fast_llm_config is fast_llm_model.config
assert isinstance(config, self.config_class)

# The HF constructor performs a deep copy of the config,
# but config.fast_llm_config may contain non-picklable items like process groups.
# Temporarily remove it before the call and restore it afterward.
fast_llm_config = config.fast_llm_config
config.fast_llm_config = None
super().__init__(config, **kwargs)
config.fast_llm_config = fast_llm_config

self._inference_runner = self.runner_class(fast_llm_model, trainer_config, runner)

self._inference_runner = self.runner_class(fast_llm_model)
if not fast_llm_model.is_setup:
fast_llm_model.setup(mode=StageMode.inference)
# The model may come from `from_pretrained`, which sets it up via the current HF wrapper API,
# or be handed over already set up from the training loop; either way, reject a model that is not set up.
assert fast_llm_model.is_setup
# if not fast_llm_model.is_setup:
# fast_llm_model.setup(distributed=distributed, mode=StageMode.inference)
self._inference_runner.setup()

# Transformers needs to be able to inspect the base model.
self.fast_llm_base_model = fast_llm_model.base_model
# TODO: Support distributed models?
assert fast_llm_model.config.distributed.world_size == 1
# # TODO: Support distributed models?
# assert fast_llm_model.config.distributed.world_size == 1

with transformers.modeling_utils.no_init_weights():
self.post_init()

def forward(
self,
input_ids: torch.Tensor | None = None,
attention_mask: torch.Tensor | None = None,
position_ids: torch.Tensor | None = None,
past_key_values=None,
inputs_embeds: torch.FloatTensor | None = None,
labels: torch.LongTensor | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
) -> tuple | transformers.modeling_outputs.CausalLMOutputWithPast:
# Meant to be overridden in derived classes
raise NotImplementedError()

@classmethod
def from_fast_llm_model_in_training(
cls, fast_llm_model: FastLLMModel, trainer_config: TrainerConfig, runner: ScheduleRunner, **kwargs
):
config = cls.config_class(fast_llm_model.config)
return cls(config, fast_llm_model, trainer_config=trainer_config, runner=runner, **kwargs)

@classmethod
def from_pretrained(
cls,
pretrained_model_name_or_path: str | os.PathLike | CheckpointLoadConfig,
*,
mode: StageMode = StageMode.inference,
*updates: dict[str | tuple[str, ...], typing.Any],
optimizer_state_names: tuple[str, ...] | None = None,
# setup: bool = True,
mode: StageMode = StageMode.training,
use_cpu: bool = False,
stage_filter: set | None = None,
**kwargs,
) -> typing.Self:
# Pretrained config.
@@ -54,18 +113,23 @@ def from_pretrained(
format=FastLLMCheckpointFormat,
)

updates = {}
torch_dtype = kwargs.pop("torch_dtype", None)
if torch_dtype is not None:
updates[("distributed", "training_dtype")] = torch_dtype

# Create the model
# Always set up the model and create the distributed instance internally for now.
fast_llm_model = cls.runner_class.model_class.from_pretrained(
pretrained_model_name_or_path, updates, mode=mode
pretrained_model_name_or_path,
*updates,
optimizer_state_names=optimizer_state_names,
# setup=setup,
mode=mode,
use_cpu=use_cpu,
stage_filter=stage_filter,
)
config = cls.config_class(fast_llm_model.config)

config = cls.config_class(fast_llm_model.config)
return cls(config, fast_llm_model, **kwargs)

def _init_weights(self, module) -> None:
raise NotImplementedError(module)

def can_generate(self):
return True
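
As a usage sketch, a concrete subclass of this wrapper could be driven through the standard `generate` API. The import path, class name, and checkpoint path below are assumptions for illustration, not part of this diff; only the `from_pretrained`/`generate` flow mirrors the change.

```python
# Hedged usage sketch: only the from_pretrained/generate flow mirrors this diff;
# the import path, class name, and checkpoint path are assumed for illustration.
import torch

from fast_llm.models.gpt.huggingface import HuggingfaceGPTModelForCausalLM  # assumed location

model = HuggingfaceGPTModelForCausalLM.from_pretrained(
    "/mnt/checkpoints/test/denis/qwen_eval_experiment/export/llama/20000",  # hypothetical export dir
)

input_ids = torch.tensor([[1, 2, 3, 4]], dtype=torch.int64, device="cuda")
output_ids = model.generate(input_ids, max_new_tokens=16, use_cache=False)
print(output_ids.shape)
```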