How to Load and Test the SecAlign Trained Models? #5

Open
NEWbie0709 opened this issue Feb 24, 2025 · 2 comments

@NEWbie0709

Hi SecAlign Team,
Thank you for the great work! I'd like to test the SecAlign-trained models, but I'm unsure how to properly load and run them.

Do you provide a Colab notebook or any scripts for easy testing? If not, could you share the best way to load and run inference on these models?

Looking forward to your guidance. Thanks!

@NEWbie0709
Author

I tried to run the Mistral Instruct example using python setup.py --instruct. I loaded the base model and merged it with the LoRA adapter, but got this error:

Failed to run the model: Error(s) in loading state_dict for PeftModelForCausalLM:
size mismatch for base_model.model.model.embed_tokens.weight: copying a param with shape torch.Size([32006, 4096]) from checkpoint, the shape in current model is torch.Size([32000, 4096]).
size mismatch for base_model.model.lm_head.weight: copying a param with shape torch.Size([32006, 4096]) from checkpoint, the shape in current model is torch.Size([32000, 4096]).

This is my code:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch
import json
import os

def print_model_info(model, tokenizer):
    """
    Print diagnostic information about the model
    """
    print("\nModel Diagnostics:")
    print(f"Vocab size: {model.config.vocab_size}")
    print(f"Hidden size: {model.config.hidden_size}")
    print(f"Tokenizer vocab size: {tokenizer.vocab_size}")
    if hasattr(model.config, 'model_type'):
        print(f"Model type: {model.config.model_type}")

def load_model_with_local_lora(
    base_model_id="mistralai/Mistral-7B-Instruct-v0.1",
    local_lora_path="./adapter_model.safetensors",
    device="cuda" if torch.cuda.is_available() else "cpu"
):
    """
    Load Mistral base model and local LoRA adapter with debug info
    """
    print(f"\nLoading base model {base_model_id}...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_id,
        trust_remote_code=True
    )
    
    # Print diagnostic information
    print_model_info(base_model, tokenizer)
    
    print(f"\nLoading LoRA adapter from {local_lora_path}...")
    try:
        # Try to load adapter config first
        config_path = os.path.join(local_lora_path, "adapter_config.json")
        print(config_path)
        with open(config_path, 'r') as f:
            adapter_config = json.load(f)
        print("\nAdapter Configuration:")
        print(f"Base model: {adapter_config['base_model_name_or_path']}")
        print(f"Target modules: {adapter_config['target_modules']}")
        print(f"LoRA rank: {adapter_config['r']}")
        print(f"LoRA alpha: {adapter_config['lora_alpha']}")
        
        # Load the adapter
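        # NOTE: this is the call that raises the size-mismatch error shown above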
        model = PeftModel.from_pretrained(
            base_model,
            local_lora_path,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        return model, tokenizer
        
    except Exception as e:
        print(f"\nError loading adapter: {str(e)}")
        print("\nFull error details:")
        raise

def generate_text(
    prompt,
    model,
    tokenizer,
    max_length=512,
    temperature=0.7,
    top_p=0.9
):
    """
    Generate text using Mistral with LoRA adapter
    """
    formatted_prompt = f"<s>[INST] {prompt} [/INST]"
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True
    )
    
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage
if __name__ == "__main__":
    # Path to your local LoRA adapter files
    LOCAL_LORA_PATH = "mistral"  # Update this to your adapter path
    
    try:
        # Load model and local adapter
        model, tokenizer = load_model_with_local_lora(
            base_model_id="mistralai/Mistral-7B-Instruct-v0.1",
            local_lora_path=LOCAL_LORA_PATH
        )
        
        # Example prompt
        prompt = "Write a short story about a robot learning to paint:"
        
        # Generate text
        generated_text = generate_text(prompt, model, tokenizer)
        print("\nGenerated Text:")
        print(generated_text)
        
    except Exception as e:
        print(f"\nFailed to run the model: {str(e)}")

@NEWbie0709
Author

Separately, when I test the Llama3-8B-Instruct (0.8 GB) LoRA adapter and merge it with the base model, I get this error:

Error loading adapter: Error while deserializing header: MetadataIncompleteBuffer
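
From what I can find, this error usually means the .safetensors file is truncated or is actually a Git LFS pointer instead of the real weights, so the download may be the problem on my side. This is roughly how I'm checking my local copy (the path below is just where I put the adapter):

import os
from safetensors import safe_open

path = "llama3/adapter_model.safetensors"  # my local copy of the adapter weights

# A Git LFS pointer or a truncated download is only a few hundred bytes,
# whereas the real adapter should be roughly the advertised 0.8 GB.
print("file size (bytes):", os.path.getsize(path))

# If the header parses, the file is at least structurally intact.
with safe_open(path, framework="pt", device="cpu") as f:
    print("metadata:", f.metadata())
    print("first tensors:", list(f.keys())[:5])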
