28 C
Canberra
Saturday, January 24, 2026

A Gentle Introduction to Language Model Fine-tuning


import dataclasses

import tokenizers
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

 

 

# Model architecture: same as the training script

@dataclasses.dataclass
class LlamaConfig:
    """Hyperparameters for the Llama model.

    Defaults describe a ~125M-parameter GPT-2-small-sized model with
    grouped-query attention (12 query heads sharing 3 KV heads).
    """

    vocab_size: int = 50000                  # tokenizer vocabulary size
    max_position_embeddings: int = 2048      # longest supported sequence
    hidden_size: int = 768                   # model (embedding) width
    intermediate_size: int = 4 * 768         # MLP width (4x hidden, GPT convention)
    num_hidden_layers: int = 12              # decoder layer count
    num_attention_heads: int = 12            # query heads
    num_key_value_heads: int = 3             # KV heads (< num_attention_heads => GQA)

 

class RotaryPositionEncoding(nn.Module):
    """Rotary position encoding (RoPE).

    Precomputes cos/sin tables for every position up to
    ``max_position_embeddings`` and applies the rotation to q/k tensors
    of shape (batch, seq_len, num_heads, head_dim).
    """

    def __init__(self, dim: int, max_position_embeddings: int) -> None:
        super().__init__()
        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        base = 10_000.0
        # One frequency per pair of channels: theta_i = base^(-2i/dim).
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2) / dim))
        # Duplicate so channels i and i + dim/2 share a frequency.
        # (fix: dim=1 is invalid for a 1-D tensor; concatenate along dim 0)
        inv_freq = torch.cat((inv_freq, inv_freq), dim=0)
        position = torch.arange(max_position_embeddings)
        sinusoid_inp = torch.outer(position, inv_freq)  # (max_pos, dim)
        # Buffers (not parameters): move with the module, excluded from grads.
        self.register_buffer("cos", sinusoid_inp.cos())
        self.register_buffer("sin", sinusoid_inp.sin())

    def forward(self, x: Tensor) -> Tensor:
        """Rotate ``x`` by its position.

        Args:
            x: Tensor of shape (batch, seq_len, num_heads, head_dim).

        Returns:
            Tensor of the same shape with RoPE applied.
        """
        batch_size, seq_len, num_heads, head_dim = x.shape
        device = x.device
        dtype = x.dtype
        # Broadcast tables over batch and heads: (1, seq_len, 1, dim).
        # (fix: the original view(1, seq_len, 1, 1) collapsed the channel axis)
        cos = self.cos.to(device, dtype)[:seq_len].view(1, seq_len, 1, self.dim)
        sin = self.sin.to(device, dtype)[:seq_len].view(1, seq_len, 1, self.dim)
        # "Rotate half": split the channel dim (fix: was dim=1, the seq axis)
        # and negate the second half (fix: the minus sign had been dropped).
        x1, x2 = x.chunk(2, dim=-1)
        rotated = torch.cat((-x2, x1), dim=-1)
        return (x * cos) + (rotated * sin)

 

class LlamaAttention(nn.Module):

    “”“Grouped-query consideration with rotary embeddings.”“”

 

    def __init__(self, config: LlamaConfig) -> None:

        tremendous().__init__()

        self.hidden_size = config.hidden_size

        self.num_heads = config.num_attention_heads

        self.head_dim = self.hidden_size // self.num_heads

        self.num_kv_heads = config.num_key_value_heads

        assert (self.head_dim * self.num_heads) == self.hidden_size

 

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)

        self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)

        self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)

        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)

 

    def ahead(self, hidden_states: Tensor, rope: RotaryPositionEncoding) -> Tensor:

        bs, seq_len, dim = hidden_states.measurement()

 

        query_states = self.q_proj(hidden_states).view(bs, seq_len, self.num_heads, self.head_dim)

        key_states = self.k_proj(hidden_states).view(bs, seq_len, self.num_kv_heads, self.head_dim)

        value_states = self.v_proj(hidden_states).view(bs, seq_len, self.num_kv_heads, self.head_dim)

 

        attn_output = F.scaled_dot_product_attention(

            rope(query_states).transpose(1, 2),

            rope(key_states).transpose(1, 2),

            value_states.transpose(1, 2),

            is_causal=True,

            dropout_p=0.0,

            enable_gqa=True,

        )

 

        attn_output = attn_output.transpose(1, 2).reshape(bs, seq_len, self.hidden_size)

        return self.o_proj(attn_output)

 

class LlamaMLP(nn.Module):
    """Feed-forward network with SwiGLU activation.

    Computes down_proj(silu(gate_proj(x)) * up_proj(x)); all projections
    are bias-free, following the Llama architecture.
    """

    def __init__(self, config: LlamaConfig) -> None:
        super().__init__()
        self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)
        self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)
        self.act_fn = F.silu
        self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the SwiGLU MLP; shape (..., hidden_size) is preserved."""
        gate = self.act_fn(self.gate_proj(x))
        up = self.up_proj(x)
        return self.down_proj(gate * up)

 

class LlamaDecoderLayer(nn.Module):
    """Single pre-norm transformer layer for a Llama model."""

    def __init__(self, config: LlamaConfig) -> None:
        super().__init__()
        # eps restored to the conventional 1e-5 (the minus sign had been
        # dropped, leaving 1e5, which would destroy normalization).
        self.input_layernorm = nn.RMSNorm(config.hidden_size, eps=1e-5)
        self.self_attn = LlamaAttention(config)
        self.post_attention_layernorm = nn.RMSNorm(config.hidden_size, eps=1e-5)
        self.mlp = LlamaMLP(config)

    def forward(self, hidden_states: Tensor, rope: RotaryPositionEncoding) -> Tensor:
        """Run one layer: norm -> attention -> residual, norm -> MLP -> residual."""
        # Attention sub-layer (pre-norm) with residual connection.
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)
        attn_outputs = self.self_attn(hidden_states, rope=rope)
        hidden_states = attn_outputs + residual

        # MLP sub-layer (pre-norm) with residual connection.
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        return self.mlp(hidden_states) + residual

 

class LlamaModel(nn.Module):
    """The full Llama model without any pretraining heads."""

    def __init__(self, config: LlamaConfig) -> None:
        super().__init__()
        # One rotary encoder shared by all layers, sized per attention head.
        self.rotary_emb = RotaryPositionEncoding(
            config.hidden_size // config.num_attention_heads,
            config.max_position_embeddings,
        )

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
        self.layers = nn.ModuleList([
            LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)
        ])
        # eps restored to 1e-5 (dropped-minus typo had left 1e5).
        self.norm = nn.RMSNorm(config.hidden_size, eps=1e-5)

    def forward(self, input_ids: Tensor) -> Tensor:
        """Embed token ids and run them through every decoder layer.

        Args:
            input_ids: Integer tensor of shape (batch, seq_len).

        Returns:
            Final hidden states of shape (batch, seq_len, hidden_size).
        """
        hidden_states = self.embed_tokens(input_ids)
        for layer in self.layers:
            hidden_states = layer(hidden_states, rope=self.rotary_emb)
        return self.norm(hidden_states)

 

class LlamaForPretraining(nn.Module):
    """Llama base model plus a linear language-modeling head."""

    def __init__(self, config: LlamaConfig) -> None:
        super().__init__()
        self.base_model = LlamaModel(config)
        # Untied output head mapping hidden states to vocabulary logits.
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

    def forward(self, input_ids: Tensor) -> Tensor:
        """Return next-token logits of shape (batch, seq_len, vocab_size)."""
        hidden_states = self.base_model(input_ids)
        return self.lm_head(hidden_states)

 

 

def apply_repetition_penalty(logits: Tensor, tokens: list[int], penalty: float) -> Tensor:
    """Apply a repetition penalty to the logits, in place.

    Each already-generated token is made less likely: positive logits are
    divided by ``penalty`` and negative logits multiplied by it, so a
    penalty > 1 always pushes the probability down regardless of sign.

    Args:
        logits: 1-D tensor of next-token logits (modified in place).
        tokens: Token ids to penalize.
        penalty: Penalty factor (> 1 discourages repetition; 1 is a no-op).

    Returns:
        The same (mutated) logits tensor.
    """
    for tok in tokens:
        if logits[tok] > 0:
            logits[tok] /= penalty
        else:
            logits[tok] *= penalty
    return logits

 

 

@torch.no_grad()
def generate(model, tokenizer, prompt, max_tokens=100, temperature=1.0, repetition_penalty=1.0,
             repetition_penalty_range=10, top_k=50, device=None) -> str:
    """Generate text autoregressively from a prompt.

    Args:
        model: The trained LlamaForPretraining model
        tokenizer: The tokenizer
        prompt: Input text prompt
        max_tokens: Maximum number of tokens to generate
        temperature: Sampling temperature (higher = more random)
        repetition_penalty: Penalty for repeating tokens
        repetition_penalty_range: Number of previous tokens to consider for repetition penalty
        top_k: Only sample from top k most likely tokens
        device: Device the model is loaded on

    Returns:
        Generated text
    """
    # Switch to evaluation mode: norm/dropout layers behave differently.
    model.eval()

    # Special token ids marking beginning/end of text.
    bot_id = tokenizer.token_to_id("[BOT]")
    eot_id = tokenizer.token_to_id("[EOT]")

    # Tokenize the prompt into an integer tensor of shape (1, prompt_len).
    prompt_tokens = [bot_id] + tokenizer.encode(" " + prompt).ids
    input_ids = torch.tensor([prompt_tokens], dtype=torch.int64, device=device)

    # Autoregressively generate tokens one at a time.
    generated_tokens = []
    for _step in range(max_tokens):
        # Forward pass; logits shape (1, seq_len, vocab_size).
        logits = model(input_ids)

        # The LAST position predicts the next token.
        # (fix: logits[0, 1, :] had lost its minus sign)
        next_token_logits = logits[0, -1, :] / temperature

        # Penalize tokens emitted in the last `repetition_penalty_range` steps.
        # (fix: the slice must be negative to take the most recent tokens)
        if repetition_penalty != 1.0 and len(generated_tokens) > 0:
            next_token_logits = apply_repetition_penalty(
                next_token_logits,
                generated_tokens[-repetition_penalty_range:],
                repetition_penalty,
            )

        # Top-k filtering: mask everything below the k-th largest logit.
        # (fix: the threshold is the smallest of the top-k values, index -1)
        if top_k > 0:
            top_k_logits = torch.topk(next_token_logits, top_k)[0]
            indices_to_remove = next_token_logits < top_k_logits[-1]
            next_token_logits[indices_to_remove] = float("-inf")

        # Sample from the filtered distribution.
        # (fix: logits are 1-D, so softmax over dim=-1; dim=1 raises)
        probs = F.softmax(next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)

        # Early stop if the end-of-text token is generated.
        if next_token.item() == eot_id:
            break

        # Append the new token to input_ids for the next iteration.
        input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)
        generated_tokens.append(next_token.item())

    # Decode all generated tokens back into text.
    return tokenizer.decode(generated_tokens)

 

 

# --- Inference configuration ---
checkpoint = "llama_model_final.pth"   # saved model checkpoint
tokenizer = "bpe_50K.json"             # saved tokenizer file (rebound below)
max_tokens = 100
temperature = 0.9
top_k = 50
penalty = 1.1
penalty_range = 10

# Load tokenizer and model, preferring GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = tokenizers.Tokenizer.from_file(tokenizer)
config = LlamaConfig()
model = LlamaForPretraining(config).to(device)
model.load_state_dict(torch.load(checkpoint, map_location=device))

# Generate a continuation of the prompt and print it.
prompt = "Once upon a time, there was"
response = generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    max_tokens=max_tokens,
    temperature=temperature,
    top_k=top_k,
    repetition_penalty=penalty,
    repetition_penalty_range=penalty_range,
    device=device,
)
print(prompt)
print("-" * 20)
print(response)

Related Articles

LEAVE A REPLY

Please enter your comment!
Please enter your name here

[td_block_social_counter facebook="tagdiv" twitter="tagdivofficial" youtube="tagdiv" style="style8 td-social-boxed td-social-font-icons" tdc_css="eyJhbGwiOnsibWFyZ2luLWJvdHRvbSI6IjM4IiwiZGlzcGxheSI6IiJ9LCJwb3J0cmFpdCI6eyJtYXJnaW4tYm90dG9tIjoiMzAiLCJkaXNwbGF5IjoiIn0sInBvcnRyYWl0X21heF93aWR0aCI6MTAxOCwicG9ydHJhaXRfbWluX3dpZHRoIjo3Njh9" custom_title="Stay Connected" block_template_id="td_block_template_8" f_header_font_family="712" f_header_font_transform="uppercase" f_header_font_weight="500" f_header_font_size="17" border_color="#dd3333"]
- Advertisement -spot_img

Latest Articles