9.6 C
Canberra
Tuesday, June 2, 2026

The Roadmap for Mastering LLMOps in 2026


# llm_with_tracing.py

# Function: A production-ready LLM name wrapper with full observability.

# Each name is traced in Langfuse: enter, output, tokens, value, latency.

#

# Conditions:

#   pip set up langfuse anthropic python-dotenv

#

# Setup:

#   1. Create a free account at https://cloud.langfuse.com

#   2. Get your keys from Settings > API Keys

#   3. Create a .env file with the variables under

#

# Run:

#   python llm_with_tracing.py

 

import os

import time

from dotenv import load_dotenv

import anthropic

from langfuse import Langfuse

 

# Load surroundings variables from .env file

load_dotenv()

 

# Required surroundings variables in your .env:

# LANGFUSE_PUBLIC_KEY=pk-lf-…

# LANGFUSE_SECRET_KEY=sk-lf-…

# LANGFUSE_HOST=https://cloud.langfuse.com   (or your self-hosted URL)

# ANTHROPIC_API_KEY=sk-ant-…

 

# Initialize shoppers

langfuse_client = Langfuse()          # Reads keys routinely from surroundings

anthropic_client = anthropic.Anthropic()  # Reads ANTHROPIC_API_KEY from surroundings

 

# ── Configuration ─────────────────────────────────────────────────────────────

# Retailer your immediate right here, not inline within the API name.

# This makes it versionable and testable independently.

SYSTEM_PROMPT = “”“You’re a useful buyer assist assistant.

Reply questions clearly and concisely.

Should you have no idea one thing, say so instantly — don’t guess.”“”

 

MODEL = “claude-sonnet-4-20250514”

 

# Anthropic’s pricing as of mid-2026 (replace when pricing adjustments)

# Used to calculate value per name for value monitoring

COST_PER_INPUT_TOKEN  = 3.00 / 1_000_000   # $3.00 per million enter tokens

COST_PER_OUTPUT_TOKEN = 15.00 / 1_000_000  # $15.00 per million output tokens

 

 

def call_llm_with_tracing(

    user_message: str,

    session_id: str = “default-session”,

    user_id: str = “nameless”

) -> str:

    “”

    Make a traced LLM name. Each name creates a Langfuse hint with:

    – Full enter and output

    – Token utilization (enter, output, complete)

    – Calculated value in USD

    – Latency in milliseconds

    – Mannequin used and session context

 

    Parameters:

        user_message : The message from the person

        session_id   : Teams associated calls into one dialog in Langfuse

        user_id      : Associates the decision with a selected person for analytics

 

    Returns:

        The LLM response as a string

    ““”

 

    # Create a top-level hint for this person interplay

    # The hint seems within the Langfuse dashboard as one unit of labor

    hint = langfuse_client.hint(

        title=“customer-support-call”,

        session_id=session_id,

        user_id=user_id,

        enter={“user_message”: user_message, “system_prompt”: SYSTEM_PROMPT}

    )

 

    # Create a era span contained in the hint

    # This captures model-specific particulars: mannequin title, tokens, value

    era = hint.era(

        title=“claude-completion”,

        mannequin=MODEL,

        enter={

            “system”: SYSTEM_PROMPT,

            “messages”: [{“role”: “user”, “content”: user_message}]

        }

    )

 

    start_time = time.time()

 

    strive:

        # Make the API name

        response = anthropic_client.messages.create(

            mannequin=MODEL,

            max_tokens=1024,

            system=SYSTEM_PROMPT,

            messages=[{“role”: “user”, “content”: user_message}]

        )

 

        latency_ms = int((time.time() start_time) * 1000)

 

        # Extract the response textual content

        response_text = response.content material[0].textual content

 

        # Extract token utilization from the response

        input_tokens  = response.utilization.input_tokens

        output_tokens = response.utilization.output_tokens

        total_tokens  = input_tokens + output_tokens

 

        # Calculate value for this name

        cost_usd = (

            input_tokens  * COST_PER_INPUT_TOKEN +

            output_tokens * COST_PER_OUTPUT_TOKEN

        )

 

        # Replace the era span with outcomes

        # This information populates the Langfuse value and token dashboards

        era.finish(

            output=response_text,

            utilization={

                “enter”:  input_tokens,

                “output”: output_tokens,

                “complete”:  total_tokens,

                “unit”:   “TOKENS”

            },

            metadata={

                “latency_ms”: latency_ms,

                “cost_usd”:   spherical(cost_usd, 6),

                “mannequin”:      MODEL

            }

        )

 

        # Replace the hint with the ultimate output

        hint.replace(

            output={“response”: response_text},

            metadata={“total_cost_usd”: spherical(cost_usd, 6)}

        )

 

        # Print a abstract to stdout for native visibility

        print(f“n{‘─’ * 60}”)

        print(f“Person:    {user_message}”)

        print(f“Claude:  {response_text}”)

        print(f“Tokens:  {input_tokens} in / {output_tokens} out / {total_tokens} complete”)

        print(f“Value:    ${cost_usd:.6f}”)

        print(f“Latency: {latency_ms}ms”)

        print(f“Hint:   {langfuse_client.base_url}/hint/{hint.id}”)

        print(f“{‘─’ * 60}n”)

 

        return response_text

 

    besides Exception as e:

        # Document the error within the hint so it exhibits up in Langfuse

        era.finish(

            output=None,

            metadata={“error”: str(e), “latency_ms”: int((time.time() start_time) * 1000)}

        )

        hint.replace(output={“error”: str(e)})

 

        # All the time flush earlier than elevating — ensures the error hint is shipped

        langfuse_client.flush()

        increase

 

    lastly:

        # Flush sends all buffered occasions to Langfuse

        # In a long-running service, Langfuse flushes routinely.

        # In a script, you have to flush manually earlier than the method exits.

        langfuse_client.flush()

 

 

# ── Run an indication ────────────────────────────────────────────────────────

if __name__ == “__main__”:

    # Simulate two turns of a buyer assist dialog

    test_messages = [

        “What is your return policy for electronics?”,

        “Can I return an item I bought 45 days ago?”

    ]

 

    session = “demo-session-001”

 

    for i, message in enumerate(test_messages):

        print(f“nCall {i + 1}/{len(test_messages)}”)

        strive:

            call_llm_with_tracing(

                user_message=message,

                session_id=session,

                user_id=“test-user-42”

            )

        besides Exception as e:

            print(f“Error on name {i + 1}: {e}”)

Related Articles

LEAVE A REPLY

Please enter your comment!
Please enter your name here

[td_block_social_counter facebook="tagdiv" twitter="tagdivofficial" youtube="tagdiv" style="style8 td-social-boxed td-social-font-icons" tdc_css="eyJhbGwiOnsibWFyZ2luLWJvdHRvbSI6IjM4IiwiZGlzcGxheSI6IiJ9LCJwb3J0cmFpdCI6eyJtYXJnaW4tYm90dG9tIjoiMzAiLCJkaXNwbGF5IjoiIn0sInBvcnRyYWl0X21heF93aWR0aCI6MTAxOCwicG9ydHJhaXRfbWluX3dpZHRoIjo3Njh9" custom_title="Stay Connected" block_template_id="td_block_template_8" f_header_font_family="712" f_header_font_transform="uppercase" f_header_font_weight="500" f_header_font_size="17" border_color="#dd3333"]
- Advertisement -spot_img

Latest Articles