The Roadmap for Mastering LLMOps in 2026

By sales@avisionmarketing.com

June 1, 2026

0

16

# llm_with_tracing.py

# Function: A production-ready LLM name wrapper with full observability.

# Each name is traced in Langfuse: enter, output, tokens, value, latency.

#

# Conditions:

# pip set up langfuse anthropic python-dotenv

#

# Setup:

# 1. Create a free account at https://cloud.langfuse.com

# 2. Get your keys from Settings > API Keys

# 3. Create a .env file with the variables under

#

# Run:

# python llm_with_tracing.py

import os

import time

from dotenv import load_dotenv

import anthropic

from langfuse import Langfuse

# Load surroundings variables from .env file

load_dotenv()

# Required surroundings variables in your .env:

# LANGFUSE_PUBLIC_KEY=pk-lf-…

# LANGFUSE_SECRET_KEY=sk-lf-…

# LANGFUSE_HOST=https://cloud.langfuse.com (or your self-hosted URL)

# ANTHROPIC_API_KEY=sk-ant-…

# Initialize shoppers

langfuse_client = Langfuse() # Reads keys routinely from surroundings

anthropic_client = anthropic.Anthropic() # Reads ANTHROPIC_API_KEY from surroundings

# ── Configuration ─────────────────────────────────────────────────────────────

# Retailer your immediate right here, not inline within the API name.

# This makes it versionable and testable independently.

SYSTEM_PROMPT = “”“You’re a useful buyer assist assistant.

Reply questions clearly and concisely.

Should you have no idea one thing, say so instantly — don’t guess.”“”

MODEL = “claude-sonnet-4-20250514”

# Anthropic’s pricing as of mid-2026 (replace when pricing adjustments)

# Used to calculate value per name for value monitoring

COST_PER_INPUT_TOKEN = 3.00 / 1_000_000 # $3.00 per million enter tokens

COST_PER_OUTPUT_TOKEN = 15.00 / 1_000_000 # $15.00 per million output tokens

def call_llm_with_tracing(

user_message: str,

session_id: str = “default-session”,

user_id: str = “nameless”

) -> str:

“”“

Make a traced LLM name. Each name creates a Langfuse hint with:

– Full enter and output

– Token utilization (enter, output, complete)

– Calculated value in USD

– Latency in milliseconds

– Mannequin used and session context

Parameters:

user_message : The message from the person

session_id : Teams associated calls into one dialog in Langfuse

user_id : Associates the decision with a selected person for analytics

Returns:

The LLM response as a string

““”

# Create a top-level hint for this person interplay

# The hint seems within the Langfuse dashboard as one unit of labor

hint = langfuse_client.hint(

title=“customer-support-call”,

session_id=session_id,

user_id=user_id,

enter={“user_message”: user_message, “system_prompt”: SYSTEM_PROMPT}

)

# Create a era span contained in the hint

# This captures model-specific particulars: mannequin title, tokens, value

era = hint.era(

title=“claude-completion”,

mannequin=MODEL,

enter={

“system”: SYSTEM_PROMPT,

“messages”: [{“role”: “user”, “content”: user_message}]

}

)

start_time = time.time()

strive:

# Make the API name

response = anthropic_client.messages.create(

mannequin=MODEL,

max_tokens=1024,

system=SYSTEM_PROMPT,

messages=[{“role”: “user”, “content”: user_message}]

)

latency_ms = int((time.time() – start_time) * 1000)

# Extract the response textual content

response_text = response.content material[0].textual content

# Extract token utilization from the response

input_tokens = response.utilization.input_tokens

output_tokens = response.utilization.output_tokens

total_tokens = input_tokens + output_tokens

# Calculate value for this name

cost_usd = (

input_tokens * COST_PER_INPUT_TOKEN +

output_tokens * COST_PER_OUTPUT_TOKEN

)

# Replace the era span with outcomes

# This information populates the Langfuse value and token dashboards

era.finish(

output=response_text,

utilization={

“enter”: input_tokens,

“output”: output_tokens,

“complete”: total_tokens,

“unit”: “TOKENS”

},

metadata={

“latency_ms”: latency_ms,

“cost_usd”: spherical(cost_usd, 6),

“mannequin”: MODEL

}

)

# Replace the hint with the ultimate output

hint.replace(

output={“response”: response_text},

metadata={“total_cost_usd”: spherical(cost_usd, 6)}

)

# Print a abstract to stdout for native visibility

print(f“n{‘─’ * 60}”)

print(f“Person: {user_message}”)

print(f“Claude: {response_text}”)

print(f“Tokens: {input_tokens} in / {output_tokens} out / {total_tokens} complete”)

print(f“Value: ${cost_usd:.6f}”)

print(f“Latency: {latency_ms}ms”)

print(f“Hint: {langfuse_client.base_url}/hint/{hint.id}”)

print(f“{‘─’ * 60}n”)

return response_text

besides Exception as e:

# Document the error within the hint so it exhibits up in Langfuse

era.finish(

output=None,

metadata={“error”: str(e), “latency_ms”: int((time.time() – start_time) * 1000)}

)

hint.replace(output={“error”: str(e)})

# All the time flush earlier than elevating — ensures the error hint is shipped

langfuse_client.flush()

increase

lastly:

# Flush sends all buffered occasions to Langfuse

# In a long-running service, Langfuse flushes routinely.

# In a script, you have to flush manually earlier than the method exits.

langfuse_client.flush()

# ── Run an indication ────────────────────────────────────────────────────────

if __name__ == “__main__”:

# Simulate two turns of a buyer assist dialog

test_messages = [

“What is your return policy for electronics?”,

“Can I return an item I bought 45 days ago?”

]

session = “demo-session-001”

for i, message in enumerate(test_messages):

print(f“nCall {i + 1}/{len(test_messages)}”)

strive:

call_llm_with_tracing(

user_message=message,

session_id=session,

user_id=“test-user-42”

)

besides Exception as e:

print(f“Error on name {i + 1}: {e}”)

The Roadmap for Mastering LLMOps in 2026

Related Articles

Report shares the state of bodily AI and robotics

Investing within the Way forward for Mexico’s Telco Panorama

After surprising quarter, IBM insists that AI is not killing the mainframe

LEAVE A REPLY Cancel reply

Latest Articles

Report shares the state of bodily AI and robotics

Investing within the Way forward for Mexico’s Telco Panorama

After surprising quarter, IBM insists that AI is not killing the mainframe

GKN Aerospace and Pratt & Whitney increase additive manufacturing work to F135 engine | VoxelMatters

MIT’s new lidar chip might give self-driving vehicles a wider view

ABOUT US