Agent with Code Execution
Data analysis agent that uses Python code execution.
Python Implementation
import requests

# Create agent: register a "data_analyst" agent config that is allowed to
# run sandboxed Python via the built-in code_execution tool.
create_response = requests.post(
    "http://localhost/api/v1/agent/configs",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "name": "data_analyst",
        "display_name": "Data Analyst",
        "description": "Executes Python code for data analysis and visualization",
        "category": "analysis",
        "system_prompt": "You are a data analyst. Use code execution to process data, generate insights, and create visualizations. Explain your methods clearly. Use the pandas, numpy, matplotlib, and seaborn libraries. Return results in a clear, tabular format.",
        "model": "gpt-4",
        "temperature": 0.2,
        "builtin_tools": ["code_execution"],
        "tool_choice": "required",
        "max_iterations": 10,
        "tool_resources": {
            "code_execution": {
                "timeout": 60,
                "allowed_libraries": ["pandas", "numpy", "matplotlib", "seaborn", "scikit-learn"]
            }
        }
    },
    # Never post without a timeout: a hung server would otherwise block forever.
    timeout=30,
)
# Fail fast on HTTP errors instead of raising a confusing KeyError below.
create_response.raise_for_status()
agent_id = create_response.json()["id"]
# Chat with agent: send one analysis request and print the reply plus the
# tool-execution trace if the response includes one.
chat_response = requests.post(
    "http://localhost/api/v1/agent/chat",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "agent_id": agent_id,
        "message": "Analyze this dataset: Generate 100 random numbers between 0 and 100, calculate mean, median, standard deviation, and create a histogram."
    },
    # Code execution can run up to the tool's 60s limit; allow headroom.
    timeout=120,
)
chat_response.raise_for_status()  # surface HTTP errors before indexing JSON
result = chat_response.json()
print(f"Response: {result['response']}")

# Check tool usage
if "tool_calls" in result:
    print("\nTool execution trace:")
    for tool_call in result["tool_calls"]:
        print(f" Tool: {tool_call['tool_name']}")
        print(f" Parameters: {tool_call.get('parameters', {})}")
        print(f" Status: {tool_call['status']}")
        if "result" in tool_call:
            # BUG FIX: the original sliced tool_call['result'] as a string
            # AND called .get('stderr') on it as a dict — those can never
            # both succeed. Branch on the actual type instead.
            output = tool_call["result"]
            if isinstance(output, dict):
                print(f" Output: {str(output.get('stdout', ''))[:200]}...")
                print(f" Stderr: {output.get('stderr', '')}")
            else:
                print(f" Output: {str(output)[:200]}...")
Code Execution Examples
Basic Statistics
# Agent will execute this code.
# BUG FIX: the original snippet opened a nested f""" inside the enclosing
# triple-quoted string, which terminated the outer string early and left the
# rest of the snippet as bare (invalid) source. Plain f-strings with \n keep
# the snippet a single well-formed string literal.
code = """
import pandas as pd
import numpy as np

# Generate sample data (seeded for reproducibility)
np.random.seed(42)
data = pd.DataFrame({
    'values': np.random.randn(100),
    'category': np.random.choice(['A', 'B', 'C'], 100)
})

# Calculate statistics
mean = data['values'].mean()
median = data['values'].median()
std = data['values'].std()

# Format output
result = (
    f"Mean: {mean:.2f}\\n"
    f"Median: {median:.2f}\\n"
    f"Std Dev: {std:.2f}"
)
"""
Data Visualization
code = """
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Load data
data = pd.DataFrame({
'month': range(1, 13),
'sales': np.random.randint(5000, 20000, 12)
})
# Create histogram
plt.figure(figsize=(10, 6))
plt.hist(data['sales'], bins=30, edgecolor='skyblue')
plt.xlabel('Sales')
plt.ylabel('Frequency')
plt.title('Monthly Sales Distribution')
# Save plot
plt.savefig('/tmp/sales_histogram.png')
# Return as base64
import base64
with open('/tmp/sales_histogram.png', 'rb') as f:
plot_data = base64.b64encode(f.read())
"""
Data Analysis
code = """
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
# Sample data
data = pd.DataFrame({
'x': np.random.randn(100),
'y': np.random.randn(100) * 2 + 1
})
# Fit model
model = LinearRegression()
X = data[['x']]
y = data['y']
model.fit(X, y)
# Make prediction
predictions = model.predict(np.array([[1.5]]))
print(f"Predicted value: {predictions[0]:.2f}")
"""
Tool Configuration
Timeout Settings
{
"tool_resources": {
"code_execution": {
"timeout": 30 # 30 second timeout
}
}
}
Allowed Libraries
{
"tool_resources": {
"code_execution": {
"allowed_libraries": [
"pandas",
"numpy",
"matplotlib",
"seaborn",
"scikit-learn"
]
}
}
}
Best Practices
Code Quality
- Use Libraries: Leverage pandas, numpy for efficiency
- Error Handling: Include try-except blocks in code
- Clear Output: Print results with clear formatting
- Comment Code: Add comments explaining analysis steps
- Return Data: Format results as structured data (JSON, CSV)
Security Considerations
- Sandboxed File System: Code runs in an isolated environment; reads and writes are confined to sandbox-local scratch paths (e.g. /tmp), as in the examples above
- No Network Access: No internet access by default
- Restricted Libraries: Only allow safe, approved libraries
- Memory Limits: Code has memory and time constraints
- Sanitize Inputs: Validate user inputs before executing
Performance
- Optimize Libraries: Use vectorized operations in pandas/numpy
- Limit Data Size: Process samples or aggregates for large datasets
- Complexity Awareness: Avoid O(n²) operations
- Timeout Management: Set appropriate timeouts for code execution
Error Handling
Timeout Error
# Shape of a timeout failure returned by the code_execution tool.
result = dict(
    tool_name="code_execution",
    status="error",
    error="Execution failed",
    details=dict(
        error_type="timeout",
        message="Code execution exceeded timeout of 60 seconds",
    ),
)
Handle by:
- Adjusting timeout in agent config
- Breaking long tasks into smaller steps
- Using more efficient code
Code Execution Failed
# Shape of a syntax-error failure returned by the code_execution tool.
result = dict(
    tool_name="code_execution",
    status="error",
    error="Syntax error in code",
    details=dict(
        error_type="syntax_error",
        message="Unexpected EOF while parsing",
        line_number=42,
    ),
)
Handle by:
- Pre-validating code syntax
- Checking for common errors
- Providing helpful error messages to LLM
Library Import Errors
# Shape of a disallowed-library failure returned by the code_execution tool.
result = dict(
    tool_name="code_execution",
    status="error",
    error="Import failed",
    details=dict(
        error_type="import_error",
        message="Library 'tensorflow' is not allowed",
        library="tensorflow",
    ),
)
Handle by:
- Checking allowed libraries list
- Providing clear error messages
- Suggesting alternatives
Use Cases
Exploratory Data Analysis
query = "Explore the sales data and identify trends, patterns, and insights"
code = """
import pandas as pd
import numpy as np
# Sample data
data = pd.read_csv('sales.csv')
# Basic statistics
print(data.describe())
# Trend analysis
data['date'] = pd.to_datetime(data['date'])
monthly_sales = data.groupby(data['date'].dt.to_period('M'))['sales'].sum()
print(monthly_sales.head(12))
"""
Customer Churn Analysis
query = "Analyze customer data and identify churn risk factors"
code = """
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
# Load data
data = pd.read_csv('customers.csv')
# Features
features = ['purchase_count', 'avg_order_value', 'days_since_last_purchase']
X = data[features]
y = data['churn']
# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# Predict
predictions = model.predict(X_test)
"""
A/B Testing Analysis
query = "Compare the performance of two different features or products"
code = """
import pandas as pd
from scipy import stats
# Load data
data = pd.read_csv('experiment_results.csv')
# Group by variant
variant_a = data[data['variant'] == 'A']['conversion_rate']
variant_b = data[data['variant'] == 'B']['conversion_rate']
# Perform t-test
t_stat, p_value = stats.ttest_ind(variant_a['conversion_rate'], variant_b['conversion_rate'])
print(f"Variant A: {variant_a['conversion_rate']:.2%}")
print(f"Variant B: {variant_b['conversion_rate']:.2%}")
print(f"P-value: {p_value:.4f}")
print(f"Significant: {p_value < 0.05}")
"""
Advanced Patterns
Data Visualization Pipeline
code = """
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Load data
data = pd.read_csv('metrics.csv')
# Create multi-panel visualization
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# Plot 1: Sales over time
axes[0, 0].plot(data['date'], data['sales'])
axes[0, 0].set_title('Sales Over Time')
axes[0, 0].set_ylabel('Sales')
# Plot 2: Category distribution
category_counts = data['category'].value_counts()
axes[0, 1].bar(category_counts.index, category_counts.values)
axes[0, 1].set_title('Sales by Category')
axes[0, 1].set_ylabel('Count')
plt.tight_layout()
plt.savefig('/tmp/sales_dashboard.png')
"""
Automated Reporting
code = """
import pandas as pd
import matplotlib.pyplot as plt
# Generate daily report
data = pd.read_csv('sales_data.csv')
# Daily summary
daily_summary = data.groupby('date').agg({
'sales': 'sum',
'orders': 'count',
'unique_customers': 'nunique'
})
# Create visualizations
fig, axes = plt.subplots(2, 1, figsize=(12, 8))
# Line chart
axes[0, 0].plot(daily_summary.index, daily_summary['sales'])
axes[0, 0].set_title('Daily Sales')
axes[0, 0].set_ylabel('Sales')
# Bar chart
axes[1, 0].bar(daily_summary['unique_customers'])
axes[1, 0].set_title('Unique Customers')
axes[1, 0].set_ylabel('Count')
plt.tight_layout()
plt.savefig('/tmp/daily_report.png')
"""
Troubleshooting
Code Execution Timeout
Problem: Code takes longer than allowed timeout
Solutions:
- Break large analysis into smaller steps
- Use more efficient libraries and algorithms
- Reduce data size by sampling
- Increase timeout in agent configuration
Memory Issues
Problem: Code execution fails with memory error
Solutions:
- Use data chunking for large datasets
- Process data in batches
- Reduce memory usage in code
- Use more memory-efficient data types
Library Import Failures
Problem: Required library not available or import fails
Solutions:
- Verify library is in allowed list
- Check Python environment and dependencies
- Use alternative libraries if needed
- Provide clear error messages about which library is missing
Next Steps
- Tools - Learn about code execution tool configuration
- Examples - See more agent patterns
- Direct Tool Calling - Use code execution without agents