Skip to main content

RAG Chat Example

Combine RAG search with AI chat to answer questions using your documents.

Basic RAG Chat

import requests

def rag_chat(collection_name, user_query, model="gpt-4", timeout=60):
    """Answer ``user_query`` using documents retrieved from a RAG collection.

    Runs a two-step pipeline: search the collection for the passages most
    relevant to the query, then ask the chat model to answer with those
    passages supplied as context in the system prompt.

    Args:
        collection_name: Name of the RAG collection to search.
        user_query: The user's question; used both as the search query and
            as the user message sent to the model.
        model: Model identifier passed to the chat completions endpoint.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with ``"answer"`` (the model's reply text) and ``"sources"``
        (the search results that were used as context).

    Raises:
        requests.HTTPError: If either endpoint responds with a 4xx/5xx status.
        requests.Timeout: If either request exceeds ``timeout``.
    """
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Step 1: Search for relevant documents
    search_response = requests.post(
        "http://localhost/api/v1/rag/search",
        headers=headers,
        json={
            "collection_name": collection_name,
            "query": user_query,
            "top_k": 5,
        },
        timeout=timeout,
    )
    # Fail loudly on an error status instead of a confusing KeyError below.
    search_response.raise_for_status()
    search_results = search_response.json()["results"]

    # Step 2: Format context from search results, labelling each passage
    # with the document it came from.
    context = "\n\n".join(
        f"[Source: {r['metadata']['document_id']}]\n{r['content']}"
        for r in search_results
    )

    # Step 3: Generate response with context
    chat_response = requests.post(
        "http://localhost/api/v1/chat/completions",
        headers=headers,
        json={
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": f"You are a helpful assistant. Answer the user's question using the provided context. If the answer is not in the context, say so.\n\nContext:\n{context}",
                },
                {
                    "role": "user",
                    "content": user_query,
                },
            ],
            "temperature": 0.3,
        },
        timeout=timeout,
    )
    chat_response.raise_for_status()

    answer = chat_response.json()["choices"][0]["message"]["content"]

    return {
        "answer": answer,
        "sources": search_results,
    }

# Ask one question against the "documentation" collection, then show the
# answer followed by every source document and its relevance score.
result = rag_chat(
    collection_name="documentation",
    user_query="How do I configure API keys?",
)

print(f"Answer: {result['answer']}\n")
print("Sources:")
for source in result["sources"]:
    print(f" - {source['metadata']['document_id']} (score: {source['score']:.2f})")

RAG Chat with Citations

def rag_chat_with_citations(collection_name, user_query, model="gpt-4", timeout=60):
    """Answer ``user_query`` from a RAG collection, asking for numbered citations.

    Each retrieved passage is prefixed with a 1-based marker (``[1]``,
    ``[2]``, ...) and the model is instructed to cite those markers. Marker
    ``[i]`` corresponds to ``sources[i - 1]`` in the returned dict.

    Args:
        collection_name: Name of the RAG collection to search.
        user_query: The user's question; used both as the search query and
            as the user message sent to the model.
        model: Model identifier passed to the chat completions endpoint.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with ``"answer"`` (the model's reply text) and ``"sources"``
        (the search results, in the same order as the citation markers).

    Raises:
        requests.HTTPError: If either endpoint responds with a 4xx/5xx status.
        requests.Timeout: If either request exceeds ``timeout``.
    """
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Search documents
    search_response = requests.post(
        "http://localhost/api/v1/rag/search",
        headers=headers,
        json={
            "collection_name": collection_name,
            "query": user_query,
            "top_k": 5,
        },
        timeout=timeout,
    )
    # Fail loudly on an error status instead of a confusing KeyError below.
    search_response.raise_for_status()
    results = search_response.json()["results"]

    # Format context with citation markers: "[1] ...", "[2] ...", etc.
    context = "\n\n".join(
        f"[{i}] {result['content']}" for i, result in enumerate(results, 1)
    )

    # Generate response with citation instructions
    system_prompt = (
        "You are a helpful assistant. Answer the user's question using the provided context.\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        "When using information from the context, cite the source using [1], [2], etc. format.\n"
        "If the answer is not in the context, state that clearly."
    )

    chat_response = requests.post(
        "http://localhost/api/v1/chat/completions",
        headers=headers,
        json={
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            "temperature": 0.3,
        },
        timeout=timeout,
    )
    chat_response.raise_for_status()

    answer = chat_response.json()["choices"][0]["message"]["content"]

    return {
        "answer": answer,
        "sources": results,
    }

# Ask a question and print the answer, which should contain [n] citations.
result = rag_chat_with_citations(
    collection_name="documentation",
    user_query="What are the security best practices?",
)
print(f"Answer: {result['answer']}")

Conversational RAG Chat

class RAGChatBot:
    """Multi-turn chat bot that retrieves fresh RAG context for every message.

    Each call to :meth:`chat` searches the collection using the new user
    message, sends the retrieved passages plus the most recent conversation
    turns to the chat model, and records the exchange in
    ``conversation_history``.
    """

    def __init__(self, collection_name, model="gpt-4", max_history=10, timeout=60):
        """Create a bot bound to one collection.

        Args:
            collection_name: Name of the RAG collection to search.
            model: Model identifier passed to the chat completions endpoint.
            max_history: Maximum number of past messages replayed to the
                model on each turn. The default of 10 is 5 user/assistant
                exchanges. Use 0 to send no history.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.collection_name = collection_name
        self.model = model
        self.max_history = max_history
        self.timeout = timeout
        # Alternating {"role": "user"|"assistant", "content": ...} dicts,
        # oldest first.
        self.conversation_history = []

    def chat(self, user_message):
        """Send one user message and return the assistant's grounded reply.

        Args:
            user_message: The user's message; also used as the search query.

        Returns:
            A dict with ``"response"`` (the assistant's reply text) and
            ``"sources"`` (the search results used as context).

        Raises:
            requests.HTTPError: If either endpoint responds with a 4xx/5xx
                status. The conversation history is left unchanged.
            requests.Timeout: If either request exceeds the timeout.
        """
        headers = {"Authorization": "Bearer YOUR_API_KEY"}

        # Search for relevant context
        search_response = requests.post(
            "http://localhost/api/v1/rag/search",
            headers=headers,
            json={
                "collection_name": self.collection_name,
                "query": user_message,
                "top_k": 5,
            },
            timeout=self.timeout,
        )
        # Fail loudly on an error status instead of a confusing KeyError below.
        search_response.raise_for_status()

        results = search_response.json()["results"]
        context = "\n\n".join(r["content"] for r in results)

        # Build messages with history
        messages = [
            {
                "role": "system",
                "content": f"You are a helpful assistant. Use the provided context to answer questions.\n\nContext:\n{context}",
            }
        ]

        # Add the most recent `max_history` messages. The zero case needs an
        # explicit guard because list[-0:] would return the whole list.
        if self.max_history > 0:
            messages.extend(self.conversation_history[-self.max_history:])

        # Add current message
        messages.append({"role": "user", "content": user_message})

        # Generate response
        chat_response = requests.post(
            "http://localhost/api/v1/chat/completions",
            headers=headers,
            json={
                "model": self.model,
                "messages": messages,
                "temperature": 0.3,
            },
            timeout=self.timeout,
        )
        chat_response.raise_for_status()

        assistant_message = chat_response.json()["choices"][0]["message"]["content"]

        # Update conversation history only after a successful round trip so a
        # failed request never leaves a dangling user turn behind.
        self.conversation_history.append({"role": "user", "content": user_message})
        self.conversation_history.append(
            {"role": "assistant", "content": assistant_message}
        )

        return {
            "response": assistant_message,
            "sources": results,
        }

# Interactive session: keep chatting with one bot until the user types "quit".
bot = RAGChatBot("documentation")

while True:
    user_input = input("\nYou: ")
    if user_input.lower() == "quit":
        break

    result = bot.chat(user_input)
    print(f"\nBot: {result['response']}")

    # Nothing more to show for this turn when the search returned no documents.
    if not result['sources']:
        continue

    print("\nSources:")
    for source in result['sources']:
        print(f" - {source['metadata']['document_id']}")

RAG Chat with Streaming

def rag_chat_stream(collection_name, user_query, timeout=60):
    """Answer ``user_query`` from a RAG collection, printing the reply as it streams.

    Searches the collection, then requests a streamed chat completion and
    writes each content fragment to stdout as soon as it arrives. The
    stream is read as server-sent-event lines of the form ``data: {json}``,
    terminated by ``data: [DONE]``.

    Args:
        collection_name: Name of the RAG collection to search.
        user_query: The user's question; used both as the search query and
            as the user message sent to the model.
        timeout: Seconds to wait for each HTTP request to connect and for
            each chunk of the response to arrive.

    Returns:
        None. Output is printed to stdout.

    Raises:
        requests.HTTPError: If either endpoint responds with a 4xx/5xx status.
        requests.Timeout: If a request stalls for longer than ``timeout``.
    """
    # Imported once per call rather than once per streamed line.
    import json

    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Search documents
    search_response = requests.post(
        "http://localhost/api/v1/rag/search",
        headers=headers,
        json={
            "collection_name": collection_name,
            "query": user_query,
            "top_k": 5,
        },
        timeout=timeout,
    )
    search_response.raise_for_status()

    results = search_response.json()["results"]
    context = "\n\n".join(r["content"] for r in results)

    # Stream response. The context manager guarantees the connection is
    # released even if we stop reading early or an error occurs.
    with requests.post(
        "http://localhost/api/v1/chat/completions",
        headers=headers,
        json={
            "model": "gpt-4",
            "messages": [
                {
                    "role": "system",
                    "content": f"Answer using this context:\n{context}",
                },
                {
                    "role": "user",
                    "content": user_query,
                },
            ],
            # Python boolean; serialised to JSON `true` by requests.
            "stream": True,
        },
        stream=True,
        timeout=timeout,
    ) as chat_response:
        chat_response.raise_for_status()

        print("Bot: ", end="", flush=True)

        prefix = "data: "
        for raw_line in chat_response.iter_lines():
            if not raw_line:
                continue  # blank keep-alive / event separator

            line = raw_line.decode("utf-8")
            if not line.startswith(prefix):
                continue

            payload = line[len(prefix):]
            if payload == "[DONE]":
                break  # end-of-stream sentinel

            try:
                data = json.loads(payload)
            except json.JSONDecodeError:
                # Skip a malformed event rather than abort the whole reply.
                continue

            if not isinstance(data, dict):
                continue
            choices = data.get("choices") or []
            if not choices:
                continue

            # Not every delta carries text (e.g. role-only or final chunks).
            fragment = (choices[0].get("delta") or {}).get("content")
            if fragment:
                print(fragment, end="", flush=True)

    print(f"\n\nSources: {len(results)} documents")

# Stream the answer to stdout as the model generates it.
rag_chat_stream(
    collection_name="documentation",
    user_query="Explain the API authentication process",
)

JavaScript RAG Chat

/**
 * Multi-turn RAG chat client.
 *
 * Every call to `chat()` searches the collection with the new user message,
 * sends the retrieved passages plus the last 10 history messages to the chat
 * model, and appends the exchange to `history`.
 */
class RAGChat {
  /**
   * @param {string} apiKey - Bearer token sent in the Authorization header.
   * @param {string} collectionName - RAG collection to search.
   */
  constructor(apiKey, collectionName) {
    this.apiKey = apiKey;
    this.collectionName = collectionName;
    this.baseUrl = 'http://localhost/api/v1';
    // Alternating { role: 'user' | 'assistant', content } objects, oldest first.
    this.history = [];
  }

  /**
   * POST a JSON body to an API path and return the parsed JSON response.
   *
   * fetch() only rejects on network failure, so HTTP error statuses are
   * turned into exceptions here instead of surfacing later as a TypeError
   * on a missing field.
   *
   * @param {string} path - Path relative to `baseUrl`, e.g. '/rag/search'.
   * @param {object} body - Request payload; serialised with JSON.stringify.
   * @returns {Promise<object>} Parsed JSON response body.
   * @throws {Error} If the response status is not 2xx.
   */
  async _post(path, body) {
    const response = await fetch(`${this.baseUrl}${path}`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      throw new Error(
        `Request to ${path} failed: ${response.status} ${response.statusText}`
      );
    }

    return await response.json();
  }

  /**
   * Search the collection for the passages most relevant to `query`.
   *
   * @param {string} query - Search text.
   * @returns {Promise<object>} Search response; passages are in `.results`.
   * @throws {Error} If the search endpoint returns a non-2xx status.
   */
  async search(query) {
    return await this._post('/rag/search', {
      collection_name: this.collectionName,
      query,
      top_k: 5
    });
  }

  /**
   * Send one user message and return the assistant's grounded reply.
   *
   * @param {string} userMessage - The user's message; also the search query.
   * @returns {Promise<{response: string, sources: object[]}>}
   * @throws {Error} If either endpoint returns a non-2xx status; `history`
   *   is left unchanged in that case.
   */
  async chat(userMessage) {
    // Search for context
    const searchResult = await this.search(userMessage);
    const context = searchResult.results.map(r => r.content).join('\n\n');

    // Build messages: system prompt, last 10 history messages, new message
    const messages = [
      {
        role: 'system',
        content: `Answer using this context:\n${context}`
      },
      ...this.history.slice(-10),
      {
        role: 'user',
        content: userMessage
      }
    ];

    // Generate response
    const data = await this._post('/chat/completions', {
      model: 'gpt-4',
      messages,
      temperature: 0.3
    });
    const assistantMessage = data.choices[0].message.content;

    // Update history only after a successful round trip
    this.history.push(
      { role: 'user', content: userMessage },
      { role: 'assistant', content: assistantMessage }
    );

    return {
      response: assistantMessage,
      sources: searchResult.results
    };
  }
}

// Usage: create a client, ask one question, then log the reply and the IDs
// of the documents it was grounded in.
const chat = new RAGChat('YOUR_API_KEY', 'documentation');

const result = await chat.chat('How do I configure API keys?');
const documentIds = result.sources.map(({ metadata }) => metadata.document_id);

console.log('Response:', result.response);
console.log('Sources:', documentIds);

React RAG Chat Component

import React, { useState } from 'react';

function RAGChat() {
const [message, setMessage] = useState('');
const [messages, setMessages] = useState([]);
const [loading, setLoading] = useState(false);
const [sources, setSources] = useState([]);

const sendMessage = async () => {
if (!message.trim() || loading) return;

setLoading(true);

try {
// Search for context
const searchResponse = await fetch('http://localhost/api/v1/rag/search', {
method: 'POST',
headers: {
'Authorization': 'Bearer YOUR_API_KEY',
'Content-Type': 'application/json'
},
body: JSON.stringify({
collection_name: 'documentation',
query: message,
top_k: 5
})
});

const searchData = await searchResponse.json();
const context = searchData.results.map(r => r.content).join('\n\n');

// Generate response
const chatResponse = await fetch('http://localhost/api/v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': 'Bearer YOUR_API_KEY',
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: 'gpt-4',
messages: [
{
role: 'system',
content: `Answer using this context:\n${context}`
},
...messages,
{
role: 'user',
content: message
}
],
temperature: 0.3
})
});

const chatData = await chatResponse.json();
const response = chatData.choices[0].message.content;

setMessages([...messages, { role: 'user', content: message }]);
setMessages(prev => [...prev, { role: 'assistant', content: response }]);
setSources(searchData.results);
setMessage('');
} catch (error) {
console.error('Error:', error);
} finally {
setLoading(false);
}
};

return (
<div className="max-w-4xl mx-auto p-6">
<h2 className="text-2xl font-bold mb-4">RAG Chat</h2>

<div className="border rounded-lg p-4 h-96 overflow-y-auto mb-4">
{messages.map((msg, i) => (
<div key={i} className={`mb-2 ${msg.role === 'user' ? 'text-right' : 'text-left'}`}>
<span className={`inline-block px-3 py-1 rounded ${
msg.role === 'user' ? 'bg-blue-500 text-white' : 'bg-gray-200'
}`}>
{msg.content}
</span>
</div>
))}
</div>

{sources.length > 0 && (
<div className="mb-4 p-3 bg-gray-50 rounded">
<h3 className="font-bold text-sm mb-2">Sources Used:</h3>
{sources.map((s, i) => (
<div key={i} className="text-xs">
{s.metadata.document_id} (score: {s.score.toFixed(2)})
</div>
))}
</div>
)}

<div className="flex space-x-2">
<input
type="text"
value={message}
onChange={(e) => setMessage(e.target.value)}
onKeyPress={(e) => e.key === 'Enter' && sendMessage()}
className="flex-1 px-4 py-2 border rounded"
placeholder="Ask a question..."
disabled={loading}
/>
<button
onClick={sendMessage}
disabled={loading || !message}
className="px-6 py-2 bg-blue-500 text-white rounded"
>
{loading ? 'Sending...' : 'Send'}
</button>
</div>
</div>
);
}

export default RAGChat;

Best Practices

  1. Relevant context - Use a top_k of 5-7 to balance answer coverage against prompt size
  2. Cite sources - Always reference the source documents in the answer
  3. Temperature - Use 0.2-0.3 for factual answers
  4. Conversation limit - Keep the history sent to the model manageable (10-20 messages)
  5. No information - Instruct the model to state clearly when the answer is not in the documents
  6. Streaming - Stream responses for a better user experience

Next Steps