RAG Chat Example

Combine RAG search with AI chat to answer questions using your documents.

Basic RAG Chat

import requests

def rag_chat(collection_name, user_query, model="gpt-4", top_k=5, timeout=30):
    """Answer a question using documents retrieved from a RAG collection.

    Searches the collection, formats the hits into a context block in which
    each hit is tagged with its document ID, and sends that context together
    with the question to the chat completions endpoint.

    Args:
        collection_name: Name of the collection to search.
        user_query: The user's question; used both as the search query and
            as the user chat message.
        model: Model identifier sent to the chat completions endpoint.
        top_k: Number of search results to request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict with ``"answer"`` (the reply text) and ``"sources"`` (the
        list of search results exactly as returned by the search endpoint).

    Raises:
        requests.HTTPError: If either endpoint responds with an error status.
        requests.Timeout: If a request does not complete within ``timeout``.
        KeyError: If a response body lacks the expected fields.
    """
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Step 1: Search for relevant documents
    search_response = requests.post(
        "http://localhost/api/v1/rag/search",
        headers=headers,
        json={
            "collection_name": collection_name,
            "query": user_query,
            "top_k": top_k,
        },
        # requests waits forever by default; bound the wait explicitly
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of a confusing KeyError below
    search_response.raise_for_status()

    search_results = search_response.json()["results"]

    # Step 2: Format context from search results
    context = "\n\n".join(
        f"[Source: {r['metadata']['document_id']}]\n{r['content']}"
        for r in search_results
    )

    # Step 3: Generate response with context
    chat_response = requests.post(
        "http://localhost/api/v1/chat/completions",
        headers=headers,
        json={
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": f"You are a helpful assistant. Answer the user's question using the provided context. If the answer is not in the context, say so.\n\nContext:\n{context}"
                },
                {
                    "role": "user",
                    "content": user_query
                }
            ],
            # Low temperature keeps the answer close to the retrieved text
            "temperature": 0.3
        },
        timeout=timeout,
    )
    chat_response.raise_for_status()

    answer = chat_response.json()["choices"][0]["message"]["content"]

    return {
        "answer": answer,
        "sources": search_results
    }

# Use RAG chat
result = rag_chat(
    collection_name="documentation",
    user_query="How do I configure API keys?",
)

# Show the generated answer, then one line per retrieved source
print(f"Answer: {result['answer']}\n")
print("Sources:")
for source in result["sources"]:
    print(f"  - {source['metadata']['document_id']} (score: {source['score']:.2f})")

RAG Chat with Citations

def rag_chat_with_citations(collection_name, user_query, model="gpt-4", top_k=5, timeout=30):
    """Answer a question and ask the model to cite numbered sources.

    Each search hit is placed in the context prefixed with ``[1]``, ``[2]``,
    ... in result order, and the system prompt instructs the model to cite
    using those markers. Marker ``[i]`` corresponds to ``sources[i - 1]`` in
    the returned dict.

    Args:
        collection_name: Name of the collection to search.
        user_query: The user's question; used both as the search query and
            as the user chat message.
        model: Model identifier sent to the chat completions endpoint.
        top_k: Number of search results to request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict with ``"answer"`` (the reply text) and ``"sources"`` (the
        list of search results exactly as returned by the search endpoint).

    Raises:
        requests.HTTPError: If either endpoint responds with an error status.
        requests.Timeout: If a request does not complete within ``timeout``.
        KeyError: If a response body lacks the expected fields.
    """
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Search documents
    search_response = requests.post(
        "http://localhost/api/v1/rag/search",
        headers=headers,
        json={
            "collection_name": collection_name,
            "query": user_query,
            "top_k": top_k,
        },
        # requests waits forever by default; bound the wait explicitly
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of a confusing KeyError below
    search_response.raise_for_status()

    results = search_response.json()["results"]

    # Format context with citations: numbering starts at 1 to match the
    # "[1], [2]" style the prompt asks the model to use
    context = "\n\n".join(
        f"[{i}] {result['content']}" for i, result in enumerate(results, 1)
    )

    # Generate response with citation instructions
    system_prompt = f"""You are a helpful assistant. Answer the user's question using the provided context.

Context:
{context}

When using information from the context, cite the source using [1], [2], etc. format.
If the answer is not in the context, state that clearly."""

    chat_response = requests.post(
        "http://localhost/api/v1/chat/completions",
        headers=headers,
        json={
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_query
                }
            ],
            "temperature": 0.3
        },
        timeout=timeout,
    )
    chat_response.raise_for_status()

    answer = chat_response.json()["choices"][0]["message"]["content"]

    return {
        "answer": answer,
        "sources": results
    }

# Ask one question and print the answer, which carries [n] citation markers
result = rag_chat_with_citations(
    collection_name="documentation",
    user_query="What are the security best practices?",
)
print(f"Answer: {result['answer']}")

Conversational RAG Chat

class RAGChatBot:
    """Multi-turn chat bot that retrieves fresh context for every message.

    Each call to :meth:`chat` runs a new search for the incoming message,
    replays the most recent part of the conversation to the model, and then
    records the new user/assistant pair in ``conversation_history``.
    """

    # How many of the most recent history messages are replayed per turn.
    # 10 messages correspond to the last 5 user/assistant exchanges.
    HISTORY_WINDOW = 10

    # Seconds to wait for each HTTP request; requests waits forever by default.
    REQUEST_TIMEOUT = 30

    def __init__(self, collection_name, model="gpt-4"):
        """Create a bot bound to one collection.

        Args:
            collection_name: Name of the collection searched on every turn.
            model: Model identifier sent to the chat completions endpoint.
        """
        self.collection_name = collection_name
        self.model = model
        # Alternating {"role": "user"|"assistant", "content": ...} dicts
        self.conversation_history = []

    def chat(self, user_message):
        """Send one user message and return the reply with its sources.

        Args:
            user_message: The user's text; used both as the search query and
                as the new chat message.

        Returns:
            A dict with ``"response"`` (the reply text) and ``"sources"``
            (the list of search results for this message).

        Raises:
            requests.HTTPError: If either endpoint responds with an error
                status. The conversation history is left unchanged.
            requests.Timeout: If a request exceeds ``REQUEST_TIMEOUT``.
            KeyError: If a response body lacks the expected fields.
        """
        headers = {"Authorization": "Bearer YOUR_API_KEY"}

        # Search for relevant context
        search_response = requests.post(
            "http://localhost/api/v1/rag/search",
            headers=headers,
            json={
                "collection_name": self.collection_name,
                "query": user_message,
                "top_k": 5
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface HTTP failures directly instead of a confusing KeyError below
        search_response.raise_for_status()

        results = search_response.json()["results"]
        context = "\n\n".join(r["content"] for r in results)

        # Build messages with history
        messages = [
            {
                "role": "system",
                "content": f"You are a helpful assistant. Use the provided context to answer questions.\n\nContext:\n{context}"
            }
        ]

        # Add conversation history (last 10 messages, i.e. 5 exchanges)
        messages.extend(self.conversation_history[-self.HISTORY_WINDOW:])

        # Add current message
        messages.append({
            "role": "user",
            "content": user_message
        })

        # Generate response
        chat_response = requests.post(
            "http://localhost/api/v1/chat/completions",
            headers=headers,
            json={
                "model": self.model,
                "messages": messages,
                "temperature": 0.3
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        chat_response.raise_for_status()

        assistant_message = chat_response.json()["choices"][0]["message"]["content"]

        # Update conversation history only after a successful round trip, so
        # a failed request never leaves a user message without a reply
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })
        self.conversation_history.append({
            "role": "assistant",
            "content": assistant_message
        })

        return {
            "response": assistant_message,
            "sources": results
        }

# Use conversational RAG chat
bot = RAGChatBot(collection_name="documentation")

# Multi-turn conversation: keep answering until the user types "quit"
while True:
    user_input = input("\nYou: ")
    if user_input.lower() == "quit":
        break

    result = bot.chat(user_message=user_input)
    print(f"\nBot: {result['response']}")

    # Nothing retrieved for this turn, so there is no source list to show
    if not result['sources']:
        continue

    print("\nSources:")
    for source in result['sources']:
        print(f"  - {source['metadata']['document_id']}")

RAG Chat with Streaming

def rag_chat_stream(collection_name, user_query, timeout=30):
    """Answer a question with RAG context, printing the reply as it streams.

    Searches the collection, then requests a streamed chat completion and
    prints each content fragment to stdout as soon as it arrives. Finishes
    by printing how many documents were retrieved.

    Args:
        collection_name: Name of the collection to search.
        user_query: The user's question; used both as the search query and
            as the user chat message.
        timeout: Seconds to wait for the connection and between received
            chunks before giving up.

    Returns:
        None. The answer is written to stdout.

    Raises:
        requests.HTTPError: If either endpoint responds with an error status.
        requests.Timeout: If a request exceeds ``timeout``.
        KeyError: If the search response lacks the expected fields.
    """
    # Imported once here rather than on every streamed line
    import json

    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Search documents
    search_response = requests.post(
        "http://localhost/api/v1/rag/search",
        headers=headers,
        json={
            "collection_name": collection_name,
            "query": user_query,
            "top_k": 5
        },
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of a confusing KeyError below
    search_response.raise_for_status()

    results = search_response.json()["results"]
    context = "\n\n".join(r["content"] for r in results)

    # Stream response; the context manager releases the connection even if
    # the loop is interrupted part-way through
    with requests.post(
        "http://localhost/api/v1/chat/completions",
        headers=headers,
        json={
            "model": "gpt-4",
            "messages": [
                {
                    "role": "system",
                    "content": f"Answer using this context:\n{context}"
                },
                {
                    "role": "user",
                    "content": user_query
                }
            ],
            # Python boolean; serialized to JSON `true` by requests
            "stream": True
        },
        stream=True,
        timeout=timeout,
    ) as chat_response:
        chat_response.raise_for_status()

        print("Bot: ", end="", flush=True)

        for line in chat_response.iter_lines():
            if not line:
                continue

            line = line.decode("utf-8")
            # Only "data: ..." events carry payloads; "[DONE]" ends the stream
            if not line.startswith("data: ") or line == "data: [DONE]":
                continue

            try:
                delta = json.loads(line[6:])["choices"][0]["delta"]
                text = delta.get("content")
            except (json.JSONDecodeError, KeyError, IndexError, TypeError, AttributeError):
                # Best effort: skip events that are not well-formed deltas,
                # without hiding unrelated errors such as KeyboardInterrupt
                continue

            # Some chunks carry no text (missing, null or empty content)
            if text:
                print(text, end="", flush=True)

    print(f"\n\nSources: {len(results)} documents")

# Stream RAG chat
rag_chat_stream(
    collection_name="documentation",
    user_query="Explain the API authentication process",
)

JavaScript RAG Chat

/**
 * Conversational RAG client: searches a collection for context on every
 * message and keeps a running chat history.
 */
class RAGChat {
  /**
   * @param {string} apiKey - Bearer token sent with every request.
   * @param {string} collectionName - Collection searched for context.
   */
  constructor(apiKey, collectionName) {
    this.apiKey = apiKey;
    this.collectionName = collectionName;
    this.baseUrl = 'http://localhost/api/v1';
    // Alternating { role: 'user' | 'assistant', content } entries
    this.history = [];
  }

  /**
   * POST a JSON body to an API path and return the parsed JSON response.
   * Throws on a non-2xx status so callers never try to read fields from an
   * error payload.
   *
   * @param {string} path - Path appended to baseUrl, e.g. '/rag/search'.
   * @param {object} body - Request payload, serialized as JSON.
   * @returns {Promise<object>} Parsed response body.
   */
  async _post(path, body) {
    const response = await fetch(`${this.baseUrl}${path}`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      throw new Error(`Request to ${path} failed with status ${response.status}`);
    }

    return await response.json();
  }

  /**
   * Search the collection for documents relevant to the query.
   *
   * @param {string} query - Search text.
   * @returns {Promise<object>} Search response; hits are under `results`.
   */
  async search(query) {
    return await this._post('/rag/search', {
      collection_name: this.collectionName,
      query,
      top_k: 5
    });
  }

  /**
   * Send one user message and return the reply with its sources.
   * History is updated only after both requests succeed.
   *
   * @param {string} userMessage - The user's text.
   * @returns {Promise<{response: string, sources: object[]}>}
   */
  async chat(userMessage) {
    // Search for context
    const searchResult = await this.search(userMessage);
    const context = searchResult.results.map(r => r.content).join('\n\n');

    // Build messages: system prompt, last 10 history entries, new message
    const messages = [
      {
        role: 'system',
        content: `Answer using this context:\n${context}`
      },
      ...this.history.slice(-10),
      {
        role: 'user',
        content: userMessage
      }
    ];

    // Generate response
    const data = await this._post('/chat/completions', {
      model: 'gpt-4',
      messages,
      temperature: 0.3
    });
    const assistantMessage = data.choices[0].message.content;

    // Update history
    this.history.push(
      { role: 'user', content: userMessage },
      { role: 'assistant', content: assistantMessage }
    );

    return {
      response: assistantMessage,
      sources: searchResult.results
    };
  }
}

// Usage
const chat = new RAGChat('YOUR_API_KEY', 'documentation');

// Ask a single question, then show the reply and the IDs of its sources
const result = await chat.chat('How do I configure API keys?');
console.log('Response:', result.response);

const documentIds = result.sources.map(({ metadata }) => metadata.document_id);
console.log('Sources:', documentIds);

React RAG Chat Component

import React, { useState } from 'react';

function RAGChat() {
  const [message, setMessage] = useState('');
  const [messages, setMessages] = useState([]);
  const [loading, setLoading] = useState(false);
  const [sources, setSources] = useState([]);

  const sendMessage = async () => {
    if (!message.trim() || loading) return;

    setLoading(true);

    try {
      // Search for context
      const searchResponse = await fetch('http://localhost/api/v1/rag/search', {
        method: 'POST',
        headers: {
          'Authorization': 'Bearer YOUR_API_KEY',
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({
          collection_name: 'documentation',
          query: message,
          top_k: 5
        })
      });

      const searchData = await searchResponse.json();
      const context = searchData.results.map(r => r.content).join('\n\n');

      // Generate response
      const chatResponse = await fetch('http://localhost/api/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': 'Bearer YOUR_API_KEY',
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({
          model: 'gpt-4',
          messages: [
            {
              role: 'system',
              content: `Answer using this context:\n${context}`
            },
            ...messages,
            {
              role: 'user',
              content: message
            }
          ],
          temperature: 0.3
        })
      });

      const chatData = await chatResponse.json();
      const response = chatData.choices[0].message.content;

      setMessages([...messages, { role: 'user', content: message }]);
      setMessages(prev => [...prev, { role: 'assistant', content: response }]);
      setSources(searchData.results);
      setMessage('');
    } catch (error) {
      console.error('Error:', error);
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="max-w-4xl mx-auto p-6">
      <h2 className="text-2xl font-bold mb-4">RAG Chat</h2>

      <div className="border rounded-lg p-4 h-96 overflow-y-auto mb-4">
        {messages.map((msg, i) => (
          <div key={i} className={`mb-2 ${msg.role === 'user' ? 'text-right' : 'text-left'}`}>
            <span className={`inline-block px-3 py-1 rounded ${
              msg.role === 'user' ? 'bg-blue-500 text-white' : 'bg-gray-200'
            }`}>
              {msg.content}
            </span>
          </div>
        ))}
      </div>

      {sources.length > 0 && (
        <div className="mb-4 p-3 bg-gray-50 rounded">
          <h3 className="font-bold text-sm mb-2">Sources Used:</h3>
          {sources.map((s, i) => (
            <div key={i} className="text-xs">
              {s.metadata.document_id} (score: {s.score.toFixed(2)})
            </div>
          ))}
        </div>
      )}

      <div className="flex space-x-2">
        <input
          type="text"
          value={message}
          onChange={(e) => setMessage(e.target.value)}
          onKeyPress={(e) => e.key === 'Enter' && sendMessage()}
          className="flex-1 px-4 py-2 border rounded"
          placeholder="Ask a question..."
          disabled={loading}
        />
        <button
          onClick={sendMessage}
          disabled={loading || !message}
          className="px-6 py-2 bg-blue-500 text-white rounded"
        >
          {loading ? 'Sending...' : 'Send'}
        </button>
      </div>
    </div>
  );
}

export default RAGChat;

Best Practices

Relevant context - Use top_k=5-7 for a good balance
Cite sources - Always reference the source documents
Temperature - Use 0.2-0.3 for factual answers
Conversation limit - Keep history manageable (10-20 messages)
No information - State clearly when the answer is not in the documents
Streaming - Use streaming for a better UX

Next Steps

Upload Example - Add more documents
Search Example - Learn search techniques
Managing Example - Manage your documents

Basic RAG Chat

RAG Chat with Citations

Conversational RAG Chat

RAG Chat with Streaming

JavaScript RAG Chat

React RAG Chat Component

Best Practices

Next Steps

Basic RAG Chat

RAG Chat with Citations

Conversational RAG Chat

RAG Chat with Streaming

JavaScript RAG Chat

React RAG Chat Component

Best Practices

Next Steps