LlamaIndex Integration
Integrate Enclava with LlamaIndex for advanced RAG applications.
Installation
pip install llama-index
pip install llama-index-llms-openai
pip install llama-index-embeddings-openai
Basic Configuration
from llama_index.llms.openai import OpenAI as LlamaOpenAI
# Configure with Enclava endpoint
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
# Simple completion
response = llm.complete("Hello, LlamaIndex!")
print(response.text)
Complete Example
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.llms import ChatMessage

class EnclavaLlamaIndex:
    def __init__(self, api_key, base_url="https://your-enclava-instance/api/v1"):
        self.llm = LlamaOpenAI(
            api_base=base_url,
            api_key=api_key,
            model="gpt-3.5-turbo"
        )
        self.embed_model = OpenAIEmbedding(
            api_base=base_url,
            api_key=api_key
        )

    def complete(self, prompt):
        """Generate a completion"""
        response = self.llm.complete(prompt)
        return response.text

    def chat(self, messages):
        """Chat with a list of {"role": ..., "content": ...} message dicts"""
        chat_messages = [
            ChatMessage(role=m["role"], content=m["content"]) for m in messages
        ]
        response = self.llm.chat(chat_messages)
        return response.message.content
# Usage
client = EnclavaLlamaIndex(api_key="YOUR_API_KEY")
# Complete
response = client.complete("Tell me about AI")
print(response)
# Chat
messages = [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "What is 2+2?"}
]
response = client.chat(messages)
print(response)
RAG with LlamaIndex
Simple RAG
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# Configure Enclava
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
embed_model = OpenAIEmbedding(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY"
)
# Load documents
documents = SimpleDirectoryReader("documents/").load_data()
# Create index
index = VectorStoreIndex.from_documents(
documents,
embed_model=embed_model
)
# Create query engine
query_engine = index.as_query_engine(llm=llm)
# Query
response = query_engine.query("What is Enclava?")
print(f"Answer: {response}")
print(f"Sources: {response.source_nodes}")
RAG with Custom Retrieval
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
embed_model = OpenAIEmbedding(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY"
)
# Load and index documents
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# Create query engine with custom settings
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=5,        # Retrieve the top 5 most similar chunks
    response_mode="compact"    # Compact retrieved context before answering
)
# Query
response = query_engine.query("How do I use RAG in Enclava?")
print(f"Answer: {response}")
RAG with Streaming
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)
embed_model = OpenAIEmbedding(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY"
)
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# Create streaming query engine
query_engine = index.as_query_engine(
llm=llm,
streaming=True
)
# Stream response
query = "Tell me about Enclava's features"
response_stream = query_engine.query(query)
for text in response_stream.response_gen:
    print(text, end="", flush=True)
Chat Engine
Simple Chat
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
embed_model = OpenAIEmbedding(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY"
)
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# Create chat engine (LlamaIndex manages conversation memory internally)
chat_engine = index.as_chat_engine(
    llm=llm,
    chat_mode="condense_question",  # Condense follow-up questions into standalone queries
    verbose=True
)
# Chat
response1 = chat_engine.chat("What is Enclava?")
print(f"User: What is Enclava?")
print(f"AI: {response1}")
response2 = chat_engine.chat("How does it work?")
print(f"User: How does it work?")
print(f"AI: {response2}")
Chat with Streaming
# Reuse the embed_model configured in the previous example
llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# Streaming is requested per call via stream_chat()
chat_engine = index.as_chat_engine(
    llm=llm,
    chat_mode="condense_question"
)
# Stream chat response
response_stream = chat_engine.stream_chat("Tell me a story")
for text in response_stream.response_gen:
    print(text, end="", flush=True)
Index Management
Persist Index
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Build the index (embed_model configured as in the earlier examples), then persist it
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model
)
index.storage_context.persist("./storage")
Load Persisted Index
from llama_index.core import StorageContext, load_index_from_storage

# Load from storage (pass the same embed_model used to build the index)
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context, embed_model=embed_model)

# Create query engine
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("What is Enclava?")
print(response)
Advanced RAG
Hybrid Search (Keyword + Semantic)
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# embed_model: reuse the OpenAIEmbedding instance configured in the earlier examples
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# Hybrid keyword + semantic search. Requires a vector store with hybrid support
# (e.g. Qdrant or Weaviate); the default in-memory store is semantic-only.
query_engine = index.as_query_engine(
    llm=llm,
    vector_store_query_mode="hybrid",
    similarity_top_k=10
)
response = query_engine.query("How do I configure API keys?")
print(response)
Reranking
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import SentenceTransformerRerank
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# Rerank retrieved nodes with a cross-encoder (requires the sentence-transformers package)
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_n=10
)
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=20,  # Retrieve more candidates, then rerank down to the top 10
    node_postprocessors=[reranker]
)
response = query_engine.query("What are the security features?")
print(response)
Multi-Index RAG
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
llm = LlamaOpenAI(
api_base="https://your-enclava-instance/api/v1",
api_key="YOUR_API_KEY",
model="gpt-3.5-turbo"
)
# Create multiple indexes
docs_index = VectorStoreIndex.from_documents(
SimpleDirectoryReader("docs/").load_data(),
embed_model=embed_model
)
faq_index = VectorStoreIndex.from_documents(
SimpleDirectoryReader("faq/").load_data(),
embed_model=embed_model
)
# Create query engines for each index
docs_engine = docs_index.as_query_engine(llm=llm, similarity_top_k=3)
faq_engine = faq_index.as_query_engine(llm=llm, similarity_top_k=2)
# Query multiple indexes and combine results
query = "How do I create an API key?"
docs_response = docs_engine.query(query)
faq_response = faq_engine.query(query)
# Combine results
combined_context = f"""
Documentation:
{docs_response}
FAQ:
{faq_response}
"""
# Generate final answer
final_response = llm.complete(
f"Answer this question using the context below:\n\n{combined_context}\n\nQuestion: {query}"
)
print(final_response.text)
Best Practices
- Chunking: Use chunk sizes appropriate for your documents (see the sketch after this list)
- Embedding Model: Choose an embedding model based on your needs
- Retrieval: Adjust similarity_top_k based on document size
- Memory: Use the chat engine for conversational applications
- Persistence: Persist indexes to avoid re-embedding
- Hybrid Search: Use hybrid search for better keyword matching
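As a minimal sketch of the chunking and embedding-model practices above: chunk size and overlap can be set globally through LlamaIndex Settings, and the embedding model is chosen when constructing OpenAIEmbedding. The values 512/50 and the model name text-embedding-3-small are illustrative assumptions, not Enclava defaults; use whatever models your instance exposes.
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding

# Illustrative chunking values; tune chunk_size and chunk_overlap for your documents
Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=50)

# Example embedding model name; pick one your Enclava instance actually serves
embed_model = OpenAIEmbedding(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="text-embedding-3-small"
)

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)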
Next Steps
- LangChain Integration - Use with LangChain
- Python Integration - Direct Python SDK usage
- RAG Documentation - Enclava's built-in RAG features