LlamaIndex Integration

Integrate Enclava with LlamaIndex for advanced RAG applications.

Installation

pip install llama-index
pip install llama-index-llms-openai
pip install llama-index-embeddings-openai

Basic Configuration

from llama_index.llms.openai import OpenAI as LlamaOpenAI

# Configure with Enclava endpoint
llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

# Simple completion
response = llm.complete("Hello, LlamaIndex!")
print(response.text)

Complete Example

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.llms import ChatMessage

class EnclavaLlamaIndex:
    def __init__(self, api_key, base_url="https://localhost/api/v1"):
        self.llm = LlamaOpenAI(
            api_base=base_url,
            api_key=api_key,
            model="gpt-3.5-turbo"
        )
        self.embed_model = OpenAIEmbedding(
            api_base=base_url,
            api_key=api_key
        )

    def complete(self, prompt):
        """Generate a completion"""
        response = self.llm.complete(prompt)
        return response.text

    def chat(self, messages):
        """Chat with a list of messages"""
        response = self.llm.chat(messages)
        return response.message.content

# Usage
client = EnclavaLlamaIndex(api_key="YOUR_API_KEY")

# Complete
response = client.complete("Tell me about AI")
print(response)

# Chat (LlamaIndex LLMs expect ChatMessage objects, not plain dicts)
messages = [
    ChatMessage(role="system", content="You are helpful."),
    ChatMessage(role="user", content="What is 2+2?")
]
response = client.chat(messages)
print(response)

RAG with LlamaIndex

Simple RAG

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Configure Enclava
llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

embed_model = OpenAIEmbedding(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY"
)

# Load documents
documents = SimpleDirectoryReader("documents/").load_data()

# Create index
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model
)

# Create query engine
query_engine = index.as_query_engine(llm=llm)

# Query
response = query_engine.query("What is Enclava?")
print(f"Answer: {response}")
print(f"Sources: {response.source_nodes}")

RAG with Custom Retrieval

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

embed_model = OpenAIEmbedding(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY"
)

# Load and index documents
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Create query engine with custom settings
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=5,       # Retrieve top 5 nodes
    response_mode="compact"   # Compact retrieved context before answering
)

# Query
response = query_engine.query("How do I use RAG in Enclava?")
print(f"Answer: {response}")

RAG with Streaming

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo",
    streaming=True
)

embed_model = OpenAIEmbedding(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY"
)

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Create streaming query engine
query_engine = index.as_query_engine(
    llm=llm,
    streaming=True
)

# Stream response
query = "Tell me about Enclava's features"
response_stream = query_engine.query(query)

for text in response_stream.response_gen:
    print(text, end="", flush=True)

Chat Engine

Simple Chat

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

embed_model = OpenAIEmbedding(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY"
)

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Create chat engine with built-in conversation memory
chat_engine = index.as_chat_engine(
    llm=llm,
    chat_mode="condense_question",  # Condense follow-up questions into standalone queries
    verbose=True
)

# Chat
response1 = chat_engine.chat("What is Enclava?")
print(f"User: What is Enclava?")
print(f"AI: {response1}")

response2 = chat_engine.chat("How does it work?")
print(f"User: How does it work?")
print(f"AI: {response2}")

Chat with Streaming

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo",
    streaming=True
)

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

chat_engine = index.as_chat_engine(
    llm=llm,
    streaming=True,
    chat_mode="condense_question"
)

# Stream chat response
response_stream = chat_engine.stream_chat("Tell me a story")

for text in response_stream.response_gen:
    print(text, end="", flush=True)

Index Management

Persist Index

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Build the index, then persist it to disk
documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model
)

index.storage_context.persist("./storage")

Load Persisted Index

from llama_index.core import StorageContext, load_index_from_storage

# Load from storage (pass the same embedding model used to build the index)
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context, embed_model=embed_model)

# Create query engine
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("What is Enclava?")
print(response)

Advanced RAG

Hybrid Search (Keyword + Semantic)

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Use hybrid search (requires a vector store that supports hybrid queries)
query_engine = index.as_query_engine(
    llm=llm,
    vector_store_query_mode="hybrid",  # Combine keyword and semantic retrieval
    similarity_top_k=10
)

response = query_engine.query("How do I configure API keys?")
print(response)

Reranking

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Use reranking for better results
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=20,  # Retrieve more candidates
    node_postprocessors=[
        LLMRerank(llm=llm, top_n=10)  # Rerank retrieved nodes and keep the best 10
    ]
)

response = query_engine.query("What are the security features?")
print(response)

Multi-Index RAG

from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

llm = LlamaOpenAI(
    api_base="https://your-enclava-instance/api/v1",
    api_key="YOUR_API_KEY",
    model="gpt-3.5-turbo"
)

# Create multiple indexes
docs_index = VectorStoreIndex.from_documents(
    SimpleDirectoryReader("docs/").load_data(),
    embed_model=embed_model
)

faq_index = VectorStoreIndex.from_documents(
    SimpleDirectoryReader("faq/").load_data(),
    embed_model=embed_model
)

# Create query engines for each index
docs_engine = docs_index.as_query_engine(llm=llm, similarity_top_k=3)
faq_engine = faq_index.as_query_engine(llm=llm, similarity_top_k=2)

# Query multiple indexes and combine results
query = "How do I create an API key?"

docs_response = docs_engine.query(query)
faq_response = faq_engine.query(query)

# Combine results
combined_context = f"""
Documentation:
{docs_response}

FAQ:
{faq_response}
"""

# Generate final answer
final_response = llm.complete(
    f"Answer this question using the context below:\n\n{combined_context}\n\nQuestion: {query}"
)

print(final_response.text)

Best Practices

  1. Chunking: Use chunk sizes appropriate to your documents and tune them via the node parser (see the sketch below)
  2. Embedding Model: Choose an embedding model that matches your language and domain
  3. Retrieval: Adjust similarity_top_k based on document size and chunking
  4. Memory: Use a chat engine for conversational applications
  5. Persistence: Persist indexes to avoid re-embedding documents on every run
  6. Hybrid Search: Use hybrid search when exact keyword matches matter
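
A minimal sketch tying these practices together. The chunk_size, chunk_overlap, and similarity_top_k values are illustrative starting points, not Enclava recommendations; llm and embed_model are the Enclava-backed objects configured in the examples above.

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.node_parser import SentenceSplitter

# Illustrative values only -- tune chunk_size/chunk_overlap for your documents
Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=50)
Settings.llm = llm                  # Enclava-backed LLM from the examples above
Settings.embed_model = embed_model  # Enclava-backed embeddings

documents = SimpleDirectoryReader("documents/").load_data()
index = VectorStoreIndex.from_documents(documents)

# Persist once, then reload later instead of re-embedding
index.storage_context.persist("./storage")

# A larger top_k surfaces more context at the cost of latency and token usage
query_engine = index.as_query_engine(similarity_top_k=5)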

Next Steps