LlamaIndex Integration
Build retrieval-augmented generation (RAG) applications using LlamaIndex with WikiRest as the retriever.
Installation
pip install llama-index requests aiohttp sentence-transformers
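The examples below read the API key from the WIKIREST_API_KEY environment variable. One way to set it from Python (the key value is a placeholder):

import os

# Placeholder key; the retriever falls back to this variable when no key is passed
os.environ["WIKIREST_API_KEY"] = "your_api_key"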
Custom WikiRest Retriever
Basic retriever implementation
Create a custom retriever that fetches Wikipedia content from WikiRest:
import os
import requests
from typing import List

from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle


class WikiRestRetriever(BaseRetriever):
    """Custom retriever that fetches Wikipedia content from the WikiRest API."""

    def __init__(
        self,
        api_key: str | None = None,
        top_k: int = 5,
        base_url: str = "https://api.wikirest.com/v1",
    ):
        super().__init__()
        self.api_key = api_key or os.environ.get("WIKIREST_API_KEY")
        if not self.api_key:
            raise ValueError("WIKIREST_API_KEY is required")
        self.top_k = top_k
        self.base_url = base_url

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve relevant Wikipedia passages."""
        response = requests.get(
            f"{self.base_url}/search",
            headers={"X-API-Key": self.api_key},
            params={"q": query_bundle.query_str, "limit": self.top_k},
        )
        response.raise_for_status()
        data = response.json()

        nodes = []
        for i, hit in enumerate(data.get("hits", [])):
            node = TextNode(
                text=hit["text"],
                metadata={
                    "title": hit["title"],
                    "url": hit["url"],
                    "section": hit.get("section", ""),
                    "chunk_id": hit["id"],
                    "page_id": hit["page_id"],
                },
            )
            # Score based on position (first results are most relevant);
            # clamp at 0 so scores stay non-negative when top_k > 10
            score = max(1.0 - (i * 0.1), 0.0)
            nodes.append(NodeWithScore(node=node, score=score))
        return nodes
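Before wiring the retriever into an engine, it can be smoke-tested on its own; the public retrieve method accepts a plain query string (the query below is arbitrary):

# Quick standalone check of the retriever
retriever = WikiRestRetriever(top_k=3)
nodes = retriever.retrieve("history of the printing press")
for node in nodes:
    print(node.score, node.metadata["title"])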
Query Engine
Using with a query engine
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# Initialize retriever
retriever = WikiRestRetriever(top_k=5)

# Create response synthesizer
response_synthesizer = get_response_synthesizer(response_mode="compact")

# Create query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Query
response = query_engine.query("What is quantum computing?")
print(response)

# Access source nodes
for node in response.source_nodes:
    print(f"Source: {node.metadata['title']}")
    print(f"URL: {node.metadata['url']}")
    print(f"Score: {node.score}")
    print("---")
Chat Engine
Building a conversational agent
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.llms.openai import OpenAI

# Initialize components
retriever = WikiRestRetriever(top_k=5)
llm = OpenAI(model="gpt-4")

# Create chat engine with Wikipedia context
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    llm=llm,
    system_prompt="""You are a helpful assistant that answers questions using Wikipedia content.
Always cite your sources by including the Wikipedia article titles and URLs.
If you don't find relevant information, acknowledge that and provide general knowledge.""",
)

# Have a conversation
response = chat_engine.chat("Tell me about machine learning")
print(response)

response = chat_engine.chat("How does it relate to artificial intelligence?")
print(response)

# Get chat history
for message in chat_engine.chat_history:
    print(f"{message.role}: {message.content[:100]}...")
Sub-Question Query Engine
Breaking down complex questions
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

# Create a query engine tool
wiki_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="wikipedia",
        description="Search Wikipedia for factual information on any topic",
    ),
)

# Create sub-question query engine
sub_question_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[wiki_tool],
    verbose=True,
)

# Complex question that requires multiple lookups
response = sub_question_engine.query(
    "Compare the economic policies of the United States and China"
)
print(response)
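Note that question decomposition runs through the globally configured LLM. A minimal setup, assuming an OpenAI API key is available in the environment:

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# SubQuestionQueryEngine uses Settings.llm to generate the sub-questions
Settings.llm = OpenAI(model="gpt-4")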
Semantic Search with Reranking
Adding reranking for better results
from llama_index.core.postprocessor import SentenceTransformerRerank

# Create reranker
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_n=3,
)

# Create query engine with reranking
query_engine = RetrieverQueryEngine(
    retriever=WikiRestRetriever(top_k=10),  # Get more initial results
    response_synthesizer=response_synthesizer,
    node_postprocessors=[reranker],  # Rerank down to the top 3
)

response = query_engine.query("What are the symptoms of diabetes?")
print(response)
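Postprocessors can also be chained. Since the retriever assigns rank-based scores (1.0 down to 0.1 for top_k=10), a similarity cutoff can drop the weakest hits before the cross-encoder runs; the 0.2 threshold below is illustrative:

from llama_index.core.postprocessor import SimilarityPostprocessor

# Drop low-scoring nodes first, then rerank the survivors
query_engine = RetrieverQueryEngine(
    retriever=WikiRestRetriever(top_k=10),
    response_synthesizer=response_synthesizer,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.2),
        reranker,
    ],
)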
Streaming Responses
Stream response tokens
from llama_index.llms.openai import OpenAI

# Configure streaming
llm = OpenAI(model="gpt-4")

# Create streaming query engine
query_engine = RetrieverQueryEngine.from_args(
    retriever=WikiRestRetriever(top_k=5),
    llm=llm,
    streaming=True,
)

# Stream the response
streaming_response = query_engine.query("Explain the theory of relativity")

# Print tokens as they arrive
for text in streaming_response.response_gen:
    print(text, end="", flush=True)
print()  # Newline at the end
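The streaming response object also exposes a convenience method that consumes the generator and prints tokens as they arrive, equivalent to the loop above:

streaming_response = query_engine.query("Explain the theory of relativity")
streaming_response.print_response_stream()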
Async Retrieval
Async retriever for better performance
import asyncio
from typing import List

import aiohttp
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle


class AsyncWikiRestRetriever(BaseRetriever):
    """Async retriever for the WikiRest API."""

    def __init__(self, api_key: str, top_k: int = 5):
        super().__init__()
        self.api_key = api_key
        self.top_k = top_k
        self.base_url = "https://api.wikirest.com/v1"

    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Async retrieve Wikipedia passages."""
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{self.base_url}/search",
                headers={"X-API-Key": self.api_key},
                # Stringify the limit for aiohttp's query encoding
                params={"q": query_bundle.query_str, "limit": str(self.top_k)},
            ) as response:
                response.raise_for_status()
                data = await response.json()

        nodes = []
        for i, hit in enumerate(data.get("hits", [])):
            node = TextNode(
                text=hit["text"],
                metadata={
                    "title": hit["title"],
                    "url": hit["url"],
                },
            )
            nodes.append(NodeWithScore(node=node, score=1.0 - (i * 0.1)))
        return nodes

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Sync fallback that runs the async path to completion."""
        return asyncio.run(self._aretrieve(query_bundle))


# Usage: the public aretrieve method wraps _aretrieve and accepts a plain string
async def main():
    retriever = AsyncWikiRestRetriever(api_key="your_api_key")
    nodes = await retriever.aretrieve("artificial intelligence")
    for node in nodes:
        print(f"- {node.metadata['title']}: {node.text[:100]}...")

asyncio.run(main())
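Because BaseRetriever routes its public aretrieve method through _aretrieve, the async retriever also works with a query engine's native async entry point. A sketch, assuming an LLM is configured via Settings.llm for synthesis:

from llama_index.core.query_engine import RetrieverQueryEngine

async def ask():
    engine = RetrieverQueryEngine.from_args(
        retriever=AsyncWikiRestRetriever(api_key="your_api_key"),
    )
    # aquery awaits _aretrieve directly instead of the sync wrapper
    response = await engine.aquery("What is deep learning?")
    print(response)

asyncio.run(ask())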
Document Summary Index
Building a summary index from Wikipedia pages
import requests
from llama_index.core import Document, SummaryIndex


def fetch_wikipedia_page(page_id: int, api_key: str) -> Document:
    """Fetch a full Wikipedia page and convert it to a LlamaIndex Document."""
    response = requests.get(
        f"https://api.wikirest.com/v1/page/{page_id}",
        headers={"X-API-Key": api_key},
        params={"format": "concat"},
    )
    response.raise_for_status()
    data = response.json()
    return Document(
        text=data.get("text", ""),
        metadata={
            "title": data["page"]["title"],
            "url": data["page"]["url"],
            "page_id": page_id,
        },
    )


# Fetch multiple pages
page_ids = [12345, 67890, 11111]  # Example Wikipedia page IDs
documents = [
    fetch_wikipedia_page(pid, "your_api_key")
    for pid in page_ids
]

# Create summary index
index = SummaryIndex.from_documents(documents)

# Query
query_engine = index.as_query_engine()
response = query_engine.query("What are the main topics covered?")
print(response)
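Rebuilding the index means re-fetching every page, so it is worth persisting it to disk with LlamaIndex's standard storage APIs; the ./wiki_index directory below is arbitrary:

from llama_index.core import StorageContext, load_index_from_storage

# Save the index so the pages don't have to be fetched again
index.storage_context.persist(persist_dir="./wiki_index")

# Reload it in a later session
storage_context = StorageContext.from_defaults(persist_dir="./wiki_index")
index = load_index_from_storage(storage_context)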