LlamaIndex Integration
Build retrieval-augmented generation (RAG) applications using LlamaIndex with WikiRest as the retriever.
Installation
pip install llama-index requests aiohttp sentence-transformers
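The examples below read the API key from the WIKIREST_API_KEY environment variable. One way to set it from Python (the key value is a placeholder):

import os

# Placeholder key; the retriever falls back to this variable when no key is passed
os.environ["WIKIREST_API_KEY"] = "your_api_key"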
Custom WikiRest Retriever
Basic retriever implementation
Create a custom retriever that fetches Wikipedia content from WikiRest:
import os
import requests
from typing import List

from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle


class WikiRestRetriever(BaseRetriever):
    """Custom retriever that fetches Wikipedia content from the WikiRest API."""

    def __init__(
        self,
        api_key: str | None = None,
        top_k: int = 5,
        base_url: str = "https://api.wikirest.com/v1",
    ):
        super().__init__()
        self.api_key = api_key or os.environ.get("WIKIREST_API_KEY")
        if not self.api_key:
            raise ValueError("WIKIREST_API_KEY is required")
        self.top_k = top_k
        self.base_url = base_url

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve relevant Wikipedia passages."""
        response = requests.get(
            f"{self.base_url}/search",
            headers={"X-API-Key": self.api_key},
            params={"q": query_bundle.query_str, "limit": self.top_k},
        )
        response.raise_for_status()
        data = response.json()

        nodes = []
        for i, hit in enumerate(data.get("hits", [])):
            node = TextNode(
                text=hit["text"],
                metadata={
                    "title": hit["title"],
                    "url": hit["url"],
                    "section": hit.get("section", ""),
                    "chunk_id": hit["id"],
                    "page_id": hit["page_id"],
                },
            )
            # Score based on position (first results are most relevant);
            # clamp at 0 so scores stay non-negative when top_k > 10
            score = max(1.0 - (i * 0.1), 0.0)
            nodes.append(NodeWithScore(node=node, score=score))
        return nodes
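Before wiring the retriever into an engine, it can be smoke-tested on its own; the public retrieve method accepts a plain query string (the query below is arbitrary):

# Quick standalone check of the retriever
retriever = WikiRestRetriever(top_k=3)
nodes = retriever.retrieve("history of the printing press")
for node in nodes:
    print(node.score, node.metadata["title"])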
Query Engine
Using with a query engine
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# Initialize retriever
retriever = WikiRestRetriever(top_k=5)

# Create response synthesizer
response_synthesizer = get_response_synthesizer(response_mode="compact")

# Create query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Query
response = query_engine.query("What is quantum computing?")
print(response)

# Access source nodes
for node in response.source_nodes:
    print(f"Source: {node.metadata['title']}")
    print(f"URL: {node.metadata['url']}")
    print(f"Score: {node.score}")
    print("---")
Chat Engine
Building a conversational agent
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.llms.openai import OpenAI

# Initialize components
retriever = WikiRestRetriever(top_k=5)
llm = OpenAI(model="gpt-4")

# Create chat engine with Wikipedia context
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    llm=llm,
    system_prompt="""You are a helpful assistant that answers questions using Wikipedia content.
Always cite your sources by including the Wikipedia article titles and URLs.
If you don't find relevant information, acknowledge that and provide general knowledge.""",
)

# Have a conversation
response = chat_engine.chat("Tell me about machine learning")
print(response)

response = chat_engine.chat("How does it relate to artificial intelligence?")
print(response)

# Get chat history
for message in chat_engine.chat_history:
    print(f"{message.role}: {message.content[:100]}...")
Sub-Question Query Engine
Breaking down complex questions
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

# Create a query engine tool
wiki_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="wikipedia",
        description="Search Wikipedia for factual information on any topic",
    ),
)

# Create sub-question query engine
sub_question_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[wiki_tool],
    verbose=True,
)

# Complex question that requires multiple lookups
response = sub_question_engine.query(
    "Compare the economic policies of the United States and China"
)
print(response)
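Note that question decomposition runs through the globally configured LLM. A minimal setup, assuming an OpenAI API key is available in the environment:

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# SubQuestionQueryEngine uses Settings.llm to generate the sub-questions
Settings.llm = OpenAI(model="gpt-4")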
Semantic Search with Reranking
Adding reranking for better results
from llama_index.core.postprocessor import SentenceTransformerRerank

# Create reranker
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_n=3,
)

# Create query engine with reranking
query_engine = RetrieverQueryEngine(
    retriever=WikiRestRetriever(top_k=10),  # Get more initial results
    response_synthesizer=response_synthesizer,
    node_postprocessors=[reranker],  # Rerank down to the top 3
)

response = query_engine.query("What are the symptoms of diabetes?")
print(response)
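Postprocessors can also be chained. Since the retriever assigns rank-based scores (1.0 down to 0.1 for top_k=10), a similarity cutoff can drop the weakest hits before the cross-encoder runs; the 0.2 threshold below is illustrative:

from llama_index.core.postprocessor import SimilarityPostprocessor

# Drop low-scoring nodes first, then rerank the survivors
query_engine = RetrieverQueryEngine(
    retriever=WikiRestRetriever(top_k=10),
    response_synthesizer=response_synthesizer,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.2),
        reranker,
    ],
)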
Streaming Responses
Stream response tokens
from llama_index.llms.openai import OpenAI

# Configure streaming
llm = OpenAI(model="gpt-4")

# Create streaming query engine
query_engine = RetrieverQueryEngine.from_args(
    retriever=WikiRestRetriever(top_k=5),
    llm=llm,
    streaming=True,
)

# Stream the response
streaming_response = query_engine.query("Explain the theory of relativity")

# Print tokens as they arrive
for text in streaming_response.response_gen:
    print(text, end="", flush=True)
print()  # Newline at the end
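The streaming response object also exposes a convenience method that consumes the generator and prints tokens as they arrive, equivalent to the loop above:

streaming_response = query_engine.query("Explain the theory of relativity")
streaming_response.print_response_stream()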
Async Retrieval
Async retriever for better performance
import asyncio
from typing import List

import aiohttp
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle


class AsyncWikiRestRetriever(BaseRetriever):
    """Async retriever for the WikiRest API."""

    def __init__(self, api_key: str, top_k: int = 5):
        super().__init__()
        self.api_key = api_key
        self.top_k = top_k
        self.base_url = "https://api.wikirest.com/v1"

    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Async retrieve Wikipedia passages."""
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{self.base_url}/search",
                headers={"X-API-Key": self.api_key},
                # Stringify the limit for aiohttp's query encoding
                params={"q": query_bundle.query_str, "limit": str(self.top_k)},
            ) as response:
                response.raise_for_status()
                data = await response.json()

        nodes = []
        for i, hit in enumerate(data.get("hits", [])):
            node = TextNode(
                text=hit["text"],
                metadata={
                    "title": hit["title"],
                    "url": hit["url"],
                },
            )
            nodes.append(NodeWithScore(node=node, score=1.0 - (i * 0.1)))
        return nodes

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Sync fallback that runs the async path to completion."""
        return asyncio.run(self._aretrieve(query_bundle))


# Usage: the public aretrieve method wraps _aretrieve and accepts a plain string
async def main():
    retriever = AsyncWikiRestRetriever(api_key="your_api_key")
    nodes = await retriever.aretrieve("artificial intelligence")
    for node in nodes:
        print(f"- {node.metadata['title']}: {node.text[:100]}...")

asyncio.run(main())
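Because BaseRetriever routes its public aretrieve method through _aretrieve, the async retriever also works with a query engine's native async entry point. A sketch, assuming an LLM is configured via Settings.llm for synthesis:

from llama_index.core.query_engine import RetrieverQueryEngine

async def ask():
    engine = RetrieverQueryEngine.from_args(
        retriever=AsyncWikiRestRetriever(api_key="your_api_key"),
    )
    # aquery awaits _aretrieve directly instead of the sync wrapper
    response = await engine.aquery("What is deep learning?")
    print(response)

asyncio.run(ask())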
Document Summary Index
Building a summary index from Wikipedia pages
import requests
from llama_index.core import Document, SummaryIndex


def fetch_wikipedia_page(page_id: int, api_key: str) -> Document:
    """Fetch a full Wikipedia page and convert it to a LlamaIndex Document."""
    response = requests.get(
        f"https://api.wikirest.com/v1/page/{page_id}",
        headers={"X-API-Key": api_key},
        params={"format": "concat"},
    )
    response.raise_for_status()
    data = response.json()
    return Document(
        text=data.get("text", ""),
        metadata={
            "title": data["page"]["title"],
            "url": data["page"]["url"],
            "page_id": page_id,
        },
    )


# Fetch multiple pages
page_ids = [12345, 67890, 11111]  # Example Wikipedia page IDs
documents = [
    fetch_wikipedia_page(pid, "your_api_key")
    for pid in page_ids
]

# Create summary index
index = SummaryIndex.from_documents(documents)

# Query
query_engine = index.as_query_engine()
response = query_engine.query("What are the main topics covered?")
print(response)
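Rebuilding the index means re-fetching every page, so it is worth persisting it to disk with LlamaIndex's standard storage APIs; the ./wiki_index directory below is arbitrary:

from llama_index.core import StorageContext, load_index_from_storage

# Save the index so the pages don't have to be fetched again
index.storage_context.persist(persist_dir="./wiki_index")

# Reload it in a later session
storage_context = StorageContext.from_defaults(persist_dir="./wiki_index")
index = load_index_from_storage(storage_context)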