LangChain Integration
Build RAG applications with WikiRest and LangChain.
Installation
pip install langchain langchain-openai requests

Custom Retriever
Create a custom retriever that fetches documents from WikiRest:
from typing import List

import requests
from langchain.schema import BaseRetriever, Document
from pydantic import Field


class WikiRestRetriever(BaseRetriever):
    """Retriever that uses the WikiRest API to fetch Wikipedia content."""

    api_key: str = Field(description="WikiRest API key")
    base_url: str = Field(default="https://api.wikirest.com/v1")
    k: int = Field(default=5, description="Number of documents to retrieve")

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Fetch relevant documents from WikiRest."""
        response = requests.get(
            f"{self.base_url}/search",
            headers={"X-API-Key": self.api_key},
            params={"q": query, "limit": self.k}
        )
        response.raise_for_status()
        data = response.json()

        documents = []
        for hit in data["hits"]:
            doc = Document(
                page_content=hit["text"],
                metadata={
                    "title": hit["title"],
                    "url": hit["url"],
                    "page_id": hit["page_id"],
                    "chunk_id": hit["id"],
                    "section": hit.get("section", "")
                }
            )
            documents.append(doc)
        return documents

    async def _aget_relevant_documents(self, query: str) -> List[Document]:
        """Async version - falls back to sync for simplicity."""
        return self._get_relevant_documents(query)
# Usage
retriever = WikiRestRetriever(
    api_key="YOUR_WIKIREST_API_KEY",
    k=5
)

docs = retriever.get_relevant_documents("quantum computing")
for doc in docs:
    print(f"Title: {doc.metadata['title']}")
    print(f"Content: {doc.page_content[:200]}...")
    print()
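Note that the async method above still blocks, since it simply delegates to requests. If you need genuinely non-blocking retrieval (for example with ainvoke, or the batch pattern later on this page), a minimal sketch of a replacement method using httpx (an extra dependency, not installed by the command above) could look like this inside WikiRestRetriever:

import httpx

# Sketch: drop-in replacement for the sync fallback inside WikiRestRetriever
async def _aget_relevant_documents(self, query: str) -> List[Document]:
    """Truly async retrieval via httpx instead of delegating to requests."""
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{self.base_url}/search",
            headers={"X-API-Key": self.api_key},
            params={"q": query, "limit": self.k},
        )
    response.raise_for_status()
    return [
        Document(
            page_content=hit["text"],
            # Metadata trimmed for brevity; mirror the sync path in practice
            metadata={"title": hit["title"], "url": hit["url"]},
        )
        for hit in response.json()["hits"]
    ]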
Basic RAG Chain

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
import os
# Initialize components
retriever = WikiRestRetriever(
    api_key=os.environ["WIKIREST_API_KEY"],
    k=5
)

llm = ChatOpenAI(
    model="gpt-4",
    temperature=0.7,
    api_key=os.environ["OPENAI_API_KEY"]
)
# Create prompt template
template = """Answer the question based on the following Wikipedia context.
Always cite your sources by mentioning the article title.
Context:
{context}
Question: {question}
Answer:"""
prompt = ChatPromptTemplate.from_template(template)
# Helper to format docs
def format_docs(docs):
    return "\n\n---\n\n".join([
        f"Source: {doc.metadata['title']}\n{doc.page_content}"
        for doc in docs
    ])
# Build the chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
# Use the chain
question = "What are the main applications of machine learning?"
answer = rag_chain.invoke(question)
print(answer)

Conversational RAG
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
# Create retriever
retriever = WikiRestRetriever(
    api_key=os.environ["WIKIREST_API_KEY"],
    k=5
)

# Create memory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"
)

# Create conversational chain
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model="gpt-4", temperature=0.7),
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    verbose=True
)
# Have a conversation
response1 = qa_chain.invoke({"question": "What is quantum computing?"})
print(f"AI: {response1['answer']}\n")
response2 = qa_chain.invoke({"question": "What are its main challenges?"})
print(f"AI: {response2['answer']}\n")
# The chain remembers context from previous questions
response3 = qa_chain.invoke({"question": "Who are the leading researchers in this field?"})
print(f"AI: {response3['answer']}") ReAct Agent with Wikipedia Tool
ReAct Agent with Wikipedia Tool

from langchain.agents import AgentExecutor, create_react_agent
from langchain.tools import Tool
from langchain import hub
# Create a tool from our retriever
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for information about a topic."""
    docs = retriever.get_relevant_documents(query)
    if not docs:
        return "No Wikipedia articles found for this query."
    results = []
    for doc in docs[:3]:
        results.append(f"**{doc.metadata['title']}**\n{doc.page_content}")
    return "\n\n---\n\n".join(results)
wiki_tool = Tool(
    name="Wikipedia",
    func=search_wikipedia,
    description="""Search Wikipedia for factual information. Use this when you need
    accurate, encyclopedic information about people, places, concepts, history, science, etc.
    Input should be a search query."""
)
# Create the agent
llm = ChatOpenAI(model="gpt-4", temperature=0)
prompt = hub.pull("hwchase17/react")  # requires the langchainhub package
agent = create_react_agent(
    llm=llm,
    tools=[wiki_tool],
    prompt=prompt
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=[wiki_tool],
    verbose=True,
    handle_parsing_errors=True
)
# Run the agent
response = agent_executor.invoke({
    "input": "Compare quantum computing with classical computing. What are the key differences?"
})
print(response["output"])

Structured Output with Sources
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List
class Source(BaseModel):
    title: str = Field(description="Wikipedia article title")
    url: str = Field(description="URL to the article")
    relevant_quote: str = Field(description="Relevant quote from the article")

class AnswerWithSources(BaseModel):
    answer: str = Field(description="The answer to the question")
    sources: List[Source] = Field(description="Sources used to answer")
    confidence: float = Field(description="Confidence score 0-1")
parser = PydanticOutputParser(pydantic_object=AnswerWithSources)
template = """Answer the question using the Wikipedia context below.
Include sources with relevant quotes.
Context:
{context}
Question: {question}
{format_instructions}"""
prompt = ChatPromptTemplate.from_template(template).partial(
    format_instructions=parser.get_format_instructions()
)
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | parser
)
result = chain.invoke("What is the theory of relativity?")
print(f"Answer: {result.answer}")
print(f"Confidence: {result.confidence}")
print("\nSources:")
for source in result.sources:
print(f" - {source.title}: {source.relevant_quote[:100]}...") Multi-Query Retrieval
Multi-Query Retrieval

from langchain.retrievers.multi_query import MultiQueryRetriever
# Create multi-query retriever for better recall
multi_retriever = MultiQueryRetriever.from_llm(
    retriever=WikiRestRetriever(
        api_key=os.environ["WIKIREST_API_KEY"],
        k=3
    ),
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0.5)
)
# This generates multiple queries and combines results
docs = multi_retriever.get_relevant_documents(
    "How does machine learning work?"
)
# The retriever generates queries like:
# - "machine learning algorithms explanation"
# - "how artificial neural networks learn"
# - "ML training process"
print(f"Retrieved {len(docs)} unique documents") Batch Processing
import asyncio
from typing import List
async def answer_questions_batch(questions: List[str]) -> List[str]:
    """Answer multiple questions concurrently."""
    # Reuses the prompt and format_docs helper from the Basic RAG Chain section
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    # Process all questions concurrently
    tasks = [chain.ainvoke(q) for q in questions]
    answers = await asyncio.gather(*tasks)
    return answers
# Example
questions = [
    "What is quantum entanglement?",
    "How does photosynthesis work?",
    "What caused the French Revolution?"
]

answers = asyncio.run(answer_questions_batch(questions))
for q, a in zip(questions, answers):
    print(f"Q: {q}")
    print(f"A: {a[:200]}...")
    print()

Response Caching
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
# Enable caching to reduce API calls
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
# Now repeated queries will use cached responses
answer1 = rag_chain.invoke("What is quantum computing?") # API call
answer2 = rag_chain.invoke("What is quantum computing?")  # Cached!

Best Practices
- Use appropriate k values: Start with k=5 and adjust based on response quality. More documents mean more context but also higher token cost.
- Cache aggressively: Wikipedia content changes infrequently, so cache retriever results as well as LLM responses to reduce API calls (see the first sketch below).
- Handle rate limits: Add retry logic around the retriever so 429 responses are handled gracefully (see the retry sketch below).
- Cite sources: Always include article titles and URLs in responses for transparency.
- Use streaming: For long responses, stream tokens to improve perceived latency (see the final sketch below).
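To illustrate the caching advice, here is a minimal in-process sketch using functools.lru_cache; cached_search is a hypothetical helper, and for persistence across runs you would swap in a disk or Redis cache:

from functools import lru_cache

@lru_cache(maxsize=256)
def cached_search(query: str):
    # Results are keyed by the query string; do not mutate the returned list
    return retriever.get_relevant_documents(query)

docs = cached_search("quantum computing")  # hits the WikiRest API
docs = cached_search("quantum computing")  # served from the in-memory cache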
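For rate limits, a simple exponential-backoff wrapper works; this is a sketch, and search_with_retry and the retry budget are illustrative rather than part of the WikiRest SDK:

import time
import requests

def search_with_retry(retriever: WikiRestRetriever, query: str, max_retries: int = 3):
    """Retry retrieval on HTTP 429, backing off exponentially."""
    for attempt in range(max_retries):
        try:
            return retriever.get_relevant_documents(query)
        except requests.HTTPError as exc:
            is_rate_limit = exc.response is not None and exc.response.status_code == 429
            if is_rate_limit and attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # back off 1s, 2s, 4s, ...
            else:
                raise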
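Streaming works out of the box on LCEL chains such as rag_chain, since StrOutputParser yields string chunks:

# Print tokens as they arrive instead of waiting for the full answer
for chunk in rag_chain.stream("What is quantum computing?"):
    print(chunk, end="", flush=True)
print()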