Back to use cases
Builder tutorial · 2–4 hours

Build a Legal Research Assistant (RAG)

Create a retrieval-augmented generation pipeline that answers legal questions grounded in real European case law and legislation.

RAG · API · LLM · Python

What you'll build

Retrieval-Augmented Generation (RAG) is the most effective pattern for building AI tools that give accurate, source-grounded answers to legal questions. Instead of relying on an LLM's training data alone, you retrieve relevant documents from the Moonlit API and feed them into the prompt as context. This tutorial walks through building a complete RAG pipeline: the user asks a legal question, the system searches Moonlit for relevant case law and legislation, retrieves full document texts, and passes them to an LLM with instructions to answer based only on the provided sources. The result is a research assistant that cites specific ECLI numbers and statutory provisions, dramatically reducing hallucination risk. This is the foundational architecture used by legal AI products, in-house research tools, and compliance platforms across Europe.

Architecture


  ┌────────────┐     ┌──────────────────┐     ┌────────────────────┐
  │  User      │     │  Moonlit API     │     │  Document          │
  │  Question  │────▶│  hybrid_search   │────▶│  retrieve_document │
  └────────────┘     │  _reranked       │     │  (full text)       │
                     └──────────────────┘     └─────────┬──────────┘
                                                        │
                                                        ▼
  ┌────────────┐     ┌──────────────────┐     ┌────────────────────┐
  │  Cited     │     │  LLM             │     │  Prompt +          │
  │  Answer    │◀────│  (Claude/GPT)    │◀────│  Retrieved Docs    │
  └────────────┘     └──────────────────┘     └────────────────────┘

Prerequisites

  • A Moonlit API key
  • Python 3.10+ installed
  • An Anthropic API key (for Claude) or OpenAI API key
  • pip install requests anthropic

Step-by-step

1

Search for relevant documents

Use hybrid search with reranking for maximum precision. The reranked endpoint ensures the top results are truly the most relevant to the legal question, which is critical for RAG quality.

import requests

MOONLIT_KEY = "your-moonlit-api-key"
BASE_URL = "https://api.moonlit.ai/v1.1"

# requests has NO default timeout: without one, a hung connection
# blocks the caller forever.
REQUEST_TIMEOUT = 30

def search_legal_sources(question: str, jurisdictions: list[str] | None = None) -> list[dict]:
    """Search for relevant legal documents using hybrid reranked search.

    Args:
        question: The legal question to search for.
        jurisdictions: Jurisdictions to restrict the search to; defaults to
            ["European Union", "Netherlands"] when omitted.

    Returns:
        A list of search-result dicts from the Moonlit API.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    payload = {
        "query": question,
        "jurisdictions": jurisdictions or ["European Union", "Netherlands"],
        "documentTypes": ["case_law", "legislation"],
        "semantic_weight": 0.6,  # 60% semantic / 40% keyword weighting
        "num_results": 5,
    }

    response = requests.post(
        f"{BASE_URL}/search/hybrid_search_reranked",
        headers={
            "Ocp-Apim-Subscription-Key": MOONLIT_KEY,
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()["result"]["results"]
2

Retrieve full document texts

For each search result, fetch the full document including its text and AI summary. This gives the LLM the complete legal reasoning, not just a snippet.

def retrieve_document(doc_id: str) -> dict:
    """Retrieve full document text and metadata.

    Args:
        doc_id: Moonlit document identifier (e.g. an ECLI number).

    Returns:
        The raw JSON document payload from the API.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    response = requests.get(
        f"{BASE_URL}/document/retrieve_document",
        headers={"Ocp-Apim-Subscription-Key": MOONLIT_KEY},
        params={"DocumentIdentifier": doc_id},
        timeout=30,  # requests has no default timeout; avoid hanging forever
    )
    response.raise_for_status()
    return response.json()

def gather_context(results: list[dict]) -> str:
    """Build context string from search results with full document text.

    Args:
        results: Search-result dicts as returned by ``search_legal_sources``.

    Returns:
        One string of numbered, labelled source sections for the LLM prompt.
    """
    context_parts = []
    for i, result in enumerate(results, 1):
        doc = retrieve_document(result["identifier"])
        # dict.get(key, default) still returns None when the key exists with
        # a None value, which would crash len() below — chain with `or` so
        # the result is always a non-empty string.
        text = doc.get("text") or doc.get("summary") or "No text available."
        # Truncate very long documents to ~3000 chars for the LLM context
        if len(text) > 3000:
            text = text[:3000] + "\n[... truncated]"

        context_parts.append(
            f"--- Source {i} ---\n"
            f"ID: {result['identifier']}\n"
            f"Title: {result['title']}\n"
            f"Court: {result.get('source', 'N/A')}\n"
            f"Date: {result.get('date', 'N/A')}\n"
            f"Text:\n{text}\n"
        )
    return "\n".join(context_parts)
3

Build the RAG prompt

Construct a prompt that instructs the LLM to answer based only on the provided legal sources. The system prompt enforces citation requirements and discourages hallucination.

SYSTEM_PROMPT = """You are a legal research assistant. Answer the user's question \
based ONLY on the provided legal sources. Follow these rules strictly:

1. Cite specific sources by their ID (e.g., ECLI:NL:HR:2024:1234) when making claims.
2. If the sources do not contain enough information to answer, say so explicitly.
3. Distinguish between binding precedent and persuasive authority.
4. Note the jurisdiction and court level of each source you cite.
5. Do not invent or assume legal rules not present in the sources."""

def build_messages(question: str, context: str) -> list[dict]:
    """Assemble the chat transcript: system rules followed by one user turn."""
    user_content = f"Legal sources:\n\n{context}\n\n---\n\nQuestion: {question}"
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": user_content}
    return [system_turn, user_turn]
4

Generate the answer with Claude

Pass the context-enriched prompt to Claude (or any LLM). The model produces a cited answer grounded in real case law.

import anthropic

client = anthropic.Anthropic()

def ask_legal_question(question: str, jurisdictions: list[str] = None) -> str:
    """End-to-end RAG pipeline: search -> retrieve -> prompt -> generate."""
    # Step 1: Search
    hits = search_legal_sources(question, jurisdictions)
    print(f"Found {len(hits)} relevant sources")

    # Step 2: Retrieve full texts
    source_context = gather_context(hits)

    # Step 3: Build prompt (system turn + user turn)
    system_turn, user_turn = build_messages(question, source_context)

    # Step 4: Generate answer
    completion = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        system=system_turn["content"],
        messages=[user_turn],
    )
    return completion.content[0].text
5

Test the pipeline

Run a real legal question through the pipeline and verify that the answer cites specific ECLI numbers from the retrieved sources.

# Example run: a GDPR legitimate-interest question, restricted to EU sources.
# A good answer should cite specific identifiers from the retrieved documents.
answer = ask_legal_question(
    "Under what conditions can a data controller rely on legitimate interest "
    "as a legal basis under GDPR Article 6(1)(f), and what balancing test applies?",
    jurisdictions=["European Union"],
)
print(answer)

Complete Code

#!/usr/bin/env python3
"""Legal research RAG assistant using Moonlit API + Claude."""

import os

import anthropic
import requests

# API key comes from the environment; os.environ[...] raises KeyError if unset,
# failing fast at import time rather than on the first request.
MOONLIT_KEY = os.environ["MOONLIT_API_KEY"]
BASE_URL = "https://api.moonlit.ai/v1.1"

# System prompt enforcing source-grounded, citation-backed answers.
SYSTEM_PROMPT = """You are a legal research assistant. Answer the user's question \
based ONLY on the provided legal sources. Follow these rules strictly:

1. Cite specific sources by their ID (e.g., ECLI:NL:HR:2024:1234) when making claims.
2. If the sources do not contain enough information to answer, say so explicitly.
3. Distinguish between binding precedent and persuasive authority.
4. Note the jurisdiction and court level of each source you cite.
5. Do not invent or assume legal rules not present in the sources."""


def search_legal_sources(question: str, jurisdictions: list[str] | None = None) -> list[dict]:
    """Search Moonlit for relevant legal documents via hybrid reranked search.

    Args:
        question: The legal question to search for.
        jurisdictions: Jurisdictions to restrict the search to; defaults to
            ["European Union", "Netherlands"] when omitted.

    Returns:
        A list of search-result dicts from the Moonlit API.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    payload = {
        "query": question,
        "jurisdictions": jurisdictions or ["European Union", "Netherlands"],
        "documentTypes": ["case_law", "legislation"],
        "semantic_weight": 0.6,  # 60% semantic / 40% keyword weighting
        "num_results": 5,
    }
    response = requests.post(
        f"{BASE_URL}/search/hybrid_search_reranked",
        headers={
            "Ocp-Apim-Subscription-Key": MOONLIT_KEY,
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=30,  # requests has no default timeout; avoid hanging forever
    )
    response.raise_for_status()
    return response.json()["result"]["results"]


def retrieve_document(doc_id: str) -> dict:
    """Retrieve full document text and metadata for one Moonlit document.

    Args:
        doc_id: Moonlit document identifier (e.g. an ECLI number).

    Returns:
        The raw JSON document payload from the API.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    response = requests.get(
        f"{BASE_URL}/document/retrieve_document",
        headers={"Ocp-Apim-Subscription-Key": MOONLIT_KEY},
        params={"DocumentIdentifier": doc_id},
        timeout=30,  # requests has no default timeout; avoid hanging forever
    )
    response.raise_for_status()
    return response.json()


def gather_context(results: list[dict]) -> str:
    """Fetch each result's full text and format it as numbered LLM context.

    Args:
        results: Search-result dicts as returned by ``search_legal_sources``.

    Returns:
        One string of numbered, labelled source sections.
    """
    parts = []
    for i, result in enumerate(results, 1):
        doc = retrieve_document(result["identifier"])
        # dict.get(key, default) still returns None when the key exists with
        # a None value, which would crash len() below — chain with `or` so
        # the result is always a non-empty string.
        text = doc.get("text") or doc.get("summary") or "No text available."
        # Cap document length at ~3000 chars to keep the LLM context bounded.
        if len(text) > 3000:
            text = text[:3000] + "\n[... truncated]"
        parts.append(
            f"--- Source {i} ---\n"
            f"ID: {result['identifier']}\n"
            f"Title: {result['title']}\n"
            f"Court: {result.get('source', 'N/A')}\n"
            f"Date: {result.get('date', 'N/A')}\n"
            f"Text:\n{text}\n"
        )
    return "\n".join(parts)


def ask_legal_question(question: str, jurisdictions: list[str] | None = None) -> str:
    """Answer a legal question with a source-grounded, cited LLM response.

    Pipeline: search Moonlit -> retrieve full texts -> prompt Claude with the
    retrieved sources as the only permitted evidence.

    Args:
        question: The legal question to answer.
        jurisdictions: Optional jurisdiction filter passed to the search step.

    Returns:
        The LLM's answer text, expected to cite the retrieved sources.
    """
    results = search_legal_sources(question, jurisdictions)
    print(f"Found {len(results)} relevant sources")

    context = gather_context(results)

    client = anthropic.Anthropic()
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        system=SYSTEM_PROMPT,
        messages=[
            {
                "role": "user",
                "content": f"Legal sources:\n\n{context}\n\n---\n\nQuestion: {question}",
            }
        ],
    )
    return response.content[0].text


if __name__ == "__main__":
    # Demo query: GDPR Art. 6(1)(f) legitimate interest, EU sources only.
    answer = ask_legal_question(
        "Under what conditions can a data controller rely on legitimate interest "
        "as a legal basis under GDPR Article 6(1)(f), and what balancing test applies?",
        jurisdictions=["European Union"],
    )
    print("\n" + "=" * 60)
    print(answer)

What's next