Riksarkivet MCP Server

Apache 2.0

Overview InspectNew Endpoints Schema Related Servers Reviews Score

ra-mcp
src
ra_mcp

mcp_tools.py•12 kB

"""MCP tool and resource definitions for the Riksarkivet (RA) server.

Registers search, browse and document-structure tools plus access to the
bundled historical-guide markdown files on a single ``FastMCP`` instance.
Every tool returns a formatted string; failures are reported through
``format_error_message`` rather than raised to the MCP client.
"""

import os
from typing import Optional

from fastmcp import FastMCP
from pydantic import Field

from services import SearchOperations, SearchResultsAnalyzer, DisplayService
from formatters import MCPFormatter, format_error_message

# Rough characters-per-token ratio used to estimate response size.
_CHARS_PER_TOKEN = 4

# Directory holding the guide markdown files, two levels above this module.
_MARKDOWN_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "markdown")


ra_mcp = FastMCP(
    name="ra-mcp",
    instructions="""
    🏛️ Riksarkivet (RA) Search and Browse MCP Server

    This server provides access to transcribed historical documents from the Swedish National Archives.

    AVAILABLE TOOLS:

    1. 🔍 search_transcribed - Search for keywords in transcribed materials
       - Returns documents and pages containing the keyword
       - Offset parameter required to encourage comprehensive discovery
       - Context disabled by default for maximum hit coverage
       - Provides direct links to images and ALTO XML

    2. 📖 browse_document - Browse specific pages by reference code
       - View full transcriptions of specific pages
       - Supports page ranges and multiple pages
       - Optional keyword highlighting

    3. 📚 get_document_structure - Get document structure without content
       - Quick overview of available manifests
       - Document metadata and hierarchy
       - Useful for understanding what's available

    SEARCH STRATEGY FOR MAXIMUM DISCOVERY:
    1. Start with search_transcribed(keyword, offset=0) for initial hits
    2. Continue pagination with increasing offsets (50, 100, 150...) to find all matches
    3. Use show_context=False (default) to see more results per query
    4. Only enable show_context=True when you want full page text for specific hits
    5. EXPLORE RELATED TERMS: Search for similar/related words to gather comprehensive context
       - Historical variants and spellings (e.g., "trolldom" + "häxa" + "trollkona")
       - Synonyms and related concepts (e.g., "satan" + "djävul" for devil-related terms)
       - Different word forms (e.g., "trolleri" + "trollkonst" for witchcraft variants)
       - Period-appropriate terminology and archaic spellings
    6. Note reference codes and page numbers for detailed browsing
    7. Use browse_document() to view full transcriptions of interesting pages

    TYPICAL WORKFLOW:
    1. Comprehensive search: search_transcribed(term, 0), then search_transcribed(term, 50), etc.
    2. Search related terms in parallel to build complete context
    3. Review hit summaries to identify most relevant documents across all searches
    4. Use browse_document() for detailed examination of specific pages
    5. Use get_document_structure() to understand document organization

    All tools return rich, formatted text optimized for LLM understanding.
    """,
)


def _truncate_page_texts(hits, max_chars: int) -> None:
    """Shorten each hit's ``full_page_text`` in place to at most *max_chars* characters."""
    # Clamp so a negative limit cannot turn into a from-the-end slice.
    limit = max(max_chars, 0)
    for hit in hits:
        page_text = getattr(hit, "full_page_text", None)
        if page_text and len(page_text) > limit:
            hit.full_page_text = page_text[:limit] + "..."


def _read_guide_markdown(filename: str) -> str:
    """Return the UTF-8 text of *filename* inside the guide markdown directory."""
    markdown_path = os.path.join(_MARKDOWN_DIR, filename)
    with open(markdown_path, "r", encoding="utf-8") as f:
        return f.read()


@ra_mcp.tool(
    name="search_transcribed",
    description="Search for keywords in transcribed historical documents from Riksarkivet",
)
async def search_transcribed(
    keyword: str,
    offset: int,
    show_context: bool = False,
    max_results: int = 10,
    max_hits_per_document: int = 3,
    max_pages_with_context: int = 0,
    context_padding: int = 0,
    max_response_tokens: int = 15000,
    truncate_page_text: int = 800,
) -> str:
    """
    Search transcribed documents for a keyword and return formatted results.

    Args:
        keyword: Search term.
        offset: Pagination offset forwarded to the search backend.
        show_context: When true, results are formatted with full context and
            page texts of enriched results are shortened to
            ``truncate_page_text`` characters.
        max_results: Maximum number of documents to request and display.
        max_hits_per_document: Forwarded to the search backend.
        max_pages_with_context: Forwarded to the search backend.
        context_padding: Forwarded to the search backend.
        max_response_tokens: Response budget; size is estimated at four
            characters per token and longer responses are cut off.
        truncate_page_text: Maximum characters kept from each hit's page text.

    Returns:
        Formatted search results, a "no results" message, or a formatted
        error message if anything fails. When more results are available a
        pagination hint with the next offset is appended, including when the
        response body had to be truncated.
    """
    try:
        search_operations = SearchOperations()
        display_service = DisplayService(MCPFormatter())
        results_analyzer = SearchResultsAnalyzer()

        search_result = search_operations.search_transcribed(
            keyword=keyword,
            offset=offset,
            max_results=max_results,
            max_hits_per_document=max_hits_per_document,
            show_context=show_context,
            max_pages_with_context=max_pages_with_context,
            context_padding=context_padding,
        )

        if not search_result.hits:
            if offset > 0:
                return f"No more results found for '{keyword}' at offset {offset}. Total results: {search_result.total_hits}"
            return f"No results found for '{keyword}'. Try different search terms or variations."

        if show_context and search_result.enriched:
            _truncate_page_texts(search_result.hits, truncate_page_text)

        formatted_results = display_service.format_search_results(
            search_result,
            maximum_documents_to_display=max_results,
            show_full_context=show_context,
        )

        # Build the pagination hint before any truncation so an oversized
        # response still tells the caller which offset to request next.
        pagination_info = results_analyzer.get_pagination_info(
            search_result.hits, search_result.total_hits, offset, max_results
        )
        pagination_footer = ""
        if pagination_info["has_more"]:
            pagination_footer = (
                f"\n\n📊 **Pagination**: Showing documents {pagination_info['document_range_start']}-{pagination_info['document_range_end']}"
                f"\n💡 Use `offset={pagination_info['next_offset']}` to see the next {max_results} documents"
            )

        estimated_tokens = len(formatted_results) // _CHARS_PER_TOKEN
        if estimated_tokens > max_response_tokens:
            # Clamp so a negative budget yields an empty body instead of a
            # from-the-end slice.
            character_budget = max(max_response_tokens, 0) * _CHARS_PER_TOKEN
            return (
                formatted_results[:character_budget]
                + "\n\n[Response truncated due to size limits]"
                + pagination_footer
            )

        return formatted_results + pagination_footer

    except Exception as e:
        # Tool boundary: report the failure to the client instead of raising.
        return format_error_message(
            f"Search failed: {e}",
            suggestions=[
                "Try a simpler search term",
                "Check if the service is available",
                "Reduce max_results or max_pages_with_context",
            ],
        )


@ra_mcp.tool(
    name="browse_document",
    description="Browse specific pages of a document by reference code",
)
async def browse_document(
    reference_code: str,
    pages: str,
    highlight_term: Optional[str] = None,
    max_pages: int = 20,
) -> str:
    """
    Browse specific pages of a document by reference code.

    Returns:
    - Full transcribed text for each requested page
    - Optional keyword highlighting
    - Direct links to images and ALTO XML

    Examples:
    - browse_document("SE/RA/420422/01", "5") - View page 5
    - browse_document("SE/RA/420422/01", "1-10") - View pages 1 through 10
    - browse_document("SE/RA/420422/01", "5,7,9", highlight_term="Stockholm") - View specific pages with highlighting
    """
    try:
        search_operations = SearchOperations()
        display_service = DisplayService(MCPFormatter())

        browse_result = search_operations.browse_document(
            reference_code=reference_code,
            pages=pages,
            highlight_term=highlight_term,
            max_pages=max_pages,
        )

        if not browse_result.contexts:
            return format_error_message(
                f"Could not load pages for {reference_code}",
                suggestions=[
                    "The pages might not have transcriptions",
                    "Try different page numbers",
                    "Check if the document is fully digitized",
                ],
            )

        return display_service.format_browse_results(browse_result, highlight_term)

    except Exception as e:
        # Tool boundary: report the failure to the client instead of raising.
        return format_error_message(
            f"Browse failed: {e}",
            suggestions=[
                "Check the reference code format",
                "Verify page numbers are valid",
                "Try with fewer pages",
            ],
        )


@ra_mcp.tool(
    name="get_document_structure",
    description="Get document structure and metadata without fetching content",
)
async def get_document_structure(
    reference_code: Optional[str] = None,
    pid: Optional[str] = None,
    include_manifest_info: bool = True,
) -> str:
    """
    Get the structure and metadata of a document without fetching page content.

    Useful for:
    - Understanding what's available in a document
    - Getting the total number of pages
    - Finding available manifests
    - Viewing document hierarchy

    Provide either reference_code or pid.

    Note: include_manifest_info is accepted for interface compatibility but
    is not currently used by this function.
    """
    try:
        if not reference_code and not pid:
            return format_error_message(
                "Either reference_code or pid must be provided",
                suggestions=[
                    "Provide a reference code like 'SE/RA/420422/01'",
                    "Or provide a PID from search results",
                ],
            )

        search_operations = SearchOperations()
        display_service = DisplayService(MCPFormatter())

        document_structure = search_operations.get_document_structure(
            reference_code=reference_code, pid=pid
        )

        if not document_structure:
            return format_error_message(
                "Could not get structure for the document",
                suggestions=[
                    "The document might not have IIIF manifests",
                    "Try browsing specific pages instead",
                ],
            )

        return display_service.format_document_structure(document_structure)

    except Exception as e:
        # Tool boundary: report the failure to the client instead of raising.
        return format_error_message(
            f"Failed to get document structure: {e}",
            suggestions=[
                "Check the reference code or PID",
                "Try searching for the document first",
            ],
        )


@ra_mcp.resource("riksarkivet://contents/table_of_contents")
def get_table_of_contents() -> str:
    """
    Get the table of contents (Innehållsförteckning) for the Riksarkivet historical guide.

    Returns the text of markdown/00_Innehallsforteckning.md, or a formatted
    error message if the file is missing or cannot be read.
    """
    try:
        return _read_guide_markdown("00_Innehallsforteckning.md")
    except FileNotFoundError:
        return format_error_message(
            "Table of contents file not found",
            suggestions=[
                "Check if the markdown/00_Innehallsforteckning.md file exists",
                "Verify the file path is correct",
            ],
        )
    except Exception as e:
        return format_error_message(
            f"Failed to load table of contents: {e}",
            suggestions=["Check file permissions", "Verify file encoding is UTF-8"],
        )


@ra_mcp.tool(
    name="get_guide_content",
    description="Load specific sections from the Riksarkivet historical guide",
)
async def get_guide_content(
    filename: str = Field(
        description="Markdown filename to load (e.g., '01_Domstolar.md', '02_Fangelse.md')"
    ),
) -> str:
    """
    Load content from specific sections of the Riksarkivet historical guide.

    Only names ending in ".md" are accepted, and any directory components
    are stripped so the lookup stays inside the guide markdown directory.
    Returns the file text, or a formatted error message on failure.
    """
    try:
        if not filename.endswith(".md"):
            return format_error_message(
                "Invalid filename format",
                suggestions=["Filename must end with .md extension"],
            )

        # Drop directory parts so "../x.md" cannot escape the guide directory.
        filename = os.path.basename(filename)
        return _read_guide_markdown(filename)

    except FileNotFoundError:
        return format_error_message(
            f"Guide section '{filename}' not found",
            suggestions=[
                "Check the filename spelling",
                "Use get_table_of_contents resource to see available sections",
                "Ensure the filename includes .md extension",
            ],
        )
    except Exception as e:
        return format_error_message(
            f"Failed to load guide content '{filename}': {e}",
            suggestions=[
                "Check file permissions",
                "Verify file encoding is UTF-8",
                "Ensure the filename is valid",
            ],
        )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AI-Riksarkivet/ra-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.