Riksarkivet MCP Server

search_operations.py (9.42 kB)
""" Unified search operations that can be used by both CLI and MCP interfaces. This eliminates code duplication between CLI commands and MCP tools. """ from typing import List, Optional, Tuple, Dict, Union from ..clients import SearchAPI, IIIFClient, OAIPMHClient from ..models import SearchHit, PageContext, SearchOperation, BrowseOperation from ..utils import HTTPClient, parse_page_range from ..config import SEARCH_API_BASE_URL, REQUEST_TIMEOUT from .search_enrichment_service import SearchEnrichmentService from .page_context_service import PageContextService class SearchOperations: """ Unified search operations that can be used by both CLI and MCP interfaces. Contains all the business logic for search, browse, and context operations. """ def __init__(self): self.search_api = SearchAPI() self.enrichment_service = SearchEnrichmentService() self.page_service = PageContextService() self.iiif_client = IIIFClient() self.oai_client = OAIPMHClient() def search_transcribed( self, keyword: str, offset: int = 0, max_results: int = 10, max_hits_per_document: Optional[int] = None, show_context: bool = False, max_pages_with_context: int = 0, context_padding: int = 0 ) -> SearchOperation: """ Unified search operation that can be used by both CLI and MCP. Returns SearchOperation with results and metadata. """ # Perform the search hits, total_hits = self.search_api.search_transcribed_text( keyword, max_results, offset, max_hits_per_document ) operation = SearchOperation( hits=hits, total_hits=total_hits, keyword=keyword, offset=offset, enriched=False ) # Enrich with context if requested if show_context and hits and max_pages_with_context > 0: hits_to_enrich = hits[:max_pages_with_context] # Expand with context padding if requested if context_padding > 0: hits_to_enrich = self.enrichment_service.expand_hits_with_context_padding( hits_to_enrich, context_padding ) # Enrich with full page text enriched_hits = self.enrichment_service.enrich_hits_with_context( hits_to_enrich, len(hits_to_enrich), keyword ) operation.hits = enriched_hits operation.enriched = True return operation def browse_document( self, reference_code: str, pages: str, highlight_term: Optional[str] = None, max_pages: int = 20 ) -> BrowseOperation: """ Unified browse operation that can be used by both CLI and MCP. Returns BrowseOperation with page contexts and metadata. """ # Find PID for the reference code pid = self._find_pid_for_reference(reference_code) if not pid: return BrowseOperation( contexts=[], reference_code=reference_code, pages_requested=pages, pid=None ) # Get manifest information collection_info = self.iiif_client.explore_collection(pid) manifest_id = pid if collection_info and collection_info.get('manifests'): manifest_id = collection_info['manifests'][0]['id'] # Parse page range selected_pages = parse_page_range(pages)[:max_pages] # Load page contexts contexts = [] for page_num in selected_pages: context = self.page_service.get_page_context( manifest_id, str(page_num), reference_code, highlight_term ) if context: contexts.append(context) return BrowseOperation( contexts=contexts, reference_code=reference_code, pages_requested=pages, pid=pid, manifest_id=manifest_id ) def show_pages_with_context( self, keyword: str, max_pages: int = 10, context_padding: int = 1, search_limit: int = 50 ) -> Tuple[SearchOperation, List[SearchHit]]: """ Unified show-pages operation that combines search and context display. 
Returns: - SearchOperation with the initial search results - List of enriched hits with context padding and full text """ # Step 1: Search for the keyword search_op = self.search_transcribed( keyword=keyword, max_results=search_limit, show_context=False ) if not search_op.hits: return search_op, [] # Step 2: Take the top hits and expand with context display_hits = search_op.hits[:max_pages] # Expand with context padding expanded_hits = self.enrichment_service.expand_hits_with_context_padding( display_hits, context_padding ) # Enrich with full page text enriched_hits = self.enrichment_service.enrich_hits_with_context( expanded_hits, len(expanded_hits), keyword ) return search_op, enriched_hits def get_document_structure( self, reference_code: Optional[str] = None, pid: Optional[str] = None ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]: """ Get document structure information. Returns IIIF collection info or None if not found. """ if not reference_code and not pid: return None # Get PID if only reference_code provided if reference_code and not pid: pid = self._find_pid_for_reference(reference_code) if not pid: return None # Clean PID if needed clean_pid = pid[6:] if pid.startswith('arkis!') else pid # Get IIIF collection info collection_info = self.iiif_client.explore_collection(clean_pid) return collection_info def _find_pid_for_reference(self, reference_code: str) -> Optional[str]: """ Find PID for a reference code using multiple strategies. Returns PID or None if not found. """ session = HTTPClient.create_session() pid = None # Try search API first try: params = { 'reference_code': reference_code, 'only_digitised_materials': 'true', 'max': 1 } response = session.get(SEARCH_API_BASE_URL, params=params, timeout=REQUEST_TIMEOUT) response.raise_for_status() data = response.json() if data.get('items'): pid = data['items'][0].get('id') except Exception: pass # Fall back to OAI-PMH if search failed if not pid: try: pid = self.oai_client.extract_pid(reference_code) except Exception: pass return pid class SearchResultsAnalyzer: """ Utility class for analyzing search results and generating insights. Used by both CLI and MCP for result analysis. 
""" @staticmethod def group_hits_by_document(hits: List[SearchHit]) -> Dict[str, List[SearchHit]]: """Group search hits by document (reference code or PID).""" grouped = {} for hit in hits: key = hit.reference_code or hit.pid if key not in grouped: grouped[key] = [] grouped[key].append(hit) return grouped @staticmethod def get_pagination_info( hits: List[SearchHit], total_hits: int, offset: int, max_results: int ) -> Dict[str, Union[int, bool, Optional[int]]]: """Calculate pagination information for search results.""" # Count unique documents unique_docs = set() for hit in hits: unique_docs.add(hit.reference_code or hit.pid) has_more = len(unique_docs) == max_results and total_hits > len(hits) document_start = offset // max_results * max_results + 1 document_end = document_start + len(unique_docs) - 1 return { 'total_hits': total_hits, 'total_documents_shown': len(unique_docs), 'total_page_hits': len(hits), 'document_range_start': document_start, 'document_range_end': document_end, 'has_more': has_more, 'next_offset': offset + max_results if has_more else None } @staticmethod def extract_search_summary(operation: SearchOperation) -> Dict[str, Union[str, int, bool, Dict[str, List[SearchHit]]]]: """Extract summary information from a search operation.""" grouped = SearchResultsAnalyzer.group_hits_by_document(operation.hits) return { 'keyword': operation.keyword, 'total_hits': operation.total_hits, 'page_hits_returned': len(operation.hits), 'documents_returned': len(grouped), 'enriched': operation.enriched, 'offset': operation.offset, 'grouped_hits': grouped }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AI-Riksarkivet/ra-mcp'
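
The same request can be issued from Python. The sketch below assumes only the URL shown above and that the endpoint returns JSON; the response schema is not documented on this page.

# Sketch: fetch this server's directory entry with the requests library.
# Only the URL above is taken from this page; the response schema is an assumption.
import requests

response = requests.get(
    "https://glama.ai/api/mcp/v1/servers/AI-Riksarkivet/ra-mcp",
    timeout=10,
)
response.raise_for_status()
print(response.json())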

If you have feedback or need assistance with the MCP directory API, please join our Discord server.