Smithsonian Open Access MCP Server

MIT License

smithsonian-mcp
smithsonian_mcp

utils.py•10.1 kB

""" Utility functions for the Smithsonian MCP server. """ from typing import Dict, Any, Optional, List def mask_api_key(params: Dict[str, Any]) -> Dict[str, Any]: """ Masks the API key in a dictionary of parameters. Args: params: A dictionary of parameters. Returns: A new dictionary with the API key masked. """ if "api_key" in params: masked_params = params.copy() masked_params["api_key"] = "****" return masked_params return params def resolve_museum_code(museum_name: str) -> Optional[str]: """ Resolve a museum name or code to the correct Smithsonian unit code. This function provides flexible matching for museum names, handling common variations and partial matches. It supports: - Exact matches: "asian art" -> "FSG" - Partial matches: "Smithsonian Asian Art Museum" -> "FSG" - Direct codes: "SAAM" -> "SAAM" - Case-insensitive matching Args: museum_name: Museum name or code to resolve Returns: The corresponding museum code (e.g., "FSG", "SAAM"), or None if not found Examples: resolve_museum_code("Smithsonian Asian Art Museum") # -> "FSG" resolve_museum_code("Asian Art") # -> "FSG" resolve_museum_code("SAAM") # -> "SAAM" resolve_museum_code("Natural History Museum") # -> "NMNH" """ if not museum_name or not museum_name.strip(): return None # Import here to avoid circular imports from .constants import MUSEUM_MAP, VALID_MUSEUM_CODES # Normalize input normalized = museum_name.lower().strip() # Try exact match on original first if normalized in MUSEUM_MAP: return MUSEUM_MAP[normalized] # Remove common prefixes that don't help with matching cleaned = normalized prefixes_to_remove = ["smithsonian", "national museum of", "museum of"] for prefix in prefixes_to_remove: if cleaned.startswith(prefix + " "): cleaned = cleaned[len(prefix) + 1:].strip() # Try exact match on cleaned version if cleaned in MUSEUM_MAP: return MUSEUM_MAP[cleaned] # Try direct code match if normalized.upper() in VALID_MUSEUM_CODES: return normalized.upper() # Try partial matches - check if normalized 
contains any map key for map_key in MUSEUM_MAP: if map_key in normalized or normalized in map_key: return MUSEUM_MAP[map_key] # Try word-based matching for multi-word museum names normalized_words = set(normalized.split()) for map_key, code in MUSEUM_MAP.items(): map_words = set(map_key.split()) # If there's significant overlap (more than 50% of words match) if len(normalized_words & map_words) / len(map_words) > 0.5: return code # No match found return None def validate_url(url_str: Optional[str]) -> Optional[str]: """ Validate and normalize a URL string. This function checks if a URL string is a valid HTTP or HTTPS URL. It handles edge cases like malformed URLs and non-HTTP protocols. Args: url_str: The URL string to validate Returns: The validated URL string if valid, None otherwise Examples: validate_url("https://example.com") # -> "https://example.com" validate_url("http://example.com") # -> "http://example.com" validate_url("ftp://example.com") # -> None validate_url("not-a-url") # -> None validate_url(None) # -> None """ if not url_str: return None try: from pydantic import HttpUrl parsed = HttpUrl(url_str) if parsed.scheme in ('http', 'https'): return str(parsed) except (ValueError, TypeError): pass return None def prioritize_objects_by_unit_code(objects: List, unit_code: Optional[str]) -> List: """ Reorder search results to prioritize objects whose IDs start with the unit code. This ensures that when searching with a specific unit_code (e.g., "NMAH"), objects from that museum appear first, even if the API ordered them differently. 
Args: objects: List of SmithsonianObject instances from search results unit_code: The unit code used in the search (e.g., "NMAH", "FSG") Returns: Reordered list with museum-specific objects first """ if not unit_code or not objects: return objects # Convert unit_code to lowercase for case-insensitive matching unit_prefix = f"{unit_code.lower()}_" # Separate objects into prioritized and others prioritized = [] others = [] for obj in objects: if obj.id and obj.id.lower().startswith(unit_prefix): prioritized.append(obj) else: others.append(obj) # Return prioritized objects first, then others (maintaining their relative order) return prioritized + others def _normalize_museum_code(record_id_prefix: str) -> str: """Normalize record_id prefix to museum code key used in MUSEUM_URL_PATTERNS.""" prefix = record_id_prefix.lower() # Handle NMNH sub-museums with long prefixes if prefix.startswith("nmnh"): if "invertebratezoology" in prefix: return "NMNHINV" elif "anthropology" in prefix: return "NMNHANTHRO" elif "education" in prefix: return "NMNHEDUCATION" elif "mineralsciences" in prefix: return "NMNHMINSCI" elif "paleobiology" in prefix: return "NMNHPALEO" # Add more as needed else: return prefix.upper() # fallback return prefix.upper() async def construct_url_from_record_id(record_id: Optional[str]) -> Optional[str]: """ Construct a URL from a record_id using museum-specific URL patterns. This function uses predefined URL construction patterns for each Smithsonian museum to generate accurate object URLs. Different museums have different URL formats and identifier requirements. 
Args: record_id: The record identifier (e.g., "nmah_1448973", "fsg_F1900.47") Returns: Constructed URL string, or None if museum not found or record_id malformed Examples: construct_url_from_record_id("nmah_1448973") # Returns: "https://americanhistory.si.edu/collections/object/nmah_1448973" construct_url_from_record_id("fsg_F1900.47") # Returns: "https://asia.si.edu/object/F1900.47" construct_url_from_record_id("nmnhinvertebratezoology_14688577") # Returns: "https://naturalhistory.si.edu/object/nmnhinvertebratezoology_14688577" """ if not record_id or "_" not in record_id: return None # Extract components from record_id parts = record_id.split("_", 1) if len(parts) != 2: return None record_id_prefix = parts[0] accession = parts[1] # Normalize to museum code museum_code = _normalize_museum_code(record_id_prefix) # Import patterns from .constants import MUSEUM_URL_PATTERNS pattern = MUSEUM_URL_PATTERNS.get(museum_code) if not pattern: # Unknown museum, fall back to API lookup return await _get_url_from_api_record_id(record_id) # Handle different identifier types identifier_type = pattern["identifier"] base_url = pattern["base_url"] path_template = pattern["path_template"] if identifier_type == "record_ID": # Use the full record_id identifier = record_id elif identifier_type == "accession": # Use just the accession part identifier = accession elif identifier_type in ["record_link", "guid", "url", "idsId"]: # Need to get this from API return await _get_url_from_api_record_id(record_id) else: # Unknown identifier type, fall back to API return await _get_url_from_api_record_id(record_id) # Handle template variables in base_url if "{record_link}" in base_url or "{guid}" in base_url: # Need API data for these return await _get_url_from_api_record_id(record_id) # Construct the URL try: url = base_url.rstrip("/") if path_template: # Format the path template with available variables formatted_path = path_template.format( record_ID=record_id, accession=accession, 
url=record_id, # fallback idsId=record_id, # fallback guid=record_id, # fallback ) url += formatted_path return url except (KeyError, ValueError): # Template formatting failed, fall back to API return await _get_url_from_api_record_id(record_id) async def _get_url_from_api_record_id(record_id: str) -> Optional[str]: """ Fallback function to get URL from API when pattern-based construction fails or when API data is required (record_link, guid, etc.). """ from .api_client import SmithsonianAPIClient from .models import CollectionSearchFilter client = SmithsonianAPIClient() try: await client.connect() # Try to find the object by record_id # First, search for it filters = CollectionSearchFilter( query=record_id, unit_code=None, object_type=None, date_start=None, date_end=None, maker=None, material=None, topic=None, has_images=None, is_cc0=None, on_view=None, limit=1, offset=0, ) results = await client.search_collections(filters=filters) if results.objects and results.objects[0].record_link: return str(results.objects[0].record_link) # If that doesn't work, try direct lookup if the API supports it # For now, return None if we can't construct it return None except Exception: return None finally: await client.disconnect()

Latest Blog Posts

The 50MB Markdown Files That Broke Our Server
By punkpeye on December 3, 2025.
react
react-router
node-js
OpenTelemetry for Model Context Protocol (MCP) Analytics and Agent Observability
By Om-Shree-0709 on November 29, 2025.
observability
mcp
opentelemetry
Securing Enterprise AI Agents with Unique Identities in the Model Context Protocol (MCP)
By Om-Shree-0709 on November 27, 2025.

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/molanojustin/smithsonian-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.