inhibitors.py
"""Query helpers for the mitochondrial complex I inhibitors dataset."""

from __future__ import annotations

from functools import lru_cache
from typing import Dict, List, Optional

import pandas as pd

from .datasets import DatasetLoadError, load_dataset
from .links import link_compound_to_nordic_plants

_KNOWN_STATUS_VALUES = ("known", "new")
_CONFIDENCE_VALUES = ("high", "medium", "low-medium", "low")
_SCOPE_DATASETS = {
    "global": "coconut_csv-09-2025.csv",
    "nordic": "coconut_csv-09-2025_FI_NO_plants.csv",
}


@lru_cache(maxsize=1)
def _inhibitors_frame() -> pd.DataFrame:
    """Load the inhibitors dataset once and normalize its text columns."""
    dataset = load_dataset("all_mito_complex_I_inhibitors.txt")
    frame = dataset.frame.copy()
    for column in ("compound", "known_status", "confidence"):
        if column in frame.columns:
            frame[column] = frame[column].astype(str).str.strip()
    return frame


def list_inhibitors(
    known_status: Optional[str] = None,
    confidence: Optional[str] = None,
    limit: Optional[int] = 20,
) -> List[Dict[str, object]]:
    """List inhibitors, optionally filtered by known_status and confidence."""
    frame = _inhibitors_frame()
    if known_status:
        norm_status = known_status.strip().lower()
        if norm_status not in _KNOWN_STATUS_VALUES:
            raise ValueError(f"known_status must be one of {_KNOWN_STATUS_VALUES}")
        frame = frame[frame["known_status"].str.lower() == norm_status]
    if confidence:
        norm_confidence = confidence.strip().lower()
        if norm_confidence not in _CONFIDENCE_VALUES:
            raise ValueError(f"confidence must be one of {_CONFIDENCE_VALUES}")
        frame = frame[frame["confidence"].str.lower() == norm_confidence]
    if limit is not None:
        try:
            limit_value = int(limit)
        except (TypeError, ValueError) as exc:
            # Catch TypeError as well, so non-numeric inputs get the same message.
            raise ValueError("limit must be an integer or null") from exc
        if limit_value >= 0:
            frame = frame.head(limit_value)
    columns = [
        col
        for col in ("compound", "known_status", "confidence", "pubmed_references", "pubmed_ids")
        if col in frame.columns
    ]
    return frame.loc[:, columns].to_dict(orient="records")


def summarize_inhibitors() -> Dict[str, object]:
    """Summarize counts by known_status and, within each status, by confidence."""
    frame = _inhibitors_frame()
    total = int(len(frame))
    status_counts = {
        status: int(frame[frame["known_status"].str.lower() == status].shape[0])
        for status in _KNOWN_STATUS_VALUES
    }
    new_frame = frame[frame["known_status"].str.lower() == "new"]
    known_frame = frame[frame["known_status"].str.lower() == "known"]

    def _confidence_counts(subset: pd.DataFrame) -> Dict[str, int]:
        return {
            confidence: int((subset["confidence"].str.lower() == confidence).sum())
            for confidence in _CONFIDENCE_VALUES
        }

    return {
        "total": total,
        "by_known_status": status_counts,
        "new_by_confidence": _confidence_counts(new_frame),
        "known_by_confidence": _confidence_counts(known_frame),
    }


def get_inhibitor_sources(compound: str) -> Dict[str, object]:
    """Return source details (PubMed IDs/URLs) for a specific inhibitor."""
    if not compound or not compound.strip():
        raise ValueError("compound must be a non-empty string")
    frame = _inhibitors_frame()
    lowered = compound.strip().lower()
    matching = frame[frame["compound"].str.lower() == lowered]
    if matching.empty:
        raise ValueError(f"Compound '{compound}' not found in inhibitors dataset.")
    record = matching.iloc[0].to_dict()
    raw_ids = str(record.get("pubmed_ids") or "")
    pubmed_ids = [pid.strip() for pid in raw_ids.split(";") if pid.strip()]
    pubmed_urls = [f"https://pubmed.ncbi.nlm.nih.gov/{pid}/" for pid in pubmed_ids]
    # Guard against NaN (a float) as well as None before the int() conversion.
    raw_refs = record.get("pubmed_references")
    pubmed_references = int(raw_refs) if raw_refs is not None and pd.notna(raw_refs) else None
    return {
        "compound": record.get("compound"),
        "known_status": record.get("known_status"),
        "confidence": record.get("confidence"),
        "pubmed_references": pubmed_references,
        "pubmed_ids": pubmed_ids,
        "pubmed_urls": pubmed_urls,
    }


def find_inhibitor_plants(compound: str, scope: str = "global") -> Dict[str, object]:
    """Map an inhibitor compound to organism occurrences in the selected scope."""
    if not compound or not compound.strip():
        raise ValueError("compound must be a non-empty string")
    scope_key = scope.strip().lower() if scope else "global"
    if scope_key not in _SCOPE_DATASETS:
        valid_scopes = ", ".join(sorted(_SCOPE_DATASETS))
        raise ValueError(f"scope must be one of: {valid_scopes}")
    normalized = compound.strip().lower()
    inhibitors = _inhibitors_frame()
    inhibitor_match = inhibitors[inhibitors["compound"].str.lower() == normalized]
    if inhibitor_match.empty:
        raise ValueError(f"Compound '{compound}' not found in inhibitors dataset.")
    inhibitor_row = inhibitor_match.iloc[0].to_dict()
    dataset_name = _SCOPE_DATASETS[scope_key]
    dataset = load_dataset(dataset_name)
    frame = dataset.frame.copy()
    if "name" not in frame.columns or "organisms" not in frame.columns:
        raise DatasetLoadError(f"Dataset '{dataset_name}' is missing required columns for linking.")
    # Names are lowercased in place for matching, so returned record names are lowercase.
    frame["name"] = frame["name"].astype(str).str.lower()
    matches = frame[frame["name"] == normalized]
    organisms: List[str] = []
    records: List[Dict[str, object]] = []
    if not matches.empty:
        organism_set = set()
        for _, row in matches.iterrows():
            # Organism lists are pipe-delimited in the COCONUT exports.
            raw_org = str(row.get("organisms") or "")
            split_orgs = [org.strip() for org in raw_org.split("|") if org.strip()]
            organism_set.update(split_orgs)
            record_entry: Dict[str, object] = {
                "name": row.get("name"),
                "organisms": split_orgs,
            }
            identifier = row.get("identifier")
            if identifier is not None:
                record_entry["identifier"] = identifier
            records.append(record_entry)
        organisms = sorted(organism_set)
    result = {
        "compound": inhibitor_row.get("compound", compound),
        "known_status": inhibitor_row.get("known_status"),
        "confidence": inhibitor_row.get("confidence"),
        "scope": scope_key,
        "dataset": dataset_name,
        "match_count": int(matches.shape[0]),
        "organism_count": len(organisms),
        "organisms": organisms,
        "records": records,
    }
    if scope_key == "nordic":
        result["nordic_observations"] = link_compound_to_nordic_plants(compound)
    return result
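
For context, a minimal usage sketch of the module's public helpers. The import path (aurora_mcp.inhibitors) is an assumption inferred from the repository name, since the module itself only uses relative imports, and the compound name is illustrative; the calls also assume the bundled datasets are available to load_dataset.

# Hypothetical import path; the actual package name may differ.
from aurora_mcp.inhibitors import (
    find_inhibitor_plants,
    get_inhibitor_sources,
    list_inhibitors,
    summarize_inhibitors,
)

# Top 5 high-confidence "new" inhibitor candidates.
candidates = list_inhibitors(known_status="new", confidence="high", limit=5)

# Dataset-wide counts by status and, per status, by confidence.
summary = summarize_inhibitors()

# PubMed provenance for one compound (raises ValueError if it is absent).
sources = get_inhibitor_sources("rotenone")

# Organisms carrying the compound, restricted to the Nordic dataset;
# scope="nordic" also attaches nordic_observations from the links module.
plants = find_inhibitor_plants("rotenone", scope="nordic")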
