config.rs•7.71 kB
//! Configuration for indexed SPARQL endpoints
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
use crate::error::AppResult;
/// Describes a single SPARQL endpoint together with the metadata used to index it.
///
/// Only `label`, `endpoint_url` and `description` are mandatory; every other
/// field is an `Option` and may be left unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparqlEndpointConfig {
    /// Human-readable name of the endpoint, used for clearer display.
    pub label: String,

    /// URL of the SPARQL endpoint; most information is extracted from here.
    pub endpoint_url: String,

    /// Free-text description of the endpoint and the data it holds.
    pub description: String,

    /// VoID file (local or remote) to use when the endpoint does not provide
    /// that information itself.
    pub void_file: Option<String>,

    /// File containing example SPARQL queries for this endpoint.
    pub examples_file: Option<String>,

    /// Homepage URL, used to obtain additional information via JSON-LD context.
    pub homepage_url: Option<String>,

    /// URL of the ontology associated with the endpoint.
    pub ontology: Option<String>,
}
/// Top-level configuration: the complete set of SPARQL endpoints to index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EndpointsConfig {
    /// Every endpoint that should be indexed, in configuration order.
    pub endpoints: Vec<SparqlEndpointConfig>,
}
impl EndpointsConfig {
    /// Loads an [`EndpointsConfig`] from the JSON file at `path`.
    ///
    /// The whole file is read into memory and then deserialized with
    /// `serde_json`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read, or if its contents cannot
    /// be deserialized into an `EndpointsConfig`.
    pub fn from_json_file<P: AsRef<Path>>(path: P) -> AppResult<Self> {
        let raw_json = fs::read_to_string(path)?;
        // The target type is inferred from the function's return type.
        Ok(serde_json::from_str(&raw_json)?)
    }
}
impl Default for EndpointsConfig {
    /// Returns the built-in configuration: ten predefined SPARQL endpoints.
    ///
    /// None of the built-in entries sets `examples_file` or `ontology`.
    fn default() -> Self {
        // One row per built-in endpoint, in the order they appear in the
        // resulting configuration:
        // (label, endpoint_url, description, void_file, homepage_url)
        const BUILT_IN: &[(&str, &str, &str, Option<&str>, Option<&str>)] = &[
            (
                "UniProt",
                "https://sparql.uniprot.org/sparql/",
                "UniProt is a comprehensive resource for protein sequence and annotation data.",
                Some("../sparql-llm/tests/void_uniprot.ttl"),
                None,
            ),
            (
                "Bgee",
                "https://www.bgee.org/sparql/",
                "Bgee is a database for retrieval and comparison of gene expression patterns across multiple animal species.",
                None,
                Some("https://www.bgee.org/"),
            ),
            (
                "Orthology MAtrix (OMA)",
                "https://sparql.omabrowser.org/sparql/",
                "OMA is a method and database for the inference of orthologs among complete genomes.",
                None,
                Some("https://omabrowser.org/"),
            ),
            (
                "HAMAP",
                "https://hamap.expasy.org/sparql/",
                "HAMAP is a system for the classification and annotation of protein sequences. It consists of a collection of manually curated family profiles for protein classification, and associated, manually created annotation rules that specify annotations that apply to family members.",
                None,
                Some("https://hamap.expasy.org/"),
            ),
            (
                "SwissLipids",
                "https://beta.sparql.swisslipids.org/",
                "SwissLipids is an expert curated resource that provides a framework for the integration of lipid and lipidomic data with biological knowledge and models.",
                None,
                Some("https://www.swisslipids.org"),
            ),
            (
                "Rhea",
                "https://sparql.rhea-db.org/sparql/",
                "Rhea is an expert-curated knowledgebase of chemical and transport reactions of biological interest - and the standard for enzyme and transporter annotation in UniProtKB.",
                None,
                Some("https://www.rhea-db.org/"),
            ),
            (
                "Cellosaurus",
                "https://sparql.cellosaurus.org/sparql",
                "Cellosaurus is a knowledge resource on cell lines.",
                None,
                Some("https://cellosaurus.org/"),
            ),
            (
                "OrthoDB",
                "https://sparql.orthodb.org/sparql/",
                "The hierarchical catalog of orthologs mapping genomics to functional data",
                None,
                Some("https://www.orthodb.org/"),
            ),
            (
                "METRIN-KG",
                "https://kg.earthmetabolome.org/metrin/api/",
                "The MEtabolomes, TRaits, and INteractions-Knowledge Graph (METRIN-KG). Aims to create a digital representation of chemo- and biodiversity, from botanical collections to the global scale in wild ecosystems.",
                None,
                None,
            ),
            (
                "MetaNetX",
                "https://rdf.metanetx.org/sparql/",
                "Automated Model Construction and Genome Annotation for Large-Scale Metabolic Networks.",
                None,
                None,
            ),
        ];

        // Turn each borrowed row into an owned endpoint configuration.
        let endpoints = BUILT_IN
            .iter()
            .map(
                |&(label, endpoint_url, description, void_file, homepage_url)| {
                    SparqlEndpointConfig {
                        label: label.to_owned(),
                        endpoint_url: endpoint_url.to_owned(),
                        description: description.to_owned(),
                        void_file: void_file.map(str::to_owned),
                        examples_file: None,
                        homepage_url: homepage_url.map(str::to_owned),
                        ontology: None,
                    }
                },
            )
            .collect();

        Self { endpoints }
    }
}
impl EndpointsConfig {
    /// Borrows the full list of configured endpoints.
    ///
    /// The returned reference points at the `endpoints` field itself; nothing
    /// is cloned.
    pub fn get_endpoints(&self) -> &Vec<SparqlEndpointConfig> {
        let Self { endpoints } = self;
        endpoints
    }

    // The helpers below are currently disabled and kept only for reference.
    //
    // /// Get an endpoint configuration by label
    // pub fn get_endpoint_by_label(&self, label: &str) -> Option<&SparqlEndpointConfig> {
    //     self.endpoints
    //         .iter()
    //         .find(|endpoint| endpoint.label == label)
    // }
    //
    // /// Add a new endpoint configuration
    // pub fn add_endpoint(&mut self, endpoint: SparqlEndpointConfig) {
    //     self.endpoints.push(endpoint);
    // }
}