Skip to main content
Glama
validate.rs (10.2 kB)
use serde::Serialize; use spargebra::{Query, algebra::GraphPattern, term::TriplePattern}; use std::collections::HashMap; use crate::{ error::AppResult, void_schema::{SchemasMap, VoidSchema}, }; // use crate::error::AppResult; #[derive(Serialize, Debug)] pub struct SparqlValidation { pub query: String, pub endpoint: String, pub errors: Vec<String>, } // // TODO: implement validation of SPARQL queries using VoID /// Validate a SPARQL query against the specified endpoint using their classes schema from VoID description. pub async fn validate_sparql( endpoint: &str, query: &str, schemas_map: &SchemasMap, ) -> AppResult<Vec<String>> { tracing::debug!("Validating SPARQL in markdown"); let mut errors: Vec<String> = vec![]; match Query::parse(query, None) { Ok(parsed_query) => { // tracing::debug!("Parsed SPARQL query: {parsed_query:?}"); if let Query::Select { pattern, .. } = parsed_query { let mut triples = Vec::new(); collect_triples_from_pattern(&pattern, &mut triples, endpoint); // Validate the extracted triples against the VoID schema if let Some(void_schema) = schemas_map.get(endpoint) { let validation_errors = validate_triples_against_void(&triples, void_schema); errors.extend(validation_errors); } else { tracing::warn!("No VoID schema found for endpoint: {endpoint}"); } for t in triples { tracing::debug!("{:?} {:?} {:?}", t.subject, t.predicate, t.object); } } } Err(e) => { errors.push(format!( "Error parsing SPARQL query for {endpoint}:\n{query}\n\n{e}" )); tracing::warn!("Error parsing SPARQL query for {endpoint}:\n{query}\n\n{e}"); } } Ok(errors) } fn collect_triples_from_pattern( gp: &GraphPattern, triples: &mut Vec<TriplePattern>, endpoint: &str, ) { match gp { GraphPattern::Bgp { patterns } => { triples.extend(patterns.clone()); // each is a TriplePattern } GraphPattern::Join { left, right } | GraphPattern::LeftJoin { left, right, .. 
} | GraphPattern::Union { left, right } => { collect_triples_from_pattern(left, triples, endpoint); collect_triples_from_pattern(right, triples, endpoint); } GraphPattern::Filter { inner, .. } | GraphPattern::Graph { inner, .. } | GraphPattern::Minus { left: inner, .. } => { collect_triples_from_pattern(inner, triples, endpoint); } GraphPattern::Service { inner, name, .. } => { tracing::info!("Service pattern found - name: {name:?}"); tracing::info!("Service pattern found - inner: {inner:?}"); collect_triples_from_pattern(inner, triples, &strip_iri_brackets(&name.to_string())); } GraphPattern::Extend { inner, .. } | GraphPattern::Group { inner, .. } | GraphPattern::OrderBy { inner, .. } | GraphPattern::Project { inner, .. } | GraphPattern::Distinct { inner } | GraphPattern::Reduced { inner } | GraphPattern::Slice { inner, .. } => { collect_triples_from_pattern(inner, triples, endpoint); } GraphPattern::Values { .. } => {} GraphPattern::Path { subject, path, object, } => { // If you want to also collect property paths, you can translate them into triples here // For now, skip or handle separately println!("Path pattern found: {subject:?} - {path:?} - {object:?}"); } GraphPattern::Lateral { left: _, right: _ } => todo!(), } } /// Validate extracted triples against a VoID schema fn validate_triples_against_void( triples: &[TriplePattern], void_schema: &VoidSchema, ) -> Vec<String> { let mut errors = Vec::new(); // Build a map of variables to their types (from rdf:type triples) let mut variable_types: HashMap<String, Vec<String>> = HashMap::new(); // Collect type information for triple in triples { let (subject, predicate, object) = (&triple.subject, &triple.predicate, &triple.object); tracing::debug!("Processing pred: {predicate:?} {}", predicate.to_string()); // Check if this is an rdf:type triple if strip_iri_brackets(&predicate.to_string()) == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" { tracing::debug!( "Found rdf:type triple: {subject:?} a {}", 
object.to_string() ); // If subject is a variable and object is a class if let spargebra::term::TermPattern::Variable(var) = subject { if !matches!(object, spargebra::term::TermPattern::Variable(_)) { variable_types .entry(var.to_string()) .or_default() .push(strip_iri_brackets(&object.to_string())); } } } } tracing::debug!("✅✅ Variable types inferred: {variable_types:?}"); // Validate predicates against types for triple in triples { let (subject, predicate, _object) = (&triple.subject, &triple.predicate, &triple.object); let predicate_str = strip_iri_brackets(&predicate.to_string()); // Skip rdf:type triples as they're used for type inference if predicate_str == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" { continue; } // Check if subject is a variable with known types if let spargebra::term::TermPattern::Variable(var) = subject { let subject_str = var.to_string(); if let Some(types) = variable_types.get(&subject_str) { let mut predicate_valid = false; for type_uri in types { if let Some(class_info) = void_schema.schema_map.get(type_uri) { tracing::debug!( "Checking if predicate {predicate_str} is valid for type {type_uri} {:?}", class_info ); if class_info.predicates.contains_key(&predicate_str) { predicate_valid = true; break; } } } if !predicate_valid && !types.is_empty() { let type_curies: Vec<String> = types.iter().map(|t| void_schema.get_curie(t)).collect(); let predicate_curie = void_schema.get_curie(&predicate_str); // Get available predicates for the first type for suggestion let available_predicates: Vec<String> = types .iter() .filter_map(|t| void_schema.schema_map.get(t)) .flat_map(|info| info.predicates.keys()) .map(|p| void_schema.get_curie(p)) .collect(); if available_predicates.is_empty() { errors.push(format!( "Subject {subject_str} with type `{}` does not support the predicate `{predicate_curie}`. 
No predicates found for this type in the schema.", type_curies.join("`, `") )); } else { errors.push(format!( "Subject {subject_str} with type `{}` does not support the predicate `{predicate_curie}`. Available predicates: `{}`", type_curies.join("`, `"), available_predicates.join("`, `") )); } } } } // Check if the predicate exists in the schema at all else if !void_schema.predicates_list.contains(&predicate_str) && !void_schema .predicates_list .contains(&format!("<{predicate_str}>")) { let predicate_curie = void_schema.get_curie(&predicate_str); errors.push(format!( "Predicate `{predicate_curie}` is not found in the endpoint schema" )); } } errors } /// Strip angle brackets from IRI strings fn strip_iri_brackets(uri: &str) -> String { if uri.starts_with('<') && uri.ends_with('>') { uri[1..uri.len() - 1].to_string() } else { uri.to_string() } } // /// Extract the SPARQL query and endpoint from the markdown message, // /// validate the query against the specified endpoint. // pub async fn validate_sparql_in_md(msg_md: &str) -> AppResult<SparqlValidation> { // tracing::debug!("Validating SPARQL in markdown"); // // Single regex to extract endpoint and query from markdown // let re = Regex::new(r"(?s)```sparql\s*#\+ endpoint: (\S+)\s*(.*?)```\s*").unwrap(); // let (endpoint, query) = re // .captures(msg_md) // .map(|cap| { // let endpoint = cap // .get(1) // .map(|m| m.as_str().to_string()) // .unwrap_or_default(); // let query = cap // .get(2) // .map(|m| m.as_str().trim().to_string()) // .unwrap_or_default(); // (endpoint, query) // }) // .unwrap_or_default(); // let mut valid_res = SparqlValidation { // query, // endpoint, // results: vec![], // errors: vec![], // }; // if valid_res.query.is_empty() || valid_res.endpoint.is_empty() { // // valid_res.errors.push("Could not extract SPARQL query and endpoint from the message. 
Ensure the format is correct.".to_string()); // return Ok(valid_res); // } // valid_res = validate_sparql(&valid_res.endpoint, &valid_res.query).await?; // Ok(valid_res) // }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sib-swiss/sparql-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.