use serde::Serialize;
use spargebra::{Query, algebra::GraphPattern, term::TriplePattern};
use std::collections::HashMap;
use crate::{
error::AppResult,
void_schema::{SchemasMap, VoidSchema},
};
// use crate::error::AppResult;
/// Serializable record pairing a SPARQL query and its endpoint with the
/// messages collected for it.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; confirm it is still used elsewhere before extending it.
#[derive(Debug, Serialize)]
pub struct SparqlValidation {
    /// Text of the SPARQL query.
    pub query: String,
    /// Endpoint associated with the query (presumably its URL — confirm with callers).
    pub endpoint: String,
    /// Error messages, one entry per reported problem.
    pub errors: Vec<String>,
}
// TODO: extend the VoID-based validation of SPARQL queries (property paths are not checked yet).
/// Validate a SPARQL query against the specified endpoint using the classes
/// schema from its VoID description.
///
/// The query is parsed, every triple pattern of its `WHERE` clause is
/// collected, and the triples are checked against the schema registered for
/// `endpoint` in `schemas_map`. All query forms (`SELECT`, `CONSTRUCT`,
/// `DESCRIBE`, `ASK`) are validated the same way.
///
/// # Arguments
/// * `endpoint` - key used to look up the schema in `schemas_map`; also used in messages.
/// * `query` - SPARQL query text.
/// * `schemas_map` - per-endpoint VoID schemas.
///
/// # Returns
/// The list of validation messages: a single entry describing the syntax
/// error when the query does not parse, otherwise one entry per schema
/// violation. The list is empty when nothing was found or when no schema is
/// registered for `endpoint` (a warning is logged in that case).
///
/// # Errors
/// This function currently never returns `Err`; every problem is reported
/// through the returned list.
pub async fn validate_sparql(
    endpoint: &str,
    query: &str,
    schemas_map: &SchemasMap,
) -> AppResult<Vec<String>> {
    tracing::debug!("Validating SPARQL query for endpoint {endpoint}");

    let parsed_query = match Query::parse(query, None) {
        Ok(parsed_query) => parsed_query,
        Err(e) => {
            // Build the message once and reuse it for both the log and the result.
            let message = format!("Error parsing SPARQL query for {endpoint}:\n{query}\n\n{e}");
            tracing::warn!("{message}");
            return Ok(vec![message]);
        }
    };

    // Every query form carries a WHERE pattern, so none of them is skipped.
    let pattern = match parsed_query {
        Query::Select { pattern, .. }
        | Query::Construct { pattern, .. }
        | Query::Describe { pattern, .. }
        | Query::Ask { pattern, .. } => pattern,
    };

    let mut triples = Vec::new();
    collect_triples_from_pattern(&pattern, &mut triples, endpoint);

    // Validate the extracted triples against the VoID schema, when one is known.
    let errors = match schemas_map.get(endpoint) {
        Some(void_schema) => validate_triples_against_void(&triples, void_schema),
        None => {
            tracing::warn!("No VoID schema found for endpoint: {endpoint}");
            Vec::new()
        }
    };

    for t in &triples {
        tracing::debug!("{:?} {:?} {:?}", t.subject, t.predicate, t.object);
    }

    Ok(errors)
}
/// Recursively collect the triple patterns of every basic graph pattern
/// reachable from `gp` and append them to `triples`.
///
/// `endpoint` is the endpoint the current sub-pattern belongs to. It is only
/// threaded through the recursion: when descending into a `SERVICE` clause it
/// is replaced by the service name.
///
/// NOTE(review): triples found inside a `SERVICE` clause end up in the same
/// list as the outer triples, so the caller validates them against the outer
/// endpoint's schema. Confirm whether they should be kept apart per endpoint.
///
/// Property paths and `VALUES` blocks contribute no triples.
fn collect_triples_from_pattern(
    gp: &GraphPattern,
    triples: &mut Vec<TriplePattern>,
    endpoint: &str,
) {
    match gp {
        GraphPattern::Bgp { patterns } => {
            // Clone element by element instead of cloning the whole Vec first.
            triples.extend(patterns.iter().cloned());
        }
        // Binary operators: both operands can contain triples.
        // LATERAL is handled like a join so that a query using it is
        // validated instead of aborting the process.
        GraphPattern::Join { left, right }
        | GraphPattern::LeftJoin { left, right, .. }
        | GraphPattern::Union { left, right }
        | GraphPattern::Lateral { left, right } => {
            collect_triples_from_pattern(left, triples, endpoint);
            collect_triples_from_pattern(right, triples, endpoint);
        }
        // NOTE(review): for MINUS only the left operand is visited; the
        // right operand is never collected. Confirm this is intended.
        GraphPattern::Filter { inner, .. }
        | GraphPattern::Graph { inner, .. }
        | GraphPattern::Minus { left: inner, .. } => {
            collect_triples_from_pattern(inner, triples, endpoint);
        }
        GraphPattern::Service { inner, name, .. } => {
            tracing::info!("Service pattern found - name: {name:?}");
            tracing::info!("Service pattern found - inner: {inner:?}");
            // From here on the service name becomes the current endpoint.
            collect_triples_from_pattern(inner, triples, &strip_iri_brackets(&name.to_string()));
        }
        // Solution modifiers wrap a single inner pattern.
        GraphPattern::Extend { inner, .. }
        | GraphPattern::Group { inner, .. }
        | GraphPattern::OrderBy { inner, .. }
        | GraphPattern::Project { inner, .. }
        | GraphPattern::Distinct { inner }
        | GraphPattern::Reduced { inner }
        | GraphPattern::Slice { inner, .. } => {
            collect_triples_from_pattern(inner, triples, endpoint);
        }
        GraphPattern::Values { .. } => {}
        GraphPattern::Path {
            subject,
            path,
            object,
        } => {
            // Property paths are not translated into triples yet; they are
            // only reported through the logging facade used by the rest of
            // this file (not stdout).
            tracing::debug!("Path pattern found: {subject:?} - {path:?} - {object:?}");
        }
    }
}
/// Validate extracted triples against a VoID schema.
///
/// Works in two passes over `triples`:
/// 1. Type inference: for every `?var rdf:type <Class>` triple, `<Class>` is
///    recorded as a type of `?var`. Only IRI objects are recorded, so
///    variables, blank nodes and literals never count as classes.
/// 2. Predicate check: for every other triple whose predicate is an IRI,
///    * if the subject is a variable with inferred types, the predicate must
///      be listed for at least one of those types in `void_schema.schema_map`;
///    * if the subject is not a variable, the predicate must appear in
///      `void_schema.predicates_list`.
///
/// Triples whose predicate is itself a variable are skipped, because there is
/// no concrete predicate to look up.
///
/// NOTE(review): triples whose subject is a variable without any inferred
/// type are not checked at all. Confirm whether the schema-wide predicate
/// check was meant to apply to them too.
///
/// Returns one human-readable message per violation. Types and suggested
/// predicates are de-duplicated, and suggestions are sorted, so messages are
/// stable.
fn validate_triples_against_void(
    triples: &[TriplePattern],
    void_schema: &VoidSchema,
) -> Vec<String> {
    const RDF_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

    let mut errors = Vec::new();
    // Map of variables to their types (from rdf:type triples).
    let mut variable_types: HashMap<String, Vec<String>> = HashMap::new();

    // Pass 1: collect type information.
    for triple in triples {
        let (subject, predicate, object) = (&triple.subject, &triple.predicate, &triple.object);
        tracing::debug!("Processing pred: {predicate:?} {predicate}");
        if strip_iri_brackets(&predicate.to_string()) != RDF_TYPE {
            continue;
        }
        tracing::debug!("Found rdf:type triple: {subject:?} a {object}");
        // Only `?var a <IRI>` carries usable type information.
        if let (
            spargebra::term::TermPattern::Variable(var),
            spargebra::term::TermPattern::NamedNode(_),
        ) = (subject, object)
        {
            let class_iri = strip_iri_brackets(&object.to_string());
            let known_types = variable_types.entry(var.to_string()).or_default();
            // The same type can be stated several times (e.g. in UNION branches).
            if !known_types.contains(&class_iri) {
                known_types.push(class_iri);
            }
        }
    }
    tracing::debug!("✅✅ Variable types inferred: {variable_types:?}");

    // Pass 2: validate predicates against types.
    for triple in triples {
        let (subject, predicate) = (&triple.subject, &triple.predicate);
        // A variable predicate (`?s ?p ?o`) names no concrete predicate.
        if matches!(predicate, spargebra::term::NamedNodePattern::Variable(_)) {
            continue;
        }
        let predicate_str = strip_iri_brackets(&predicate.to_string());
        // Skip rdf:type triples as they're used for type inference.
        if predicate_str == RDF_TYPE {
            continue;
        }

        if let spargebra::term::TermPattern::Variable(var) = subject {
            let subject_str = var.to_string();
            // Variables without inferred types are left unchecked (see NOTE above).
            if let Some(types) = variable_types.get(&subject_str) {
                // Valid as soon as one of the subject's types lists the predicate.
                let predicate_valid = types.iter().any(|type_uri| {
                    void_schema
                        .schema_map
                        .get(type_uri)
                        .map_or(false, |class_info| {
                            tracing::debug!(
                                "Checking if predicate {predicate_str} is valid for type {type_uri} {:?}",
                                class_info
                            );
                            class_info.predicates.contains_key(&predicate_str)
                        })
                });
                if !predicate_valid {
                    let type_curies: Vec<String> =
                        types.iter().map(|t| void_schema.get_curie(t)).collect();
                    let predicate_curie = void_schema.get_curie(&predicate_str);
                    // Predicates known for any of the subject's types, offered as suggestions.
                    let mut available_predicates: Vec<String> = types
                        .iter()
                        .filter_map(|t| void_schema.schema_map.get(t))
                        .flat_map(|info| info.predicates.keys())
                        .map(|p| void_schema.get_curie(p))
                        .collect();
                    // Several types may share predicates; sort so duplicates can be dropped.
                    available_predicates.sort_unstable();
                    available_predicates.dedup();
                    if available_predicates.is_empty() {
                        errors.push(format!(
                            "Subject {subject_str} with type `{}` does not support the predicate `{predicate_curie}`. No predicates found for this type in the schema.",
                            type_curies.join("`, `")
                        ));
                    } else {
                        errors.push(format!(
                            "Subject {subject_str} with type `{}` does not support the predicate `{predicate_curie}`. Available predicates: `{}`",
                            type_curies.join("`, `"),
                            available_predicates.join("`, `")
                        ));
                    }
                }
            }
        }
        // Non-variable subject: check if the predicate exists in the schema at all
        // (the list is probed with and without angle brackets).
        else if !void_schema.predicates_list.contains(&predicate_str)
            && !void_schema
                .predicates_list
                .contains(&format!("<{predicate_str}>"))
        {
            let predicate_curie = void_schema.get_curie(&predicate_str);
            errors.push(format!(
                "Predicate `{predicate_curie}` is not found in the endpoint schema"
            ));
        }
    }
    errors
}
/// Return `uri` without its surrounding angle brackets.
///
/// The brackets are removed only when the string both starts with `<` and
/// ends with `>`; any other input is returned unchanged (as an owned copy).
fn strip_iri_brackets(uri: &str) -> String {
    let unwrapped = uri
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    match unwrapped {
        Some(inner) => inner.to_owned(),
        None => uri.to_owned(),
    }
}
// /// Extract the SPARQL query and endpoint from the markdown message,
// /// validate the query against the specified endpoint.
// pub async fn validate_sparql_in_md(msg_md: &str) -> AppResult<SparqlValidation> {
// tracing::debug!("Validating SPARQL in markdown");
// // Single regex to extract endpoint and query from markdown
// let re = Regex::new(r"(?s)```sparql\s*#\+ endpoint: (\S+)\s*(.*?)```\s*").unwrap();
// let (endpoint, query) = re
// .captures(msg_md)
// .map(|cap| {
// let endpoint = cap
// .get(1)
// .map(|m| m.as_str().to_string())
// .unwrap_or_default();
// let query = cap
// .get(2)
// .map(|m| m.as_str().trim().to_string())
// .unwrap_or_default();
// (endpoint, query)
// })
// .unwrap_or_default();
// let mut valid_res = SparqlValidation {
// query,
// endpoint,
// results: vec![],
// errors: vec![],
// };
// if valid_res.query.is_empty() || valid_res.endpoint.is_empty() {
// // valid_res.errors.push("Could not extract SPARQL query and endpoint from the message. Ensure the format is correct.".to_string());
// return Ok(valid_res);
// }
// valid_res = validate_sparql(&valid_res.endpoint, &valid_res.query).await?;
// Ok(valid_res)
// }