Skip to main content
Glama
by 8b-is
nlp.rs12.2 kB
//! 🧠 Natural Language Processing for Smart Tools //! //! This module provides natural language understanding capabilities //! for parsing user queries and converting them into structured //! search intents and parameters. use super::FocusArea; use serde::{Deserialize, Serialize}; use std::collections::HashMap; /// 🧠 Natural language query parser pub struct QueryParser { /// Intent detection patterns intent_patterns: HashMap<SearchIntent, Vec<String>>, /// Entity extraction patterns entity_patterns: HashMap<String, Vec<String>>, } /// 🎯 Search intent types #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum SearchIntent { FindCode, FindConfig, FindTests, FindDocs, FindBugs, FindSecurity, FindPerformance, FindAPI, FindAuth, FindDatabase, Debug, Optimize, Deploy, General, } /// 📝 Parsed query structure #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ParsedQuery { /// Primary intent pub intent: SearchIntent, /// Extracted entities (file names, function names, etc.) pub entities: Vec<String>, /// Programming languages mentioned pub languages: Option<Vec<String>>, /// Keywords for content search pub keywords: Vec<String>, /// Focus areas derived from query pub focus_areas: Vec<FocusArea>, /// Original query text pub original_query: String, /// Confidence score (0.0-1.0) pub confidence: f32, } impl QueryParser { /// Create new query parser pub fn new() -> Self { let mut parser = Self { intent_patterns: HashMap::new(), entity_patterns: HashMap::new(), }; parser.initialize_patterns(); parser } /// 🔍 Parse natural language query pub fn parse(&self, query: &str) -> ParsedQuery { let query_lower = query.to_lowercase(); // Detect intent let intent = self.detect_intent(&query_lower); // Extract entities let entities = self.extract_entities(&query_lower); // Extract languages let languages = self.extract_languages(&query_lower); // Extract keywords let keywords = self.extract_keywords(&query_lower); // Derive focus areas let focus_areas = self.derive_focus_areas(&intent, &keywords); // Calculate confidence let confidence = self.calculate_confidence(&intent, &entities, &keywords); ParsedQuery { intent, entities, languages, keywords, focus_areas, original_query: query.to_string(), confidence, } } /// 🎯 Detect search intent from query fn detect_intent(&self, query: &str) -> SearchIntent { for (intent, patterns) in &self.intent_patterns { for pattern in patterns { if query.contains(pattern) { return intent.clone(); } } } SearchIntent::General } /// 🏷️ Extract entities (file names, function names, etc.) fn extract_entities(&self, query: &str) -> Vec<String> { let mut entities = Vec::new(); // Simple entity extraction - look for quoted strings let mut in_quotes = false; let mut current_entity = String::new(); for ch in query.chars() { match ch { '"' | '\'' => { if in_quotes && !current_entity.is_empty() { entities.push(current_entity.clone()); current_entity.clear(); } in_quotes = !in_quotes; } _ if in_quotes => { current_entity.push(ch); } _ => {} } } // Also extract common file extensions and patterns for (entity_type, patterns) in &self.entity_patterns { for pattern in patterns { if query.contains(pattern) { entities.push(format!("{}:{}", entity_type, pattern)); } } } entities } /// 💻 Extract programming languages mentioned fn extract_languages(&self, query: &str) -> Option<Vec<String>> { let languages = vec![ "rust", "python", "javascript", "typescript", "go", "java", "cpp", "c++", "c", "ruby", "php", "swift", "kotlin", "scala", ]; let mut found_languages = Vec::new(); for lang in languages { if query.contains(lang) { found_languages.push(lang.to_string()); } } if found_languages.is_empty() { None } else { Some(found_languages) } } /// 🔑 Extract keywords for content search fn extract_keywords(&self, query: &str) -> Vec<String> { // Simple keyword extraction - split on common words and take meaningful terms let stop_words = vec![ "find", "search", "look", "for", "in", "the", "a", "an", "and", "or", "with", "that", "have", "has", "is", "are", "was", "were", "be", "been", "being", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can", "all", "any", "some", "this", "that", "these", "those", "i", "you", "he", "she", "it", "we", "they", ]; query .split_whitespace() .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric())) .filter(|word| !word.is_empty() && word.len() > 2) .filter(|word| !stop_words.contains(&word.to_lowercase().as_str())) .map(|word| word.to_string()) .collect() } /// 🎯 Derive focus areas from intent and keywords fn derive_focus_areas(&self, intent: &SearchIntent, keywords: &[String]) -> Vec<FocusArea> { let mut focus_areas = Vec::new(); // Add focus area based on intent match intent { SearchIntent::FindAuth => focus_areas.push(FocusArea::Authentication), SearchIntent::FindAPI => focus_areas.push(FocusArea::API), SearchIntent::FindDatabase => focus_areas.push(FocusArea::Database), SearchIntent::FindTests => focus_areas.push(FocusArea::Testing), SearchIntent::FindConfig => focus_areas.push(FocusArea::Configuration), SearchIntent::FindSecurity => focus_areas.push(FocusArea::Security), SearchIntent::FindPerformance => focus_areas.push(FocusArea::Performance), SearchIntent::FindDocs => focus_areas.push(FocusArea::Documentation), _ => {} } // Add focus areas based on keywords for keyword in keywords { let keyword_lower = keyword.to_lowercase(); let focus_area = FocusArea::from_str(&keyword_lower); if !focus_areas.contains(&focus_area) { focus_areas.push(focus_area); } } // Default focus areas if none found if focus_areas.is_empty() { focus_areas = vec![FocusArea::API, FocusArea::Configuration]; } focus_areas } /// 📊 Calculate confidence score fn calculate_confidence( &self, intent: &SearchIntent, entities: &[String], keywords: &[String], ) -> f32 { let mut confidence: f32 = 0.5; // Base confidence // Boost confidence for specific intents if *intent != SearchIntent::General { confidence += 0.2; } // Boost confidence for entities if !entities.is_empty() { confidence += 0.2; } // Boost confidence for meaningful keywords if keywords.len() >= 2 { confidence += 0.1; } confidence.min(1.0) } /// Initialize intent detection patterns fn initialize_patterns(&mut self) { // Find code patterns self.intent_patterns.insert( SearchIntent::FindCode, vec![ "find code".to_string(), "show code".to_string(), "source code".to_string(), "functions".to_string(), "methods".to_string(), "classes".to_string(), ], ); // Find config patterns self.intent_patterns.insert( SearchIntent::FindConfig, vec![ "config".to_string(), "configuration".to_string(), "settings".to_string(), "env".to_string(), "environment".to_string(), ], ); // Find tests patterns self.intent_patterns.insert( SearchIntent::FindTests, vec![ "test".to_string(), "tests".to_string(), "testing".to_string(), "spec".to_string(), "specs".to_string(), ], ); // Authentication patterns self.intent_patterns.insert( SearchIntent::FindAuth, vec![ "auth".to_string(), "authentication".to_string(), "login".to_string(), "signin".to_string(), "password".to_string(), "token".to_string(), ], ); // API patterns self.intent_patterns.insert( SearchIntent::FindAPI, vec![ "api".to_string(), "endpoint".to_string(), "route".to_string(), "handler".to_string(), "controller".to_string(), ], ); // Security patterns self.intent_patterns.insert( SearchIntent::FindSecurity, vec![ "security".to_string(), "vulnerability".to_string(), "secure".to_string(), "encrypt".to_string(), "sanitize".to_string(), ], ); // Performance patterns self.intent_patterns.insert( SearchIntent::FindPerformance, vec![ "performance".to_string(), "optimize".to_string(), "slow".to_string(), "fast".to_string(), "cache".to_string(), ], ); // Debug patterns self.intent_patterns.insert( SearchIntent::Debug, vec![ "debug".to_string(), "debugging".to_string(), "bug".to_string(), "error".to_string(), "issue".to_string(), "problem".to_string(), ], ); // Entity patterns self.entity_patterns.insert( "file_extension".to_string(), vec![ ".rs", ".py", ".js", ".ts", ".go", ".java", ".json", ".yaml", ".toml", ] .into_iter() .map(String::from) .collect(), ); } } impl Default for QueryParser { fn default() -> Self { Self::new() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_query_parsing() { let parser = QueryParser::new(); let query = "find authentication code in rust files"; let parsed = parser.parse(query); assert_eq!(parsed.intent, SearchIntent::FindAuth); assert!(parsed .languages .as_ref() .unwrap() .contains(&"rust".to_string())); assert!(parsed.focus_areas.contains(&FocusArea::Authentication)); } #[test] fn test_entity_extraction() { let parser = QueryParser::new(); let query = "find 'login_handler' function"; let parsed = parser.parse(query); assert!(parsed.entities.contains(&"login_handler".to_string())); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/8b-is/smart-tree'

If you have feedback or need assistance with the MCP directory API, please join our Discord server