Skip to main content
Glama
by 8b-is
content_detector.rs12.6 kB
//! Content detection engine - "Understanding what's in your directories" - Omni //! Analyzes directory contents to determine the type of project/collection use crate::scanner::FileNode; use std::collections::HashMap; use std::path::Path; /// Types of content that can be detected in a directory #[derive(Debug, Clone, PartialEq)] pub enum DirectoryType { /// Software project with language and framework info CodeProject { language: Language, framework: Option<Framework>, has_tests: bool, has_docs: bool, }, /// Photo/image collection PhotoCollection { image_count: usize, date_range: Option<(String, String)>, cameras: Vec<String>, }, /// Document archive (PDFs, docs, etc.) DocumentArchive { categories: HashMap<String, usize>, total_docs: usize, }, /// Media library (videos, audio) MediaLibrary { video_count: usize, audio_count: usize, total_duration: Option<String>, quality: Vec<String>, // e.g., ["1080p", "4K", "720p"] }, /// Data science workspace DataScience { notebooks: usize, datasets: usize, languages: Vec<String>, }, /// Mixed content or unknown MixedContent { dominant_type: Option<String>, file_types: HashMap<String, usize>, total_files: usize, }, } #[derive(Debug, Clone, PartialEq)] pub enum Language { Rust, Python, JavaScript, TypeScript, Go, Java, Cpp, Ruby, Other(String), } #[derive(Debug, Clone, PartialEq)] pub enum Framework { // Rust Actix, Rocket, Tokio, // Python Django, Flask, FastAPI, // JavaScript/TypeScript React, Vue, Angular, NextJs, Express, // Other Other(String), } /// Analyzes a directory and detects its content type pub struct ContentDetector; impl ContentDetector { /// Analyze nodes and detect directory type pub fn detect(nodes: &[FileNode], root_path: &Path) -> DirectoryType { // Count file extensions let mut ext_counts: HashMap<String, usize> = HashMap::new(); let mut total_files = 0; for node in nodes { if !node.is_dir { total_files += 1; if let Some(ext) = node.path.extension().and_then(|e| e.to_str()) { 
*ext_counts.entry(ext.to_lowercase()).or_insert(0) += 1; } } } // Check for code project indicators if Self::is_code_project(&ext_counts, nodes, root_path) { return Self::analyze_code_project(nodes, root_path, &ext_counts); } // Check for photo collection if Self::is_photo_collection(&ext_counts) { return Self::analyze_photo_collection(nodes, &ext_counts); } // Check for document archive if Self::is_document_archive(&ext_counts) { return Self::analyze_document_archive(nodes); } // Check for media library if Self::is_media_library(&ext_counts) { return Self::analyze_media_library(nodes, &ext_counts); } // Check for data science if Self::is_data_science(&ext_counts) { return Self::analyze_data_science(&ext_counts); } // Default to mixed content DirectoryType::MixedContent { dominant_type: Self::get_dominant_type(&ext_counts), file_types: ext_counts, total_files, } } fn is_code_project( ext_counts: &HashMap<String, usize>, nodes: &[FileNode], _root_path: &Path, ) -> bool { // Check for common code file extensions let code_extensions = [ "rs", "py", "js", "ts", "go", "java", "cpp", "c", "rb", "php", ]; let code_files: usize = code_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); // Check for project files let has_project_files = nodes.iter().any(|n| { let name = n.path.file_name().and_then(|n| n.to_str()).unwrap_or(""); matches!( name, "Cargo.toml" | "package.json" | "requirements.txt" | "go.mod" | "pom.xml" | "Gemfile" ) }); code_files > 5 || has_project_files } fn analyze_code_project( nodes: &[FileNode], _root_path: &Path, ext_counts: &HashMap<String, usize>, ) -> DirectoryType { // Detect primary language let language = if ext_counts.contains_key("rs") { Language::Rust } else if ext_counts.contains_key("py") { Language::Python } else if ext_counts.contains_key("ts") { Language::TypeScript } else if ext_counts.contains_key("js") { Language::JavaScript } else if ext_counts.contains_key("go") { Language::Go } else if ext_counts.contains_key("java") { 
Language::Java } else if ext_counts.contains_key("cpp") || ext_counts.contains_key("cc") { Language::Cpp } else if ext_counts.contains_key("rb") { Language::Ruby } else { Language::Other("Unknown".to_string()) }; // Detect framework let framework = Self::detect_framework(nodes, &language); // Check for tests and docs let has_tests = nodes.iter().any(|n| { let path_str = n.path.to_string_lossy(); path_str.contains("test") || path_str.contains("spec") }); let has_docs = nodes.iter().any(|n| { let name = n.path.file_name().and_then(|n| n.to_str()).unwrap_or(""); let path_str = n.path.to_string_lossy(); name.ends_with(".md") || path_str.contains("docs/") }); DirectoryType::CodeProject { language, framework, has_tests, has_docs, } } fn detect_framework(nodes: &[FileNode], language: &Language) -> Option<Framework> { for node in nodes { let name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or(""); match language { Language::Rust => { // Check Cargo.toml for dependencies if name == "Cargo.toml" { // In real implementation, would read file and check deps return None; // Placeholder } } Language::JavaScript | Language::TypeScript => { if name == "package.json" { // Would check for React, Vue, etc. in dependencies return None; // Placeholder } } Language::Python => { if name == "requirements.txt" || name == "pyproject.toml" { // Would check for Django, Flask, etc. 
return None; // Placeholder } } _ => {} } } None } fn is_photo_collection(ext_counts: &HashMap<String, usize>) -> bool { let image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "raw", "dng", "heic"]; let image_files: usize = image_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); image_files > 10 } fn analyze_photo_collection( _nodes: &[FileNode], ext_counts: &HashMap<String, usize>, ) -> DirectoryType { let image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "raw", "dng", "heic"]; let image_count: usize = image_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); DirectoryType::PhotoCollection { image_count, date_range: None, // Would need EXIF parsing cameras: vec![], // Would need EXIF parsing } } fn is_document_archive(ext_counts: &HashMap<String, usize>) -> bool { let doc_extensions = ["pdf", "doc", "docx", "txt", "odt", "rtf"]; let doc_files: usize = doc_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); doc_files > 10 } fn analyze_document_archive(nodes: &[FileNode]) -> DirectoryType { let mut categories = HashMap::new(); // Simple categorization based on filename patterns for node in nodes { if !node.is_dir { let name = node .path .file_name() .and_then(|n| n.to_str()) .unwrap_or("") .to_lowercase(); let category = if name.contains("invoice") || name.contains("receipt") || name.contains("bank") { "Financial" } else if name.contains("homework") || name.contains("assignment") || name.contains("grade") { "School" } else if name.contains("resume") || name.contains("cv") || name.contains("letter") { "Personal" } else { "Other" }; *categories.entry(category.to_string()).or_insert(0) += 1; } } let total_docs = categories.values().sum(); DirectoryType::DocumentArchive { categories, total_docs, } } fn is_media_library(ext_counts: &HashMap<String, usize>) -> bool { let video_extensions = ["mp4", "avi", "mkv", "mov", "wmv", "flv"]; let audio_extensions = ["mp3", "wav", "flac", "aac", "ogg", "m4a"]; let 
video_files: usize = video_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); let audio_files: usize = audio_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); video_files + audio_files > 10 } fn analyze_media_library( _nodes: &[FileNode], ext_counts: &HashMap<String, usize>, ) -> DirectoryType { let video_extensions = ["mp4", "avi", "mkv", "mov", "wmv", "flv"]; let audio_extensions = ["mp3", "wav", "flac", "aac", "ogg", "m4a"]; let video_count: usize = video_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); let audio_count: usize = audio_extensions .iter() .filter_map(|ext| ext_counts.get(*ext)) .sum(); DirectoryType::MediaLibrary { video_count, audio_count, total_duration: None, // Would need media parsing quality: vec![], // TODO: Extract quality from filenames (e.g., "movie_1080p.mp4") } } fn is_data_science(ext_counts: &HashMap<String, usize>) -> bool { ext_counts.contains_key("ipynb") || (ext_counts.contains_key("csv") && ext_counts["csv"] > 5) || (ext_counts.contains_key("parquet") || ext_counts.contains_key("feather")) } fn analyze_data_science(ext_counts: &HashMap<String, usize>) -> DirectoryType { let notebooks = ext_counts.get("ipynb").copied().unwrap_or(0); let datasets = ext_counts.get("csv").copied().unwrap_or(0) + ext_counts.get("parquet").copied().unwrap_or(0) + ext_counts.get("feather").copied().unwrap_or(0); let mut languages = vec![]; if ext_counts.contains_key("py") { languages.push("Python".to_string()); } if ext_counts.contains_key("r") { languages.push("R".to_string()); } if ext_counts.contains_key("jl") { languages.push("Julia".to_string()); } DirectoryType::DataScience { notebooks, datasets, languages, } } fn get_dominant_type(ext_counts: &HashMap<String, usize>) -> Option<String> { ext_counts .iter() .max_by_key(|(_, count)| *count) .map(|(ext, _)| ext.clone()) } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/8b-is/smart-tree'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.