//
// -----------------------------------------------------------------------------
// WELCOME TO THE JUNGLE! ...The filesystem jungle, that is. 🌴
//
// You've found scanner.rs, the intrepid explorer and engine room of st.
// This module is the Indiana Jones of our codebase. It bravely dives into
// the deepest, darkest directories, dodges `.gitignore` traps, inspects
// every file for treasure (metadata), and reports back its findings.
//
// So grab your hat, and let's go on an adventure!
//
// Brought to you by The Cheet - making filesystem traversal a rock concert! 🥁🧻
// -----------------------------------------------------------------------------
//
use crate::scanner_safety::{estimate_node_size, ScannerSafetyLimits, ScannerSafetyTracker};
use anyhow::Result;
use globset::{Glob, GlobSet, GlobSetBuilder}; // For powerful gitignore-style pattern matching.
use regex::Regex; // For user-defined find patterns.
use std::collections::{HashMap, HashSet}; // Our trusty hash-based collections.
use std::fs; // Filesystem operations, the bread and butter here.
use std::io::{BufRead, BufReader}; // For efficient reading, especially for content search.
use std::path::{Path, PathBuf}; // Path manipulation is key.
use std::sync::mpsc; // For streaming results from a worker thread.
use std::time::SystemTime; // To know when files were last touched.
use walkdir::{DirEntry, WalkDir}; // The excellent `walkdir` crate does the actual directory walking.
// Unix-specific imports for richer metadata like permissions, UID, GID.
// On other platforms, we'll use sensible defaults.
#[cfg(unix)]
use std::os::unix::fs::{MetadataExt, PermissionsExt};
/// # FileNode: The Ultimate Backstage Pass
///
/// Every file and directory we meet gets one of these. It's a VIP pass that
/// holds all the juicy details: its name, size, when it was last cool (modified),
/// and whether it's on the super-secret "ignored" list. It's the atom of our
/// `st` universe.
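///
/// A minimal sketch (hypothetical consumer) of reading a node's key fields:
/// ```ignore
/// if !node.permission_denied {
///     println!("{} ({} bytes, depth {})", node.path.display(), node.size, node.depth);
/// }
/// ```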
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FileNode {
/// The full path to the file or directory. The source of truth for location!
pub path: PathBuf,
/// Is it a directory? `true` if yes, `false` if it's a file or symlink.
pub is_dir: bool,
/// Size of the file in bytes. For directories, this is often 0 or metadata-dependent.
pub size: u64,
/// File permissions (e.g., `rwxr-xr-x`). Stored as a u32, typically from Unix mode.
pub permissions: u32,
/// User ID of the owner (Unix-specific).
pub uid: u32,
/// Group ID of the owner (Unix-specific).
pub gid: u32,
/// Timestamp of the last modification. Tells us how fresh or ancient a file is.
pub modified: SystemTime,
/// Is it a symbolic link? `true` if yes. We handle these with care.
pub is_symlink: bool,
/// Is it a hidden file (e.g., starts with a `.` on Unix)?
pub is_hidden: bool,
/// Did we encounter a "Permission Denied" error when trying to access this?
/// Important for gracefully handling parts of the filesystem we can't read.
pub permission_denied: bool,
/// Is this file or directory ignored based on `.gitignore` or default ignore rules?
pub is_ignored: bool,
/// The depth of this entry relative to the scan root (root is depth 0).
pub depth: usize,
/// The specific type of the file (e.g., RegularFile, Symlink, Executable).
pub file_type: FileType,
/// A category assigned based on extension or name, used for coloring and context.
/// (e.g., Rust, Python, Image, Archive).
pub category: FileCategory,
/// For content search: Information about where matches were found
/// `None` if no search was performed or no matches.
pub search_matches: Option<SearchMatches>,
/// The filesystem type this file resides on
pub filesystem_type: FilesystemType,
}
/// Information about search matches within a file
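///
/// Positions are 1-based `(line, column)` pairs. A minimal sketch of reporting
/// the first hit (assuming a `node` with populated matches):
/// ```ignore
/// if let Some(m) = &node.search_matches {
///     let (line, col) = m.first_match;
///     println!("first match at {line}:{col} ({} total)", m.total_count);
/// }
/// ```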
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SearchMatches {
/// First match position (line, column)
pub first_match: (usize, usize),
/// Total number of matches found
pub total_count: usize,
/// List of all match positions (line, column) - limited to prevent memory issues
pub positions: Vec<(usize, usize)>,
/// Whether the search was truncated due to too many matches
pub truncated: bool,
/// Line content for each match (line number, line content, column) - optional for compatibility
#[serde(skip_serializing_if = "Option::is_none")]
pub line_content: Option<Vec<(usize, String, usize)>>,
}
/// # FileType: Distinguishing Different Kinds of Filesystem Objects
///
/// This enum helps us categorize entries beyond just "file" or "directory".
/// It's especially useful on Unix-like systems where you have sockets, pipes, etc.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum FileType {
Directory, // A folder, a container of other things.
RegularFile, // Your everyday, garden-variety file.
Symlink, // A pointer to another file or directory.
Executable, // A file that can be run (has execute permissions).
Socket, // A Unix domain socket.
Pipe, // A named pipe (FIFO).
BlockDevice, // A block special file (e.g., /dev/sda).
CharDevice, // A character special file (e.g., /dev/tty).
}
/// # FilesystemType: Identifying the underlying filesystem
///
/// This enum represents different filesystem types with single-character codes
/// for compact display. The mapping is designed to be memorable and intuitive.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum FilesystemType {
Ext4, // '4' - The most common Linux filesystem
Ext3, // '3' - Older ext filesystem
Ext2, // '2' - Even older ext filesystem
Xfs, // 'X' - XFS filesystem
Btrfs, // 'B' - Btrfs (B-tree filesystem)
Zfs, // 'Z' - ZFS filesystem
Ntfs, // 'N' - Windows NTFS
Fat32, // 'F' - FAT32
ExFat, // 'E' - exFAT
Apfs, // 'A' - Apple File System
Hfs, // 'H' - HFS+ (older Mac)
Nfs, // 'R' - Remote NFS mount
Smb, // 'S' - SMB/CIFS network filesystem
Tmpfs, // 'T' - Temporary filesystem (RAM)
Procfs, // 'P' - /proc virtual filesystem
Sysfs, // 'Y' - /sys virtual filesystem
Devfs, // 'D' - /dev virtual filesystem
Mem8, // 'M' - MEM|8 filesystem (Coming soon - Quantum File System) - https://m8.is
Unknown, // '?' - Unknown filesystem
}
impl FilesystemType {
/// Get the single-character code for this filesystem type
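/// (e.g. `FilesystemType::Btrfs.to_char()` yields `'B'`).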
pub fn to_char(&self) -> char {
match self {
FilesystemType::Ext4 => '4',
FilesystemType::Ext3 => '3',
FilesystemType::Ext2 => '2',
FilesystemType::Xfs => 'X',
FilesystemType::Btrfs => 'B',
FilesystemType::Zfs => 'Z',
FilesystemType::Ntfs => 'N',
FilesystemType::Fat32 => 'F',
FilesystemType::ExFat => 'E',
FilesystemType::Apfs => 'A',
FilesystemType::Hfs => 'H',
FilesystemType::Nfs => 'R',
FilesystemType::Smb => 'S',
FilesystemType::Tmpfs => 'T',
FilesystemType::Procfs => 'P',
FilesystemType::Sysfs => 'Y',
FilesystemType::Devfs => 'D',
FilesystemType::Mem8 => 'M',
FilesystemType::Unknown => '?',
}
}
/// Check if this is a virtual filesystem that should be skipped
pub fn is_virtual(&self) -> bool {
matches!(
self,
FilesystemType::Procfs
| FilesystemType::Sysfs
| FilesystemType::Devfs
| FilesystemType::Tmpfs
)
}
/// Check if this filesystem type should be shown by default
/// (only "interesting" filesystems based on platform)
pub fn should_show_by_default(&self) -> bool {
#[cfg(target_os = "linux")]
{
matches!(
self,
FilesystemType::Ext4
| FilesystemType::Ext3
| FilesystemType::Xfs
| FilesystemType::Btrfs
| FilesystemType::Zfs
| FilesystemType::Nfs
| FilesystemType::Smb
| FilesystemType::Mem8
)
}
#[cfg(target_os = "macos")]
{
matches!(
self,
FilesystemType::Apfs
| FilesystemType::Hfs
| FilesystemType::Nfs
| FilesystemType::Smb
| FilesystemType::Mem8
)
}
#[cfg(target_os = "windows")]
{
matches!(
self,
FilesystemType::Ntfs
| FilesystemType::Fat32
| FilesystemType::ExFat
| FilesystemType::Mem8
)
}
#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
{
// Show all non-virtual filesystems on other platforms
!self.is_virtual()
}
}
}
/// # FileCategory: Adding Semantic Flavor to Files
///
/// This enum provides a higher-level categorization based on common file extensions
/// or names. It's primarily used for display purposes, like coloring output,
/// and can also help in understanding the nature of a directory's contents.
/// Trish loves how this makes the tree output more intuitive!
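/// For example, `main.rs` maps to `FileCategory::Rust` and `notes.md` to
/// `FileCategory::Markdown`; anything unrecognized falls back to `Unknown`.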
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum FileCategory {
// --- Programming Languages ---
Rust, // .rs
Python, // .py, .pyw, .pyx, .pyi
JavaScript, // .js, .mjs, .cjs
TypeScript, // .ts, .tsx
Java, // .java, .class, .jar
C, // .c, .h
Cpp, // .cpp, .cc, .cxx, .hpp, .hxx
Go, // .go
Ruby, // .rb
PHP, // .php - not sure PHP counts as programming.
Shell, // .sh, .bash, .zsh, .fish
// --- Markup & Data Formats ---
Markdown, // .md, .markdown
Html, // .html, .htm
Css, // .css, .scss, .sass, .less
Json, // .json, .jsonc
Yaml, // .yaml, .yml
Xml, // .xml, .svg (SVG is XML-based)
Toml, // .toml
Csv, // .csv
// --- Build Systems & Configuration ---
Makefile, // Makefile, makefile, GNUmakefile
Dockerfile, // Dockerfile, .dockerfile
GitConfig, // .gitignore, .gitconfig, .gitmodules
// --- Archives & Compressed Files ---
Archive, // .zip, .tar, .gz, .bz2, .xz, .7z, .rar
// --- Media Files ---
Image, // .jpg, .jpeg, .png, .gif, .bmp, .ico, .webp
Video, // .mp4, .avi, .mkv, .mov, .wmv, .flv, .webm
Audio, // .mp3, .wav, .flac, .aac, .ogg, .wma
// --- System & Binary Files ---
SystemFile, // Special system files like swap.img, vmlinuz
Binary, // Executables, shared libraries (.exe, .dll, .so, .dylib, .o, .a)
// --- Database ---
Database, // .db, .sqlite, .mdb, .accdb, .dbf
// --- Office & Documents ---
Office, // .doc, .docx, .odt
Spreadsheet, // .xls, .xlsx, .ods, .csv
PowerPoint, // .ppt, .pptx, .odp
Pdf, // .pdf
Ebook, // .epub, .mobi, .azw
// --- Text Variants ---
Log, // .log
Config, // .ini, .cfg, .conf, .env, .properties
License, // LICENSE, COPYING files
Readme, // README files
Txt, // .txt
Rtf, // .rtf
// --- Security & Crypto ---
Certificate, // .crt, .cert, .pem, .key
Encrypted, // .gpg, .pgp, .aes
// --- Fonts ---
Font, // .ttf, .otf, .woff, .woff2
// --- Virtual & Disk Images ---
DiskImage, // .img, .iso, .vdi, .vmdk, .vhd, .dd, .dmg
// --- 3D & CAD ---
Model3D, // .obj, .stl, .dae, .fbx, .blend
// --- Scientific & Data ---
Jupyter, // .ipynb
RData, // .rdata, .rds
Matlab, // .m, .mat
// --- Web Assets ---
WebAsset, // .wasm, .map
// --- Package & Dependencies ---
Package, // package.json, Cargo.toml, requirements.txt, etc.
Lock, // package-lock.json, Cargo.lock, yarn.lock
// --- Testing ---
Test, // Files with test_, _test, .test, .spec patterns
// --- Memory Files (Our special type!) ---
Memory, // .mem8, .m8 - MEM|8 memory files
// --- Others ---
Backup, // .bak, .backup, ~
Temp, // .tmp, .temp, .swp
Unknown, // If we can't categorize it, it's a mysterious Unknown!
}
/// # TreeStats: The Final Scoreboard
///
/// After the concert is over, this is where we see how we did. It's the
/// scoreboard that tracks total files, total directories, the biggest hits
/// (largest files), and more. It's the answer to "So, how was the show?"
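///
/// A minimal sketch of reading the scoreboard after a scan (illustrative):
/// ```ignore
/// let (_nodes, stats) = scanner.scan()?;
/// println!("{} files / {} dirs / {} bytes", stats.total_files, stats.total_dirs, stats.total_size);
/// ```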
#[derive(Debug, Default)]
pub struct TreeStats {
/// Total number of files encountered (excluding directories).
pub total_files: u64,
/// Total number of directories encountered.
pub total_dirs: u64,
/// Total size of all files (in bytes).
pub total_size: u64,
/// A map of file extensions to their counts (e.g., {"rs": 10, "toml": 2}).
pub file_types: HashMap<String, u64>,
/// Top N largest files found (path and size). N is usually 10.
pub largest_files: Vec<(u64, PathBuf)>,
/// Top N newest files found (path and modification time).
pub newest_files: Vec<(SystemTime, PathBuf)>,
/// Top N oldest files found (path and modification time).
pub oldest_files: Vec<(SystemTime, PathBuf)>,
}
impl TreeStats {
/// Updates the statistics based on a newly processed `FileNode`.
/// This method is called for each non-permission-denied node.
pub fn update_file(&mut self, node: &FileNode) {
if node.is_dir {
self.total_dirs += 1;
} else {
// It's a file!
self.total_files += 1;
self.total_size += node.size;
// Track file extensions for type distribution.
if let Some(ext) = node.path.extension() {
if let Some(ext_str) = ext.to_str() {
*self.file_types.entry(ext_str.to_string()).or_insert(0) += 1;
}
}
// --- Update Top N Lists ---
// These lists are kept sorted and truncated to maintain a fixed size (e.g., top 10).
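// (Push-sort-truncate keeps at most 11 elements per list, so the cost per file
// is trivial; a bounded BinaryHeap would only pay off for a much larger N.)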
// Update largest files: Add, sort by size (desc), truncate.
self.largest_files.push((node.size, node.path.clone()));
self.largest_files.sort_by(|a, b| b.0.cmp(&a.0)); // Largest first
self.largest_files.truncate(10); // Keep only the top 10
// Update newest files: Add, sort by modification time (desc), truncate.
self.newest_files.push((node.modified, node.path.clone()));
self.newest_files.sort_by(|a, b| b.0.cmp(&a.0)); // Newest first
self.newest_files.truncate(10);
// Update oldest files: Add, sort by modification time (asc), truncate.
self.oldest_files.push((node.modified, node.path.clone()));
self.oldest_files.sort_by(|a, b| a.0.cmp(&b.0)); // Oldest first
self.oldest_files.truncate(10);
}
}
}
/// # ScannerConfig: The Rider for our Rock Star Scanner
///
/// This is the list of demands for our scanner. "Don't show me hidden files,"
/// "I only want to see files bigger than a tour bus," "Ignore the messy backstage
/// area (`.gitignore`)." We build this from the user's command-line arguments
/// to make sure the scanner puts on the exact show the user wants to see.
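///
/// A minimal sketch of building a config (field values are illustrative):
/// ```ignore
/// let config = ScannerConfig {
///     max_depth: 5,
///     respect_gitignore: true,
///     use_default_ignores: true,
///     ..Default::default()
/// };
/// ```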
#[derive(Default, Clone)]
pub struct ScannerConfig {
/// Maximum depth to traverse into subdirectories.
pub max_depth: usize,
/// Should symbolic links be followed? (Currently always `false`).
pub follow_symlinks: bool,
/// Should `.gitignore` files be respected?
pub respect_gitignore: bool,
/// Should hidden files (starting with `.`) be shown?
pub show_hidden: bool,
/// Should ignored files/directories be shown (usually in brackets)?
pub show_ignored: bool,
/// An optional regex pattern to filter files/directories by name.
pub find_pattern: Option<Regex>,
/// An optional file extension to filter by (e.g., "rs").
pub file_type_filter: Option<String>,
/// Optional entry type filter ("f" for files, "d" for directories).
pub entry_type_filter: Option<String>,
/// Optional minimum file size filter.
pub min_size: Option<u64>,
/// Optional maximum file size filter.
pub max_size: Option<u64>,
/// Optional filter for files newer than a specific date.
pub newer_than: Option<SystemTime>,
/// Optional filter for files older than a specific date.
pub older_than: Option<SystemTime>,
/// Should the scanner use its built-in list of default ignore patterns
/// (like `node_modules`, `__pycache__`, `target/`)?
pub use_default_ignores: bool,
/// An optional keyword to search for within file contents.
pub search_keyword: Option<String>,
/// Should filesystem type indicators be shown?
pub show_filesystems: bool,
/// Sort field for results (name, size, date, type)
pub sort_field: Option<String>,
/// Limit results to top N entries (useful with sort)
pub top_n: Option<usize>,
/// Include actual line content in search results (for AI/MCP use)
pub include_line_content: bool,
}
// --- Default Ignore Patterns: The "Please Don't Play These Songs" List ---
// Every band has songs they'd rather not play. This is our list of files and
// directories (`node_modules`, `target/`, etc.) that we usually skip to keep
// the show clean and focused on the hits. A tidy tree is a happy tree!
const DEFAULT_IGNORE_PATTERNS: &[&str] = &[
// Version control systems (but not all hidden dirs like .ssh)
".git",
".svn",
".hg",
".bzr",
"_darcs",
// Python artifacts
"__pycache__",
"*.pyc",
"*.pyo",
"*.pyd",
".Python",
".pytest_cache",
".tox",
".coverage",
"*.egg-info",
".eggs",
// Node.js / JavaScript artifacts
"node_modules",
".npm",
".yarn",
".pnpm-store",
"bower_components",
".next",
".nuxt",
// General cache directories often found in projects
".cache", // Common cache dir name
// Virtual environments
"venv",
"env",
"ENV",
"virtualenv",
".venv",
".env",
"conda-meta",
// Build/compilation artifacts from various languages/systems
"target", // Rust
"build",
"dist",
"out",
"bin",
"obj", // Common build output dirs
"*.o",
"*.a",
"*.so",
"*.dll",
"*.dylib", // Object files, libraries
// Package manager caches/data
".cargo",
".rustup", // Rust
".gem",
".bundle", // Ruby
// IDEs and editor-specific files/directories
".idea",
".vscode",
".vs", // Common IDE metadata
"*.swp",
"*.swo",
"*~", // Vim/editor backup/swap files
".project",
".classpath",
".settings", // Eclipse/Java
// Development tool caches
".mypy_cache",
".ruff_cache",
".hypothesis",
".sass-cache",
// OS-specific junk files
".DS_Store", // macOS
"Thumbs.db", // Windows
"desktop.ini", // Windows
"$RECYCLE.BIN", // Windows recycle bin
// Common temporary file/directory names and patterns
"tmp",
"temp",
".tmp",
".temp",
"*.tmp",
"*.temp",
"__MACOSX", // macOS archive metadata
// System directories that are almost never useful to traverse deeply from a user's project root.
// These are more aggressively ignored if `st` is run on `/`.
// "proc", "sys", "dev", "lost+found", "mnt", "media", // Handled by DEFAULT_SYSTEM_PATHS
// Other common ignores
".vagrant",
".terraform",
];
// Default paths that are almost always too noisy or problematic to scan,
// especially if `st` is run from `/` or a very high-level directory.
// These are typically mount points for virtual filesystems or system-critical areas.
const DEFAULT_SYSTEM_PATHS: &[&str] = &[
"/proc",
"/sys",
"/dev",
"/run",
"/tmp",
"/var/tmp",
"/lost+found",
"/mnt",
"/media",
"/snap", // Common mount points or special dirs
];
// Specific individual files (absolute paths) that should always be ignored
// due to their special nature (e.g., virtual files representing system memory).
const DEFAULT_IGNORE_FILES: &[&str] = &[
"/proc/kcore", // Virtual file representing physical memory, can be huge & slow.
"/proc/kmsg", // Kernel messages, can be an infinite stream.
"/proc/kallsyms", // Kernel symbols, can be large.
];
/// # Scanner: The Rock Star of our Show
///
/// BEHOLD! The `Scanner` itself! This is the main act. It takes the config,
/// the ignore lists, and a path, and it puts on a spectacular show of directory
/// traversal. It's fast, it's smart, and it knows all the best moves.
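///
/// A minimal sketch of the typical call sequence (the `.` path is illustrative):
/// ```ignore
/// let scanner = Scanner::new(Path::new("."), ScannerConfig::default())?;
/// let (nodes, stats) = scanner.scan()?;
/// ```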
pub struct Scanner {
/// The configuration for this scanning operation.
config: ScannerConfig,
/// Compiled `GlobSet` from `.gitignore` files, if respected and found.
gitignore: Option<GlobSet>,
/// Compiled `GlobSet` from our `DEFAULT_IGNORE_PATTERNS`.
default_ignores: Option<GlobSet>,
/// A set of absolute system paths to ignore (e.g., /proc, /sys).
system_paths: HashSet<PathBuf>,
/// A set of specific absolute file paths to ignore (e.g., /proc/kcore).
ignore_files: HashSet<PathBuf>,
/// The root path from which the scan originates.
root: PathBuf,
/// Safety limits to prevent crashes on large directories
safety_limits: ScannerSafetyLimits,
}
impl Scanner {
/// Returns the canonicalized root path of the scanner
pub fn root(&self) -> &Path {
&self.root
}
/// Quick scan for basic project analysis - lighter weight than full scan
/// Returns only basic stats and key files for faster integration
pub fn quick_scan(&self) -> Result<(Vec<FileNode>, TreeStats)> {
let mut config = self.config.clone();
config.max_depth = 3; // Limit depth for quick scan
let quick_scanner = Scanner::new(&self.root, config)?;
quick_scanner.scan()
}
/// Find files modified within a specific time range
/// Useful for finding recent activity in projects
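/// (e.g. `scanner.find_recent_files(24)?` returns files touched in the last day).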
pub fn find_recent_files(&self, hours_ago: u64) -> Result<Vec<FileNode>> {
let cutoff_time =
std::time::SystemTime::now() - std::time::Duration::from_secs(hours_ago * 3600);
let (nodes, _) = self.scan()?;
Ok(nodes
.into_iter()
.filter(|node| !node.is_dir && node.modified > cutoff_time)
.collect())
}
/// Get key project files (build configs, main files, etc.)
/// Returns a filtered list of important files for project analysis
pub fn find_key_files(&self) -> Result<Vec<FileNode>> {
let (nodes, _) = self.scan()?;
let important_patterns = [
"main.rs",
"lib.rs",
"mod.rs",
"package.json",
"Cargo.toml",
"requirements.txt",
"pyproject.toml",
"README.md",
"LICENSE",
"Makefile",
"CMakeLists.txt",
"index.js",
"app.js",
"server.js",
"main.js",
"main.py",
"__init__.py",
"setup.py",
"go.mod",
"main.go",
"pom.xml",
"build.gradle",
"build.xml",
".gitignore",
"docker-compose.yml",
"Dockerfile",
];
Ok(nodes
.into_iter()
.filter(|node| {
if node.is_dir {
return false;
}
let file_name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
important_patterns.contains(&file_name)
})
.collect())
}
/// ## `get_file_category`
/// Determines a `FileCategory` for a given path and `FileType`.
/// This function uses a series of heuristics based on file extensions and common names
/// to classify files into broad categories, useful for display and understanding content.
/// It's like a quick identification guide for files!
fn get_file_category(path: &Path, file_type: FileType) -> FileCategory {
// Directories don't get a specific content category here; their content defines them.
if matches!(file_type, FileType::Directory) {
return FileCategory::Unknown;
}
// First, check for some very specific system file names.
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name == "swap.img"
|| name == "swapfile"
|| name.starts_with("vmlinuz")
|| name.starts_with("initrd")
{
return FileCategory::SystemFile;
}
}
// Primary categorization is by file extension.
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
match ext.to_lowercase().as_str() {
// --- Programming Languages ---
"rs" => FileCategory::Rust,
"py" | "pyw" | "pyx" | "pyi" => FileCategory::Python,
"js" | "mjs" | "cjs" => FileCategory::JavaScript,
"ts" | "tsx" => FileCategory::TypeScript,
"java" | "class" | "jar" => FileCategory::Java,
"c" | "h" => FileCategory::C,
"cpp" | "cc" | "cxx" | "hpp" | "hxx" => FileCategory::Cpp,
"go" => FileCategory::Go,
"rb" => FileCategory::Ruby,
"php" => FileCategory::PHP,
"sh" | "bash" | "zsh" | "fish" | "ps1" | "bat" | "cmd" => FileCategory::Shell,
// --- Markup/Data ---
"md" | "markdown" => FileCategory::Markdown,
"html" | "htm" => FileCategory::Html,
"css" | "scss" | "sass" | "less" => FileCategory::Css,
"json" | "jsonc" | "geojson" => FileCategory::Json,
"yaml" | "yml" => FileCategory::Yaml,
"xml" | "svg" | "plist" | "kml" | "gpx" => FileCategory::Xml, // SVG and others are XML-based
"toml" => FileCategory::Toml,
// --- Build/Config (some are also by name) ---
"dockerfile" => FileCategory::Dockerfile, // Extension variant
// .gitignore, .gitconfig are usually by name, handled below
// --- Archives ---
"zip" | "tar" | "gz" | "tgz" | "bz2" | "tbz2" | "xz" | "txz" | "7z" | "rar" => {
FileCategory::Archive
}
// --- Media ---
"jpg" | "jpeg" | "png" | "gif" | "bmp" | "ico" | "webp" | "tiff" | "tif"
| "heic" | "heif" => FileCategory::Image,
"mp4" | "avi" | "mkv" | "mov" | "wmv" | "flv" | "webm" | "mpeg" | "mpg" => {
FileCategory::Video
}
"mp3" | "wav" | "flac" | "aac" | "ogg" | "wma" | "m4a" => FileCategory::Audio,
// --- Binary/Executable (some overlap with system, but these are common distributable/object formats) ---
"exe" | "dll" | "so" | "dylib" | "o" | "a" | "lib" | "msi" | "deb" | "rpm"
| "app" => FileCategory::Binary,
// --- Database Files ---
"db" | "sqlite" | "sqlitedb" | "sqlite3" | "db3" | "db4" | "db5" | "mdb"
| "accdb" | "dbf" => FileCategory::Database,
// --- Office & Documents ---
"doc" | "docx" | "odt" | "rtf" => FileCategory::Office,
"xls" | "xlsx" | "ods" | "csv" | "tsv" => FileCategory::Spreadsheet,
"ppt" | "pptx" | "odp" => FileCategory::PowerPoint,
"pdf" => FileCategory::Pdf,
"epub" | "mobi" | "azw" | "azw3" | "fb2" => FileCategory::Ebook,
// --- Text & Config Files ---
"txt" | "text" => FileCategory::Txt,
"log" => FileCategory::Log,
"ini" | "cfg" | "conf" | "config" | "properties" | "env" => FileCategory::Config,
// --- Security & Crypto ---
"crt" | "cert" | "pem" | "key" | "pub" | "cer" | "der" => FileCategory::Certificate,
"gpg" | "pgp" | "aes" | "enc" | "asc" => FileCategory::Encrypted,
// --- Fonts ---
"ttf" | "otf" | "woff" | "woff2" | "eot" | "fon" | "fnt" => FileCategory::Font,
// --- Disk Images ---
"img" | "vdi" | "vmdk" | "vhd" | "vhdx" | "dd" | "hdd" | "qcow" | "qcow2" => {
FileCategory::DiskImage
}
"iso" | "dmg" => FileCategory::DiskImage, // These can be both archives and disk images, but treating as disk images
// --- 3D & CAD ---
"obj" | "stl" | "dae" | "fbx" | "blend" | "3ds" | "ply" | "gltf" | "glb" => {
FileCategory::Model3D
}
// --- Scientific & Data ---
"ipynb" => FileCategory::Jupyter,
"rdata" | "rds" | "rda" => FileCategory::RData,
"m" | "mat" | "mlx" => FileCategory::Matlab,
// --- Web Assets ---
"wasm" | "map" | "sourcemap" => FileCategory::WebAsset,
// --- Memory Files (MEM|8!) ---
"mem8" | "m8" => FileCategory::Memory,
// --- Backup & Temp ---
"bak" | "backup" | "old" | "orig" => FileCategory::Backup,
"tmp" | "temp" | "swp" | "swo" | "swn" => FileCategory::Temp,
_ => FileCategory::Unknown, // Extension not recognized
}
} else {
// No extension, or extension parsing failed. Try common filenames.
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
// Check for test files
if name.starts_with("test_")
|| name.ends_with("_test")
|| name.contains(".test.")
|| name.contains(".spec.")
{
return FileCategory::Test;
}
// Check for specific filenames
match name {
"Makefile" | "makefile" | "GNUmakefile" => FileCategory::Makefile,
"Dockerfile" => FileCategory::Dockerfile,
".gitignore" | ".gitconfig" | ".gitattributes" | ".gitmodules" => {
FileCategory::GitConfig
}
"LICENSE" | "LICENCE" | "COPYING" => FileCategory::License,
"README" | "README.md" | "README.txt" | "README.rst" => FileCategory::Readme,
"package.json" | "Cargo.toml" | "requirements.txt" | "pyproject.toml"
| "pom.xml" | "build.gradle" | "go.mod" | "composer.json" => {
FileCategory::Package
}
"package-lock.json" | "Cargo.lock" | "yarn.lock" | "pnpm-lock.yaml"
| "poetry.lock" | "Gemfile.lock" => FileCategory::Lock,
_ => {
// Check for backup files ending with ~
if name.ends_with('~') {
FileCategory::Backup
} else if matches!(file_type, FileType::Executable) {
FileCategory::Binary
} else {
FileCategory::Unknown
}
}
}
} else {
FileCategory::Unknown // Path has no filename component (should be rare for actual files).
}
}
}
/// ## `Scanner::new` - Constructor
///
/// Creates a new `Scanner` instance. This involves:
/// 1. Storing the provided `config` and `root` path.
/// 2. Loading and compiling `.gitignore` patterns if `config.respect_gitignore` is true.
/// 3. Compiling the `DEFAULT_IGNORE_PATTERNS` if `config.use_default_ignores` is true.
/// 4. Initializing sets of system paths and specific files to always ignore.
///
/// This setup prepares the scanner for efficient `should_ignore` checks during traversal.
pub fn new(root: &Path, config: ScannerConfig) -> Result<Self> {
// Canonicalize the root path to get the absolute path
// If canonicalize fails (e.g., path doesn't exist), fall back to absolute path
let canonical_root = root
.canonicalize()
.or_else(|_| std::env::current_dir().map(|cwd| cwd.join(root)))
.unwrap_or_else(|_| root.to_path_buf());
// Load .gitignore patterns from the root directory if requested.
let gitignore = if config.respect_gitignore {
Self::load_gitignore(&canonical_root)? // This can return None if no .gitignore or error.
} else {
None // Not respecting .gitignore.
};
// Build the GlobSet for default ignore patterns if requested.
let default_ignores = if config.use_default_ignores {
Self::build_default_ignores()? // Always `Some` on success; errs only if the glob set fails to build.
} else {
None // Not using default ignores.
};
// Initialize the set of system paths to ignore (e.g., /proc, /sys).
let system_paths: HashSet<PathBuf> = if config.use_default_ignores {
DEFAULT_SYSTEM_PATHS
.iter()
.map(PathBuf::from) // Convert string slices to PathBufs
.collect() // Collect into a HashSet for quick lookups.
} else {
HashSet::new() // Empty set if not using default ignores.
};
// Initialize the set of specific files to ignore (e.g., /proc/kcore).
let ignore_files: HashSet<PathBuf> = if config.use_default_ignores {
DEFAULT_IGNORE_FILES.iter().map(PathBuf::from).collect()
} else {
HashSet::new()
};
// Determine appropriate safety limits based on the path
let safety_limits =
if canonical_root == PathBuf::from(&std::env::var("HOME").unwrap_or_default()) {
// Home directory needs special care
ScannerSafetyLimits::for_home_directory()
} else if canonical_root.starts_with("/") && canonical_root.components().count() <= 2 {
// Root or near-root paths need limits
ScannerSafetyLimits::for_home_directory()
} else {
// Regular directories can use default limits
ScannerSafetyLimits::default()
};
Ok(Self {
config,
gitignore,
default_ignores,
system_paths,
ignore_files,
root: canonical_root, // Store a copy of the root path.
safety_limits,
})
}
/// ## `build_default_ignores`
///
/// Compiles the `DEFAULT_IGNORE_PATTERNS` array into a `GlobSet` for efficient matching.
/// This `GlobSet` is used to quickly check if a file/directory name matches any of the
/// common patterns we want to ignore by default (like `node_modules`, `target/`).
/// Returns `Ok(Some(GlobSet))` on success, or an `Err` if building the glob set
/// fails (very unlikely for our hardcoded patterns). Individual patterns that
/// fail to compile are silently skipped.
fn build_default_ignores() -> Result<Option<GlobSet>> {
let mut builder = GlobSetBuilder::new(); // Start with an empty builder.
// Add each default pattern to the builder.
for pattern_str in DEFAULT_IGNORE_PATTERNS {
// Glob::new can fail if the pattern is malformed, but ours should be fine.
if let Ok(glob) = Glob::new(pattern_str) {
builder.add(glob);
}
// Silently ignore malformed default patterns, though this shouldn't occur.
}
// Build the GlobSet from the accumulated patterns.
// This can fail if, for example, the set is empty or patterns are incompatible,
// but again, highly unlikely for our predefined set.
Ok(Some(builder.build()?))
}
/// ## `load_gitignore`
///
/// Reads the `.gitignore` file from the specified `root` directory (if it exists)
/// and compiles its patterns into a `GlobSet`.
/// Lines starting with `#` (comments) and empty lines are ignored.
/// Returns `Ok(Some(GlobSet))` if `.gitignore` is found and parsed,
/// `Ok(None)` if no `.gitignore` exists or it can't be read, or an `Err` if
/// the glob set fails to build. Malformed individual patterns are skipped.
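/// Note: lines are compiled as plain globs, so gitignore-specific semantics
/// (such as `!` negation or directory anchoring) are not interpreted here.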
fn load_gitignore(root: &Path) -> Result<Option<GlobSet>> {
let gitignore_path = root.join(".gitignore"); // Construct path to .gitignore.
if !gitignore_path.exists() {
return Ok(None); // No .gitignore file found, nothing to load.
}
let mut builder = GlobSetBuilder::new();
// Read the entire .gitignore file, handling non-UTF-8 content gracefully
let content = match fs::read(&gitignore_path) {
Ok(bytes) => String::from_utf8_lossy(&bytes).to_string(),
Err(e) => {
eprintln!(
"Warning: Could not read .gitignore at {:?}: {}",
gitignore_path, e
);
return Ok(None);
}
};
// Process each line of the .gitignore file.
for line in content.lines() {
let trimmed_line = line.trim(); // Remove leading/trailing whitespace.
// Ignore empty lines and lines that are comments (start with '#').
if !trimmed_line.is_empty() && !trimmed_line.starts_with('#') {
// Attempt to compile the line as a glob pattern.
// If successful, add it to our GlobSet builder.
if let Ok(glob) = Glob::new(trimmed_line) {
builder.add(glob);
}
// Malformed patterns in user's .gitignore are silently skipped.
}
}
// Build the final GlobSet from all valid patterns.
Ok(Some(builder.build()?))
}
/// Stream nodes as they are discovered
/// This version of scan is optimized for the `--stream` flag.
/// It sends `FileNode` objects through the `sender` channel as soon as they are processed.
/// This allows the formatter to start displaying output immediately, which is great for large directories.
/// Returns the final `TreeStats` once the scan is complete.
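///
/// A minimal sketch of driving the stream from another thread (illustrative):
/// ```ignore
/// let (tx, rx) = std::sync::mpsc::channel();
/// let handle = std::thread::spawn(move || scanner.scan_stream(tx));
/// for node in rx {
///     println!("{}", node.path.display());
/// }
/// let stats = handle.join().unwrap()?;
/// ```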
pub fn scan_stream(&self, sender: mpsc::Sender<FileNode>) -> Result<TreeStats> {
let mut stats = TreeStats::default();
// When searching, we need to collect all nodes first to determine which directories to show
if self.config.search_keyword.is_some() {
// Use the non-streaming scan and then send results in order
let (nodes, stats) = self.scan()?;
for node in nodes {
if sender.send(node).is_err() {
break; // Receiver disconnected
}
}
return Ok(stats);
}
// Initialize safety tracker for streaming mode
let safety_tracker = ScannerSafetyTracker::new(self.safety_limits.clone());
// Original streaming logic for non-search cases
let mut walker = WalkDir::new(&self.root)
.max_depth(self.config.max_depth)
.follow_links(self.config.follow_symlinks)
.into_iter();
// Loop through each entry provided by WalkDir.
while let Some(entry_result) = walker.next() {
// Check safety limits
if let Err(safety_error) = safety_tracker.should_continue() {
eprintln!("⚠️ {}", safety_error);
eprintln!(" Use --max-depth or scan a more specific directory");
break;
}
match entry_result {
Ok(entry) => {
// Successfully read a directory entry.
let depth = entry.depth();
let path = entry.path();
// Determine if this entry should be ignored based on various rules.
let is_ignored_by_rules = self.should_ignore(path)?;
if is_ignored_by_rules {
// The entry matches an ignore rule.
if self.config.show_ignored {
// If we're showing ignored items, process it but mark as ignored.
if let Some(mut node) =
self.process_entry(&entry, depth, is_ignored_by_rules)?
{
// Perform content search if applicable, even for ignored files being shown.
if !node.is_dir && self.should_search_file(&node) {
node.search_matches = self.search_in_file(&node.path);
}
// Track node for safety limits
safety_tracker.add_file(estimate_node_size(
node.path.to_string_lossy().len(),
));
// Send the (ignored) node through the channel.
if sender.send(node.clone()).is_err() {
break; // Receiver has disconnected, stop scanning.
}
// Update stats for ignored items if they aren't permission-denied.
// This ensures `show_ignored` gives a full picture.
if !node.permission_denied {
stats.update_file(&node);
}
}
// If this ignored item is a directory, tell WalkDir not to descend into it.
if entry.file_type().is_dir() {
// `ignored_dirs.insert(path.to_path_buf());` // Not strictly needed if just skipping.
walker.skip_current_dir();
}
} else {
// We are *not* showing ignored items, and this one is ignored.
// If it's a directory, skip its contents. Otherwise, just continue.
if entry.file_type().is_dir() {
walker.skip_current_dir();
}
// `continue;` // Implicitly done by not processing further.
}
} else {
// The entry is NOT ignored by rules. Process it normally.
if let Some(mut node) = self.process_entry(&entry, depth, false)? {
// `is_ignored` is false here
// Perform content search if applicable.
if !node.is_dir && self.should_search_file(&node) {
node.search_matches = self.search_in_file(&node.path);
}
// Apply filters (size, date, type, find pattern).
// A file is included if it's a directory, or it matches filters, or it has a search match.
let has_search_match = node
.search_matches
.as_ref()
.is_some_and(|m| m.total_count > 0);
// If we have a search keyword, only include files with matches
let should_include_file = if self.config.search_keyword.is_some() {
has_search_match
} else {
self.should_include(&node)
};
if node.is_dir || should_include_file {
// Track node for safety limits
safety_tracker.add_file(estimate_node_size(
node.path.to_string_lossy().len(),
));
// Send the processed node through the channel.
if sender.send(node.clone()).is_err() {
break; // Receiver disconnected.
}
// Update statistics for included, non-permission-denied items.
if !node.permission_denied {
stats.update_file(&node);
}
}
} else {
// process_entry returned None, which means this is a hidden entry and show_hidden is false
// If it's a directory, we need to skip its contents
if entry.file_type().is_dir() {
walker.skip_current_dir();
}
}
}
}
Err(e) => {
// An error occurred trying to access a directory entry (e.g., permission denied).
if let Some(path) = e.path() {
let depth = e.depth();
// Check if this is a "directory contents" error vs "directory entry" error.
// If this is a permission error, it's likely we already processed the directory
// entry successfully but can't read its contents. In that case, skip creating
// a duplicate node since we already marked the original as permission_denied.
let is_contents_error = e.io_error().is_some_and(|io_err| {
io_err.kind() == std::io::ErrorKind::PermissionDenied
});
if !is_contents_error {
// Create a special node representing the permission-denied entry.
let node = self.create_permission_denied_node(path, depth);
safety_tracker
.add_file(estimate_node_size(node.path.to_string_lossy().len()));
if sender.send(node.clone()).is_err() {
break; // Receiver disconnected.
}
// Still update stats (e.g., directory count) for permission-denied entries if shown.
stats.update_file(&node);
}
// Tell WalkDir not to try to descend into this unreadable directory.
walker.skip_current_dir();
}
// If the error is not path-specific, it might be logged or ignored depending on severity.
// For now, we primarily handle path-specific errors like permission issues.
}
}
}
// Scan complete, return the accumulated statistics.
Ok(stats)
}
/// ## `should_search_file`
/// This function is called before `search_in_file` to decide if it's worth attempting a search.
/// It checks if a search keyword is configured and if the file is likely text-based.
fn should_search_file(&self, node: &FileNode) -> bool {
// No search keyword? No search.
if self.config.search_keyword.is_none() {
return false;
}
// If there's a file type filter, only search files that match it
if let Some(ref filter_ext) = self.config.file_type_filter {
if let Some(ext) = node.path.extension() {
if ext.to_str() != Some(filter_ext) {
return false;
}
} else {
// No extension, doesn't match filter
return false;
}
}
// Skip directories, symlinks, and special files.
if node.is_dir || node.is_symlink || node.permission_denied {
return false;
}
// Finally, only search categories that are reliably text-based (code, markup, config).
matches!(
node.category,
FileCategory::Rust
| FileCategory::Python
| FileCategory::JavaScript
| FileCategory::TypeScript
| FileCategory::Java
| FileCategory::C
| FileCategory::Cpp
| FileCategory::Go
| FileCategory::Ruby
| FileCategory::PHP
| FileCategory::Shell
| FileCategory::Markdown
| FileCategory::Html
| FileCategory::Css
| FileCategory::Json
| FileCategory::Yaml
| FileCategory::Xml
| FileCategory::Toml
| FileCategory::Makefile
| FileCategory::Dockerfile
| FileCategory::GitConfig
)
}
/// ## `search_in_file`
///
/// Searches for the configured keyword within a file and returns match information.
/// Returns line and column positions for each match, up to a reasonable limit.
/// The search is case-sensitive. Optionally includes the actual line content.
fn search_in_file(&self, path: &Path) -> Option<SearchMatches> {
// Ensure there's a keyword to search for.
let keyword = self.config.search_keyword.as_ref()?;
if keyword.is_empty() {
return None;
}
// Attempt to open the file for reading.
let file = match fs::File::open(path) {
Ok(f) => f,
Err(_) => return None,
};
let mut positions = Vec::new();
let mut line_content_vec = Vec::new();
let reader = BufReader::new(file);
let mut line_number = 1;
let mut first_match: Option<(usize, usize)> = None;
let mut total_count = 0;
// Read and process the file line by line.
for line_result in reader.lines() {
match line_result {
Ok(line_content) => {
// Find all occurrences of the keyword in the current line.
let mut line_has_match = false;
let mut first_column_in_line = None;
for (column_index, _) in line_content.match_indices(keyword) {
total_count += 1;
line_has_match = true;
// Column numbers are 1-based for user display
let match_pos = (line_number, column_index + 1);
if first_match.is_none() {
first_match = Some(match_pos);
}
if first_column_in_line.is_none() {
first_column_in_line = Some(column_index + 1);
}
// Only store first 100 positions to prevent memory issues
if positions.len() < 100 {
positions.push(match_pos);
}
// Stop processing this file if we've found too many matches
if total_count > 100 {
let line_content_option = if self.config.include_line_content {
Some(line_content_vec)
} else {
None
};
return Some(SearchMatches {
first_match: first_match.unwrap(),
total_count,
positions,
truncated: true,
line_content: line_content_option,
});
}
}
// If this line has matches and we're including content, add it
if line_has_match
&& self.config.include_line_content
&& line_content_vec.len() < 100
{
line_content_vec.push((
line_number,
line_content.clone(),
first_column_in_line.unwrap(),
));
}
line_number += 1;
}
Err(_) => {
// Invalid UTF-8 or other error, stop searching this file
break;
}
}
}
// Return matches if any were found
first_match.map(|first| {
let line_content_option =
if self.config.include_line_content && !line_content_vec.is_empty() {
Some(line_content_vec)
} else {
None
};
SearchMatches {
first_match: first,
total_count,
positions,
truncated: false,
line_content: line_content_option,
}
})
}
/// ## `scan` - The Full Scan (Non-Streaming)
///
/// This is the classic "scan-it-all-then-sort-it-out" method, a two-act show:
/// 1. **Act I**: Walk the entire tree (bounded by `config.max_depth`), collecting
///    a `FileNode` for every entry that isn't skipped by the ignore rules.
/// 2. **Act II**: If filters are active, make a second pass that keeps only the
///    matching files (and directories matching `--find`), plus their ancestor
///    directories so the tree still makes sense.
///
/// It's thorough and great for when you need the whole picture before making
/// decisions. Returns a tuple: `(Vec<FileNode>, TreeStats)`.
pub fn scan(&self) -> Result<(Vec<FileNode>, TreeStats)> {
let mut all_nodes_collected = Vec::new(); // Stores all nodes initially encountered.
// `ignored_dirs` was here, but its primary use with `skip_current_dir` is within the loop.
// If we need to track them for other reasons post-loop, it could be reinstated.
// Initialize safety tracker
let safety_tracker = ScannerSafetyTracker::new(self.safety_limits.clone());
let mut walker = WalkDir::new(&self.root)
.max_depth(self.config.max_depth)
.follow_links(self.config.follow_symlinks)
.into_iter();
while let Some(entry_result) = walker.next() {
// Check safety limits
if let Err(safety_error) = safety_tracker.should_continue() {
eprintln!("⚠️ {}", safety_error);
eprintln!(" Use --max-depth, --stream mode, or scan a more specific directory");
break;
}
match entry_result {
Ok(entry) => {
let depth = entry.depth();
let path = entry.path();
let is_ignored_by_rules = self.should_ignore(path)?;
if is_ignored_by_rules {
if self.config.show_ignored {
// Process and add the ignored entry.
if let Some(mut node) = self.process_entry(&entry, depth, true)? {
if !node.is_dir && self.should_search_file(&node) {
node.search_matches = self.search_in_file(&node.path);
}
safety_tracker.add_file(estimate_node_size(
node.path.to_string_lossy().len(),
));
all_nodes_collected.push(node);
}
if entry.file_type().is_dir() {
walker.skip_current_dir(); // Don't descend into ignored dirs if showing them.
}
} else {
// Not showing ignored, and it's a directory: skip its contents.
if entry.file_type().is_dir() {
walker.skip_current_dir();
}
// If it's a file, it's simply skipped by not adding to `all_nodes_collected`.
}
} else {
// Not ignored by rules, process normally.
if let Some(mut node) = self.process_entry(&entry, depth, false)? {
if !node.is_dir && self.should_search_file(&node) {
node.search_matches = self.search_in_file(&node.path);
}
all_nodes_collected.push(node);
} else {
// process_entry returned None, which means this is a hidden entry and show_hidden is false
// If it's a directory, we need to skip its contents
if entry.file_type().is_dir() {
walker.skip_current_dir();
}
}
}
}
Err(e) => {
// Handle errors like permission denied.
if let Some(path) = e.path() {
let depth = e.depth();
all_nodes_collected.push(self.create_permission_denied_node(path, depth));
if e.io_error().is_some_and(|io_err| {
io_err.kind() == std::io::ErrorKind::PermissionDenied
}) {
walker.skip_current_dir(); // Skip unreadable directory.
}
}
}
}
}
// If filters are active, we need a second pass to ensure directories are only included
// if they contain (or lead to) matching files.
// Also, calculate stats based on the *final* list of nodes.
let (final_nodes, final_stats) = if self.has_active_filters() {
self.filter_nodes_and_calculate_stats(all_nodes_collected)
} else {
// No filters, so all collected nodes are final. Calculate stats on them.
let mut stats = TreeStats::default();
for node in &all_nodes_collected {
// Only update stats for non-permission-denied items, or items that are directories.
// (Permission denied files usually have size 0 and aren't "counted" in the same way).
if !node.permission_denied || node.is_dir {
stats.update_file(node);
}
}
(all_nodes_collected, stats)
};
// Apply sorting and top-N filtering if requested
let sorted_nodes = self.apply_sorting_and_limit(final_nodes);
Ok((sorted_nodes, final_stats))
}
/// ## `has_active_filters`
///
/// Helper function to quickly check if any of the primary filtering criteria
/// (find pattern, type, size, date) are currently set in the configuration.
/// This determines if the second filtering pass (`filter_nodes_and_calculate_stats`) is needed.
/// Note: `search_keyword` is handled slightly differently; it can make a file appear
/// even if other filters would exclude it, so it's part of `should_include` logic.
fn has_active_filters(&self) -> bool {
self.config.find_pattern.is_some()
|| self.config.file_type_filter.is_some()
|| self.config.entry_type_filter.is_some()
|| self.config.min_size.is_some()
|| self.config.max_size.is_some()
|| self.config.newer_than.is_some()
|| self.config.older_than.is_some()
|| self.config.search_keyword.is_some() // Now search_keyword is also a filter
}
/// ## `filter_nodes_and_calculate_stats` (Formerly `filter_nodes_with_ancestors`)
///
/// This crucial function takes all nodes collected during the initial traversal
/// and filters them based on the `ScannerConfig`. It ensures that:
/// 1. Files are included if they directly match all active filters OR if they contain a search match.
/// 2. Directories are included if they themselves match a `--find` pattern OR
/// if they are an ancestor of an included file.
/// It then calculates `TreeStats` based on this final, filtered list of nodes.
/// This replaces the older `filter_nodes_with_ancestors` to integrate stat calculation
/// and clarify the logic for directory inclusion with `--find`.
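/// For example, if only `src/app/main.rs` passes the filters, `src` and
/// `src/app` are kept as ancestors so the rendered tree still connects the
/// root to the match.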
fn filter_nodes_and_calculate_stats(
&self,
all_nodes_collected: Vec<FileNode>,
) -> (Vec<FileNode>, TreeStats) {
let mut final_stats = TreeStats::default();
let mut included_files_and_matching_dirs = Vec::new(); // Files that pass filters, and Dirs that match --find
let mut required_ancestor_dirs = HashSet::new(); // Ancestors of included_files
// --- Pass 1: Identify matching files and directories that directly match --find ---
for node in &all_nodes_collected {
if node.permission_denied {
// Skip permission denied entries for filtering logic
continue;
}
let has_search_match = node
.search_matches
.as_ref()
.is_some_and(|m| m.total_count > 0);
if node.is_dir {
// For directories, only the --find pattern applies directly.
// Other filters (size, date, type) don't apply to directories themselves.
if self
.config
.find_pattern
.as_ref()
.is_some_and(|p| p.is_match(&node.path.to_string_lossy()))
{
included_files_and_matching_dirs.push(node.clone());
// Add ancestors of this directly matched directory
let mut current = node.path.parent();
while let Some(parent_path) = current {
if parent_path == self.root || required_ancestor_dirs.contains(parent_path)
{
break;
}
required_ancestor_dirs.insert(parent_path.to_path_buf());
current = parent_path.parent();
}
}
} else {
// For files, check if it passes all filters OR has a search match.
// If we have a search keyword, ONLY include files with search matches
if self.config.search_keyword.is_some() {
if has_search_match {
// Even with search matches, the file must still pass other filters
if self.should_include(node) {
included_files_and_matching_dirs.push(node.clone());
// Add all ancestors of this matching file to `required_ancestor_dirs`.
let mut current = node.path.parent();
while let Some(parent_path) = current {
// Stop if we reach the root or an already added ancestor.
if parent_path == self.root
|| required_ancestor_dirs.contains(parent_path)
{
break;
}
required_ancestor_dirs.insert(parent_path.to_path_buf());
current = parent_path.parent();
}
}
}
} else {
// No search keyword, use normal filtering
if has_search_match || self.should_include(node) {
included_files_and_matching_dirs.push(node.clone());
// Add all ancestors of this matching file to `required_ancestor_dirs`.
let mut current = node.path.parent();
while let Some(parent_path) = current {
// Stop if we reach the root or an already added ancestor.
if parent_path == self.root
|| required_ancestor_dirs.contains(parent_path)
{
break;
}
required_ancestor_dirs.insert(parent_path.to_path_buf());
current = parent_path.parent();
}
}
}
}
}
// --- Pass 2: Build the final list of nodes ---
let mut final_node_list = Vec::new();
let mut added_paths = HashSet::new(); // To prevent duplicates if a dir is both an ancestor and matches --find
// Always add the root node if there's anything to show.
if !included_files_and_matching_dirs.is_empty() {
if let Some(root_node) = all_nodes_collected.iter().find(|n| n.path == self.root) {
if added_paths.insert(root_node.path.clone()) {
final_node_list.push(root_node.clone());
}
}
}
// Add required ancestor directories and directly matching directories from `all_nodes_collected`.
for node in &all_nodes_collected {
if node.permission_denied {
// Also include permission denied nodes if they are part of the path
if (required_ancestor_dirs.contains(&node.path)
|| node.path == self.root && !final_node_list.is_empty())
&& added_paths.insert(node.path.clone())
{
final_node_list.push(node.clone());
}
continue;
}
if node.is_dir {
// Is it a required ancestor OR a directory that itself matched --find?
let is_find_match = self
.config
.find_pattern
.as_ref()
.is_some_and(|p| p.is_match(&node.path.to_string_lossy()));
if (required_ancestor_dirs.contains(&node.path)
|| (is_find_match && node.path != self.root))
&& added_paths.insert(node.path.clone())
{
final_node_list.push(node.clone());
}
}
}
// Add the files that passed filters or had search matches. Directories that
// matched --find were already inserted above; `added_paths` keeps them from
// being added twice.
for node in included_files_and_matching_dirs {
if added_paths.insert(node.path.clone()) {
final_node_list.push(node);
}
}
// Sort the final list by path for consistent output.
final_node_list.sort_by(|a, b| a.path.cmp(&b.path));
// --- Pass 3: Calculate stats on the final_node_list ---
for node in &final_node_list {
// Update stats, ensuring not to double-count or miscount permission-denied entries.
if !node.permission_denied || node.is_dir {
// Dirs (even denied) contribute to dir count.
final_stats.update_file(node);
}
}
(final_node_list, final_stats)
}
/// ## `process_entry`
///
/// Converts a `walkdir::DirEntry` into our `FileNode` struct.
/// This involves fetching metadata, determining file type, category, hidden status, etc.
/// It also incorporates the `is_ignored_by_rules` status passed to it.
/// Returns `Ok(Some(FileNode))` on success, `Ok(None)` if the entry should be skipped
/// (e.g., hidden and not showing hidden), or an `Err` if metadata access fails.
/// The `is_ignored_by_rules` parameter tells this function if `should_ignore` already determined this node is ignored.
fn process_entry(
&self,
entry: &DirEntry,
depth: usize,
is_ignored_by_rules: bool,
) -> Result<Option<FileNode>> {
let path = entry.path();
// Determine if the file is hidden (starts with '.').
let is_hidden = path
.file_name()
.and_then(|name_osstr| name_osstr.to_str()) // Convert OsStr to &str
.is_some_and(|name_str| name_str.starts_with('.'));
// Skip if hidden and we are not configured to show hidden files,
// UNLESS it's an ignored item that we *are* configured to show (is_ignored_by_rules = true, config.show_ignored = true).
// The `is_ignored_by_rules` flag takes precedence for display if `config.show_ignored` is true.
if is_hidden && !self.config.show_hidden && !is_ignored_by_rules {
// `process_entry` can't call `walker.skip_current_dir()` itself; returning
// `None` signals the caller (`scan`/`scan_stream`), which then skips the
// contents of hidden directories on our behalf.
return Ok(None); // Skip this hidden entry.
}
// Try to get metadata for the entry. This can fail (e.g., permission denied).
let metadata = match entry.metadata() {
Ok(md) => md,
Err(_e) => {
// Metadata can fail even when WalkDir yielded the entry successfully
// (e.g., a permission change mid-scan or a broken symlink). WalkDir-level
// errors are handled by the main scan loops, which can also call
// `walker.skip_current_dir()`; for this rarer edge case we just return a
// basic node marked as permission denied.
return Ok(Some(self.create_permission_denied_node(path, depth)));
}
};
let file_type = self.determine_file_type(&metadata);
let category = Self::get_file_category(path, file_type);
// Determine the size. For special virtual files (like in /proc or /sys),
// reported size can be misleading (e.g., 0 or huge). We mark these as size 0.
let size = if self.is_special_virtual_file(path, &metadata) {
0
} else {
metadata.len()
};
// Check if this is a directory that we can't read the contents of
let permission_denied = if metadata.is_dir() {
// Try to read the directory to see if we have permission
std::fs::read_dir(path).is_err()
} else {
false
};
Ok(Some(FileNode {
path: path.to_path_buf(),
is_dir: metadata.is_dir(),
size,
permissions: Self::get_permissions(&metadata),
uid: Self::get_uid(&metadata),
gid: Self::get_gid(&metadata),
modified: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH), // Fallback for modified time
is_symlink: metadata.file_type().is_symlink(), // Use file_type() for symlink check
is_hidden,
permission_denied, // Set based on whether we can read directory contents
is_ignored: is_ignored_by_rules, // Use the pre-determined ignore status.
depth,
file_type,
category,
search_matches: None, // Search matches are added later by the caller if needed.
filesystem_type: Self::get_filesystem_type(path),
}))
}
/// ## `get_filesystem_type`
///
/// Detects the filesystem type for a given path
#[cfg(unix)]
fn get_filesystem_type(path: &Path) -> FilesystemType {
// Skip filesystem detection in CI environments to avoid hangs
if std::env::var("CI").is_ok() || std::env::var("GITHUB_ACTIONS").is_ok() {
return FilesystemType::Unknown;
}
#[cfg(target_os = "linux")]
{
Self::get_filesystem_type_linux(path)
}
#[cfg(not(target_os = "linux"))]
{
// On non-Linux Unix systems, we can't reliably detect filesystem type
// Just check for special paths
if let Some(path_str) = path.to_str() {
if path_str.starts_with("/proc") {
return FilesystemType::Procfs;
} else if path_str.starts_with("/sys") {
return FilesystemType::Sysfs;
} else if path_str.starts_with("/dev") {
return FilesystemType::Devfs;
}
}
FilesystemType::Unknown
}
}
/// ## `get_filesystem_type_linux`
///
/// Detects the filesystem type for a given path using statfs on Linux systems
#[cfg(target_os = "linux")]
fn get_filesystem_type_linux(path: &Path) -> FilesystemType {
// Double-check for CI environment
if std::env::var("CI").is_ok() || std::env::var("GITHUB_ACTIONS").is_ok() {
return FilesystemType::Unknown;
}
use libc::statfs;
use std::ffi::CString;
use std::mem;
// Filesystem magic numbers from statfs.h
type FsType = i64;
const EXT4_SUPER_MAGIC: FsType = 0xef53;
const XFS_SUPER_MAGIC: FsType = 0x58465342;
const BTRFS_SUPER_MAGIC: FsType = 0x9123683e;
const ZFS_SUPER_MAGIC: FsType = 0x2fc12fc1;
const NTFS_SB_MAGIC: FsType = 0x5346544e;
const MSDOS_SUPER_MAGIC: FsType = 0x4d44; // FAT
const EXFAT_SUPER_MAGIC: FsType = 0x2011bab0;
const APFS_SUPER_MAGIC: FsType = 0x42535041; // "APSB" in little-endian ASCII
const HFS_SUPER_MAGIC: FsType = 0x482b; // HFS+
const NFS_SUPER_MAGIC: FsType = 0x6969;
const SMB_SUPER_MAGIC: FsType = 0x517b;
const TMPFS_MAGIC: FsType = 0x01021994;
const PROC_SUPER_MAGIC: FsType = 0x9fa0;
const SYSFS_MAGIC: FsType = 0x62656572;
const DEVFS_SUPER_MAGIC: FsType = 0x1373;
let path_cstr = match CString::new(path.to_string_lossy().as_bytes()) {
Ok(s) => s,
Err(_) => return FilesystemType::Unknown,
};
let mut stat_buf: libc::statfs = unsafe { mem::zeroed() };
let result = unsafe { statfs(path_cstr.as_ptr(), &mut stat_buf) };
if result != 0 {
// statfs failed, fall back to path-based detection for virtual filesystems
if let Some(path_str) = path.to_str() {
if path_str.starts_with("/proc") {
return FilesystemType::Procfs;
} else if path_str.starts_with("/sys") {
return FilesystemType::Sysfs;
} else if path_str.starts_with("/dev") {
return FilesystemType::Devfs;
}
}
return FilesystemType::Unknown;
}
// Check for the Mem8 filesystem via a `.mem8` marker file or "mem8" anywhere in the path.
if path.join(".mem8").exists() || path.to_string_lossy().contains("mem8") {
return FilesystemType::Mem8;
}
match stat_buf.f_type {
EXT4_SUPER_MAGIC => FilesystemType::Ext4, // 0xef53 is shared by ext2/3/4; statfs alone can't distinguish them.
XFS_SUPER_MAGIC => FilesystemType::Xfs,
BTRFS_SUPER_MAGIC => FilesystemType::Btrfs,
ZFS_SUPER_MAGIC => FilesystemType::Zfs,
NTFS_SB_MAGIC => FilesystemType::Ntfs,
MSDOS_SUPER_MAGIC => FilesystemType::Fat32,
EXFAT_SUPER_MAGIC => FilesystemType::ExFat,
APFS_SUPER_MAGIC => FilesystemType::Apfs,
HFS_SUPER_MAGIC => FilesystemType::Hfs,
NFS_SUPER_MAGIC => FilesystemType::Nfs,
SMB_SUPER_MAGIC => FilesystemType::Smb,
TMPFS_MAGIC => FilesystemType::Tmpfs,
PROC_SUPER_MAGIC => FilesystemType::Procfs,
SYSFS_MAGIC => FilesystemType::Sysfs,
DEVFS_SUPER_MAGIC => FilesystemType::Devfs,
_ => FilesystemType::Unknown,
}
}
#[cfg(not(unix))]
fn get_filesystem_type(_path: &Path) -> FilesystemType {
// On non-Unix systems, we can't easily detect filesystem type
FilesystemType::Unknown
}
/// ## `is_virtual_filesystem`
///
/// Checks if a path is on a virtual filesystem
fn is_virtual_filesystem(path: &Path) -> bool {
Self::get_filesystem_type(path).is_virtual()
}
/// ## `is_special_virtual_file`
///
/// Checks if a file is likely a special virtual file (e.g., in /proc, /sys, /dev)
/// where reported metadata like size might be zero, misleading, or cause issues if read.
/// This helps in deciding to report size as 0 for such files.
#[allow(unused_variables)]
fn is_special_virtual_file(&self, path: &Path, metadata: &fs::Metadata) -> bool {
// Check if the path starts with known virtual filesystem prefixes.
if let Some(path_str) = path.to_str() {
if path_str.starts_with("/proc/")
|| path_str.starts_with("/sys/")
|| path_str.starts_with("/dev/")
{
return true;
}
}
// Check for specific problematic files by absolute path.
if self.ignore_files.contains(path) {
// Uses the pre-built HashSet of specific problem files.
return true;
}
// On Unix, check for special file types like character devices, block devices, FIFOs, sockets.
// These often have size 0 or non-standard size reporting.
#[cfg(unix)]
{
use std::os::unix::fs::FileTypeExt; // For is_char_device(), is_block_device(), etc.
let ft = metadata.file_type();
if ft.is_char_device() || ft.is_block_device() || ft.is_fifo() || ft.is_socket() {
return true;
}
}
false // Not determined to be a special virtual file by these checks.
}
/// ## `create_permission_denied_node`
///
/// Helper to create a `FileNode` representing an entry (usually a directory)
/// that could not be accessed due to permission errors.
/// These nodes are marked specially so formatters can indicate the issue.
fn create_permission_denied_node(&self, path: &Path, depth: usize) -> FileNode {
FileNode {
path: path.to_path_buf(),
is_dir: true, // Assume it's a directory, as that's common for permission errors during traversal.
size: 0, // No size info available.
permissions: 0, // No permission info.
uid: 0, // No UID info.
gid: 0, // No GID info.
modified: SystemTime::UNIX_EPOCH, // Default timestamp.
is_symlink: false,
is_hidden: false, // Cannot determine if hidden.
permission_denied: true, // Mark as permission denied.
is_ignored: false, // Not ignored by rules, but inaccessible.
depth,
file_type: FileType::Directory, // Assume directory.
category: FileCategory::Unknown,
search_matches: None,
filesystem_type: Self::get_filesystem_type(path),
}
}
/// ## `should_ignore` - The Bouncer at the Club Door
///
/// This function is our tough-but-fair bouncer. It checks every file and
/// directory against our lists (`.gitignore`, default ignores, etc.).
/// "Sorry, `node_modules`, you're not on the list tonight."
/// It's the first line of defense against clutter.
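/// A hedged sketch of the rule order (illustrative paths; `scanner` is an
/// already-constructed `Scanner` rooted at `/project` with defaults on):
/// ```ignore
/// assert!(!scanner.should_ignore(Path::new("/project"))?);             // the root itself is never ignored
/// assert!(scanner.should_ignore(Path::new("/proc/self/status"))?);     // virtual filesystems are always skipped
/// assert!(scanner.should_ignore(Path::new("/project/node_modules"))?); // default ignore pattern by name
/// ```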
fn should_ignore(&self, path: &Path) -> Result<bool> {
// --- Rule 0: Never ignore the root path itself ---
// If the user explicitly asks to scan a directory, we should show it
// even if it would normally be ignored (e.g., scanning 'target' directory)
if path == self.root {
return Ok(false);
}
// --- Rule 1: Check against specific, always-ignored files (absolute paths) ---
if self.config.use_default_ignores && self.ignore_files.contains(path) {
return Ok(true); // Matches a specific problematic file.
}
// --- Rule 2: ALWAYS skip virtual filesystems like /proc, /sys, /dev ---
// These are checked regardless of use_default_ignores because they're not real files
// and can cause issues (huge fake sizes, hangs, etc.)
if Self::is_virtual_filesystem(path) {
return Ok(true);
}
// --- Rule 3: Check against other system paths if using default ignores ---
if self.config.use_default_ignores {
// Check for exact match of a system path.
if self.system_paths.contains(path) {
return Ok(true);
}
// Check if the current path is a child of any registered system path.
for system_root_path in &self.system_paths {
if path.starts_with(system_root_path) {
return Ok(true); // It's inside /tmp, /var/tmp, etc.
}
}
}
// --- Rule 4: Check against default ignore patterns (GlobSet) ---
// These patterns usually match file/directory names or relative paths within a project.
if let Some(ref default_ignore_set) = self.default_ignores {
// Check if the simple file/directory name matches any default pattern.
// (e.g., "node_modules" will match `path/to/project/node_modules`)
if let Some(file_name) = path.file_name() {
if default_ignore_set.is_match(Path::new(file_name)) {
return Ok(true);
}
}
// Also check the path relative to the scan root against default patterns.
// This handles patterns like "*.pyc" or "build/outputs/".
if let Ok(relative_path_to_root) = path.strip_prefix(&self.root) {
if default_ignore_set.is_match(relative_path_to_root) {
return Ok(true);
}
}
}
// --- Rule 5: Check against .gitignore patterns (GlobSet) ---
// These patterns are always relative to the root of the scan (where .gitignore is located).
if let Some(ref gitignore_set) = self.gitignore {
if let Ok(relative_path_to_root) = path.strip_prefix(&self.root) {
if gitignore_set.is_match(relative_path_to_root) {
return Ok(true); // Matches a .gitignore pattern.
}
}
// If strip_prefix fails (path is not under root), it can't match .gitignore relative patterns.
}
// If none of the above rules triggered, the path is not ignored.
Ok(false)
}
/// ## `should_include` - The Velvet Rope
///
/// Once a file gets past the bouncer (`should_ignore`), it has to get past
/// the velvet rope. This function checks if the file meets the specific criteria
/// for this party: "Are you a `.rs` file? Are you bigger than 1MB?"
/// Only the coolest files that match all the rules get in.
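/// A hedged sketch with hypothetical nodes, assuming a `Scanner` configured
/// with `file_type_filter: Some("rs".into())` and `min_size: Some(1024)`:
/// ```ignore
/// assert!(scanner.should_include(&rs_file_2_kib));  // .rs file, 2 KiB: passes both filters
/// assert!(!scanner.should_include(&rs_file_100_b)); // .rs file, 100 B: too small
/// assert!(scanner.should_include(&src_dir));        // directories skip the file-only filters
/// ```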
fn should_include(&self, node: &FileNode) -> bool {
// --- Filter by --find pattern (applies to both files and directories) ---
if let Some(ref find_regex_pattern) = self.config.find_pattern {
// Convert path to string for regex matching. Lossy conversion is acceptable for matching.
let path_str = node.path.to_string_lossy();
if !find_regex_pattern.is_match(&path_str) {
return false; // Path doesn't match the --find pattern.
}
}
// --- Filter by entry type (--entry-type) ---
if let Some(ref entry_type) = self.config.entry_type_filter {
match entry_type.as_str() {
"f" => {
if node.is_dir {
return false; // Looking for files only, but this is a directory
}
}
"d" => {
if !node.is_dir {
return false; // Looking for directories only, but this is a file
}
}
_ => {} // Should not happen due to clap validation
}
}
// --- Filters below only apply to files, not directories ---
if !node.is_dir {
// --- Filter by file extension (--type) ---
if let Some(ref required_extension) = self.config.file_type_filter {
match node
.path
.extension()
.and_then(|ext_osstr| ext_osstr.to_str())
{
Some(file_ext_str) => {
if !file_ext_str.eq_ignore_ascii_case(required_extension) {
return false; // Extension doesn't match.
}
}
None => return false, // File has no extension, so cannot match.
}
}
// --- Filter by minimum size (--min-size) ---
if let Some(min_allowed_size) = self.config.min_size {
if node.size < min_allowed_size {
return false; // File is too small.
}
}
// --- Filter by maximum size (--max-size) ---
if let Some(max_allowed_size) = self.config.max_size {
if node.size > max_allowed_size {
return false; // File is too large.
}
}
} // End of file-only filters
// --- Date filters (apply to both files and directories based on their modification time) ---
// --- Filter by newer_than date (--newer-than) ---
if let Some(min_modification_date) = self.config.newer_than {
if node.modified < min_modification_date {
return false; // Entry is older than required.
}
}
// --- Filter by older_than date (--older-than) ---
if let Some(max_modification_date) = self.config.older_than {
if node.modified > max_modification_date {
return false; // Entry is newer than allowed.
}
}
// If all applicable filters passed (or no filters were active for a category), include the node.
true
}
/// ## `determine_file_type` (Helper for `process_entry`)
///
/// Examines `fs::Metadata` to determine a more specific `FileType`
/// than just `is_dir` or `is_file`. On Unix, this can identify symlinks,
/// sockets, FIFOs, block/char devices, and executables (by permission).
/// On non-Unix, it's simpler (dir, symlink, or regular file).
fn determine_file_type(&self, metadata: &fs::Metadata) -> FileType {
#[cfg(unix)] // Unix-specific detailed file type detection
{
use std::os::unix::fs::FileTypeExt; // For is_socket, is_fifo, etc.
let ft = metadata.file_type(); // Get the rich FileType from metadata.
if ft.is_dir() {
FileType::Directory
} else if ft.is_symlink() {
// Check symlink before other types, as it can point to them.
FileType::Symlink
} else if ft.is_socket() {
FileType::Socket
} else if ft.is_fifo() {
// Named pipe
FileType::Pipe
} else if ft.is_block_device() {
FileType::BlockDevice
} else if ft.is_char_device() {
FileType::CharDevice
// Check for executable permission (any of user, group, other execute bits are set).
// This applies to regular files that are not dirs, symlinks, or other special types.
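// e.g. a mode of 0o754 masks to 0o110 (user+group execute bits), which is non-zero.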
} else if ft.is_file() && (metadata.permissions().mode() & 0o111 != 0) {
FileType::Executable
} else {
// If none of the above, it's a regular (non-executable) file.
FileType::RegularFile
}
}
#[cfg(not(unix))] // Simpler detection for non-Unix platforms
{
if metadata.is_dir() {
FileType::Directory
} else if metadata.file_type().is_symlink() {
// `is_symlink()` is part of stable `fs::FileType`
FileType::Symlink
} else {
// No easy cross-platform way to check executable bit without external crates or OS-specific calls.
// So, on non-Unix, we don't distinguish Executable from RegularFile here.
FileType::RegularFile
}
}
}
// --- Platform-Dependent Metadata Helpers ---
// These provide a consistent way to get permissions, UID, and GID,
// with sensible defaults for non-Unix systems where these concepts might not directly apply
// or be easily accessible via standard Rust fs::Metadata.
#[cfg(unix)]
fn get_permissions(metadata: &fs::Metadata) -> u32 {
// On Unix, get the mode and mask it to get the permission bits (e.g., 0o755).
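// e.g. a regular file's raw mode 0o100644 (S_IFREG | 0o644) masks down to 0o644.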
metadata.permissions().mode() & 0o777
}
#[cfg(not(unix))]
fn get_permissions(_metadata: &fs::Metadata) -> u32 {
0o755 // A common default permission (rwxr-xr-x) for non-Unix.
}
#[cfg(unix)]
fn get_uid(metadata: &fs::Metadata) -> u32 {
metadata.uid() // Get User ID from metadata.
}
#[cfg(not(unix))]
fn get_uid(_metadata: &fs::Metadata) -> u32 {
1000 // Common default UID placeholder for non-Unix.
}
#[cfg(unix)]
fn get_gid(metadata: &fs::Metadata) -> u32 {
metadata.gid() // Get Group ID from metadata.
}
#[cfg(not(unix))]
fn get_gid(_metadata: &fs::Metadata) -> u32 {
0 // Common default GID placeholder for non-Unix.
}
/// Apply sorting and optional top-N limit to the results
fn apply_sorting_and_limit(&self, mut nodes: Vec<FileNode>) -> Vec<FileNode> {
// If no sort field is specified, return the nodes in traversal order.
// Note: the top-N limit below is only applied when a sort field is set.
let sort_field = match &self.config.sort_field {
Some(field) => field,
None => return nodes,
};
// Sort based on the field
match sort_field.as_str() {
"name" | "a-to-z" => {
// Sort by name alphabetically (A to Z)
nodes.sort_by(|a, b| {
let name_a = a.path.file_name().unwrap_or_default().to_string_lossy();
let name_b = b.path.file_name().unwrap_or_default().to_string_lossy();
name_a.cmp(&name_b)
});
}
"z-to-a" => {
// Sort by name reverse alphabetically (Z to A)
nodes.sort_by(|a, b| {
let name_a = a.path.file_name().unwrap_or_default().to_string_lossy();
let name_b = b.path.file_name().unwrap_or_default().to_string_lossy();
name_b.cmp(&name_a)
});
}
"size" | "largest" => {
// Sort by size descending (largest first)
nodes.sort_by(|a, b| b.size.cmp(&a.size));
}
"smallest" => {
// Sort by size ascending (smallest first)
nodes.sort_by(|a, b| a.size.cmp(&b.size));
}
"date" | "newest" => {
// Sort by modification time descending (newest first)
nodes.sort_by(|a, b| b.modified.cmp(&a.modified));
}
"oldest" => {
// Sort by modification time ascending (oldest first)
nodes.sort_by(|a, b| a.modified.cmp(&b.modified));
}
"type" => {
// Sort by file extension, then by name
nodes.sort_by(|a, b| {
let ext_a = a.path.extension().unwrap_or_default().to_string_lossy();
let ext_b = b.path.extension().unwrap_or_default().to_string_lossy();
match ext_a.cmp(&ext_b) {
std::cmp::Ordering::Equal => {
let name_a = a.path.file_name().unwrap_or_default().to_string_lossy();
let name_b = b.path.file_name().unwrap_or_default().to_string_lossy();
name_a.cmp(&name_b)
}
other => other,
}
});
}
_ => {
// Unknown sort field, don't sort
eprintln!("Warning: Unknown sort field '{}', ignoring", sort_field);
}
}
// Apply top-N limit if specified
if let Some(limit) = self.config.top_n {
nodes.truncate(limit);
}
nodes
}
} // end impl Scanner
/// # `parse_size` - The Universal Translator for Sizes
///
/// This handy function takes something a human understands, like "2.5M", and
/// translates it into something a computer understands (2,621,440 bytes).
/// It's like having a Babel fish for file sizes. Why should we have to do
/// that math when the computer can do it for us?
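/// A hedged usage sketch (fence left uncompiled, since the import path depends
/// on the crate layout):
/// ```ignore
/// assert_eq!(parse_size("2.5M").unwrap(), 2_621_440); // 2.5 * 1024 * 1024
/// assert_eq!(parse_size("1kb").unwrap(), 1_024);      // units are case-insensitive
/// assert!(parse_size("-1G").is_err());                // negative sizes are rejected
/// ```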
pub fn parse_size(size_str: &str) -> Result<u64> {
let size_str = size_str.trim().to_uppercase();
if size_str.is_empty() {
return Err(anyhow::anyhow!("Empty size string"));
}
// Find the first alphabetic character which marks the start of the unit.
let unit_start_index = size_str
.find(|c: char| c.is_alphabetic())
.unwrap_or(size_str.len());
let (num_part_str, unit_part) = size_str.split_at(unit_start_index);
// Trim any space from the number part before parsing.
let num_part_str = num_part_str.trim();
if num_part_str.is_empty() {
return Err(anyhow::anyhow!("Missing number for size string"));
}
let num: f64 = match num_part_str.parse() {
Ok(n) => n,
Err(e) => return Err(anyhow::anyhow!("Invalid number '{}': {}", num_part_str, e)),
};
// Check for negative numbers.
if num.is_sign_negative() {
return Err(anyhow::anyhow!("Size cannot be negative: {}", num));
}
let multiplier = match unit_part {
"K" | "KB" => 1024.0,
"M" | "MB" => 1024.0 * 1024.0,
"G" | "GB" => 1024.0 * 1024.0 * 1024.0,
"T" | "TB" => 1024.0 * 1024.0 * 1024.0 * 1024.0,
"B" | "" => 1.0,
_ => return Err(anyhow::anyhow!("Invalid size unit: '{}'", unit_part)),
};
Ok((num * multiplier) as u64)
}
// --- Unit Tests: Ensuring Our Scanner Behaves ---
// Aye, even the most brilliant code needs tests to keep it honest!
// These tests cover some basic functionality of the scanner.
#[cfg(test)]
mod tests {
use super::*; // Import everything from the parent module (scanner.rs).
#[test]
fn test_parse_size_valid_inputs() {
assert_eq!(parse_size("100").unwrap(), 100);
assert_eq!(parse_size("100B").unwrap(), 100);
assert_eq!(parse_size("1k").unwrap(), 1024);
assert_eq!(parse_size("1K").unwrap(), 1024);
assert_eq!(parse_size("1KB").unwrap(), 1024);
assert_eq!(parse_size("2.5M").unwrap(), (2.5 * 1024.0 * 1024.0) as u64);
assert_eq!(parse_size("1GB").unwrap(), 1024 * 1024 * 1024);
assert_eq!(
parse_size("0.5T").unwrap(),
(0.5 * 1024.0 * 1024.0 * 1024.0 * 1024.0) as u64
);
assert_eq!(parse_size(" 2 MB ").unwrap(), 2 * 1024 * 1024); // Test with whitespace
}
#[test]
fn test_parse_size_invalid_inputs() {
assert!(parse_size("100X").is_err());
assert!(parse_size("garbage").is_err());
assert!(parse_size("-100M").is_err());
assert!(parse_size("1..5K").is_err());
}
#[test]
fn test_parse_size_zero_and_empty() {
assert_eq!(parse_size("0").unwrap(), 0);
assert!(parse_size("").is_err());
assert!(parse_size(" ").is_err());
}
// Basic test for Scanner creation. More comprehensive tests would involve
// creating a temporary directory structure and verifying scan results.
#[test]
fn test_scanner_creation_defaults() {
let temp_dir = tempfile::tempdir().unwrap();
let config = ScannerConfig {
max_depth: 5,
follow_symlinks: false,
respect_gitignore: true,
show_hidden: false,
show_ignored: false,
find_pattern: None,
file_type_filter: None,
entry_type_filter: None,
min_size: None,
max_size: None,
newer_than: None,
older_than: None,
use_default_ignores: true,
search_keyword: None,
show_filesystems: false,
sort_field: None,
top_n: None,
include_line_content: false,
};
let scanner_result = Scanner::new(temp_dir.path(), config);
assert!(scanner_result.is_ok());
}
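// A hedged sketch of a behavioral test for `apply_sorting_and_limit`: it builds
// minimal FileNodes by hand (field defaults mirror `create_permission_denied_node`)
// and checks size-descending order plus the top-N truncation. The file names and
// sizes are arbitrary fixtures, not taken from any real scan.
#[test]
fn test_apply_sorting_and_limit_by_size() {
fn make_node(name: &str, size: u64) -> FileNode {
FileNode {
path: PathBuf::from(name),
is_dir: false,
size,
permissions: 0o644,
uid: 0,
gid: 0,
modified: SystemTime::UNIX_EPOCH,
is_symlink: false,
is_hidden: false,
permission_denied: false,
is_ignored: false,
depth: 1,
file_type: FileType::RegularFile,
category: FileCategory::Unknown,
search_matches: None,
filesystem_type: FilesystemType::Unknown,
}
}
let temp_dir = tempfile::tempdir().unwrap();
let config = ScannerConfig {
max_depth: 5,
follow_symlinks: false,
respect_gitignore: true,
show_hidden: false,
show_ignored: false,
find_pattern: None,
file_type_filter: None,
entry_type_filter: None,
min_size: None,
max_size: None,
newer_than: None,
older_than: None,
use_default_ignores: true,
search_keyword: None,
show_filesystems: false,
sort_field: Some("size".to_string()),
top_n: Some(2),
include_line_content: false,
};
let scanner = Scanner::new(temp_dir.path(), config).unwrap();
let nodes = vec![
make_node("small.txt", 10),
make_node("big.txt", 300),
make_node("mid.txt", 42),
];
let sorted = scanner.apply_sorting_and_limit(nodes);
let sizes: Vec<u64> = sorted.iter().map(|n| n.size).collect();
assert_eq!(sizes, vec![300, 42]); // largest first, truncated to the top 2
}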
}