Skip to main content
Glama
by 8b-is
quantum_scanner.rs11 kB
// Quantum Scanner - The native tree walker that speaks in quantum format // This is where the magic happens - no intermediate representation, just pure quantum output #![allow(dead_code)] // Many constants and fields are reserved for future use use anyhow::Result; use std::collections::HashMap; use std::fs; use std::io::Write; use std::path::Path; use std::time::SystemTime; // Token ranges as suggested const TOKEN_RESERVED_START: u16 = 0x0000; const TOKEN_RESERVED_END: u16 = 0x00FF; const TOKEN_USER_START: u16 = 0x0100; // Pre-defined tokens for common filesystem terms const TOKEN_DIR: u16 = 0x0001; const TOKEN_FILE: u16 = 0x0002; const TOKEN_LINK: u16 = 0x0003; const TOKEN_PERM_755: u16 = 0x0010; const TOKEN_PERM_644: u16 = 0x0011; const TOKEN_PERM_777: u16 = 0x0012; const TOKEN_PERM_600: u16 = 0x0013; // Common extensions (0x20-0x7F) const TOKEN_EXT_JS: u16 = 0x0020; const TOKEN_EXT_RS: u16 = 0x0021; const TOKEN_EXT_PY: u16 = 0x0022; const TOKEN_EXT_GO: u16 = 0x0023; const TOKEN_EXT_MD: u16 = 0x0024; const TOKEN_EXT_JSON: u16 = 0x0025; const TOKEN_EXT_YAML: u16 = 0x0026; const TOKEN_EXT_TXT: u16 = 0x0027; // Common directory names (0x80-0xFF) const TOKEN_NODE_MODULES: u16 = 0x0080; const TOKEN_GIT: u16 = 0x0081; const TOKEN_SRC: u16 = 0x0082; const TOKEN_TARGET: u16 = 0x0083; const TOKEN_BUILD: u16 = 0x0084; const TOKEN_DIST: u16 = 0x0085; const TOKEN_DOCS: u16 = 0x0086; const TOKEN_TESTS: u16 = 0x0087; // Size tokens for common ranges const TOKEN_SIZE_ZERO: u16 = 0x00A0; const TOKEN_SIZE_TINY: u16 = 0x00A1; // 1-1KB const TOKEN_SIZE_SMALL: u16 = 0x00A2; // 1KB-100KB const TOKEN_SIZE_MEDIUM: u16 = 0x00A3; // 100KB-10MB const TOKEN_SIZE_LARGE: u16 = 0x00A4; // 10MB+ // ASCII control codes for tree traversal const TRAVERSE_SAME: u8 = 0x0B; // Vertical Tab const TRAVERSE_DEEPER: u8 = 0x0E; // Shift Out const TRAVERSE_BACK: u8 = 0x0F; // Shift In const TRAVERSE_SUMMARY: u8 = 0x0C; // Form Feed // Header bit flags const HDR_HAS_SIZE: u8 = 0b00000001; const HDR_HAS_PERMS: u8 = 0b00000010; const HDR_HAS_TIME: u8 = 0b00000100; const HDR_HAS_OWNER: u8 = 0b00001000; const HDR_IS_DIR: u8 = 0b00010000; const HDR_IS_LINK: u8 = 0b00100000; const HDR_HAS_XATTR: u8 = 0b01000000; const HDR_TOKENIZED: u8 = 0b10000000; pub struct QuantumScanner<W: Write> { writer: W, token_map: HashMap<String, u16>, #[allow(dead_code)] next_dynamic_token: u16, // Context for delta encoding parent_perms: u32, #[allow(dead_code)] parent_uid: u32, #[allow(dead_code)] parent_gid: u32, #[allow(dead_code)] parent_time: SystemTime, // Stats tracking total_files: u64, total_dirs: u64, total_size: u64, } impl<W: Write> QuantumScanner<W> { // Cross-platform permission handling #[cfg(unix)] fn get_permissions(metadata: &fs::Metadata) -> u32 { use std::os::unix::fs::PermissionsExt; metadata.permissions().mode() & 0o777 } #[cfg(not(unix))] fn get_permissions(_metadata: &fs::Metadata) -> u32 { 0o755 // Default permissions for non-Unix } pub fn new(writer: W) -> Self { let mut token_map = HashMap::new(); // Initialize with predefined tokens token_map.insert("node_modules".to_string(), TOKEN_NODE_MODULES); token_map.insert(".git".to_string(), TOKEN_GIT); token_map.insert("src".to_string(), TOKEN_SRC); token_map.insert("target".to_string(), TOKEN_TARGET); token_map.insert("build".to_string(), TOKEN_BUILD); token_map.insert("dist".to_string(), TOKEN_DIST); token_map.insert("docs".to_string(), TOKEN_DOCS); token_map.insert("tests".to_string(), TOKEN_TESTS); // Extension tokens token_map.insert(".js".to_string(), TOKEN_EXT_JS); token_map.insert(".rs".to_string(), TOKEN_EXT_RS); token_map.insert(".py".to_string(), TOKEN_EXT_PY); token_map.insert(".go".to_string(), TOKEN_EXT_GO); token_map.insert(".md".to_string(), TOKEN_EXT_MD); token_map.insert(".json".to_string(), TOKEN_EXT_JSON); token_map.insert(".yaml".to_string(), TOKEN_EXT_YAML); token_map.insert(".txt".to_string(), TOKEN_EXT_TXT); Self { writer, token_map, next_dynamic_token: TOKEN_USER_START, parent_perms: 0o755, parent_uid: 1000, parent_gid: 1000, parent_time: SystemTime::UNIX_EPOCH, total_files: 0, total_dirs: 0, total_size: 0, } } /// Write the quantum format header pub fn write_header(&mut self) -> Result<()> { writeln!(self.writer, "QUANTUM_NATIVE_V1:")?; writeln!(self.writer, "TOKENS:")?; // Write token map in sorted order let mut tokens: Vec<_> = self.token_map.iter().collect(); tokens.sort_by_key(|(_, &token)| token); for (name, token) in tokens { writeln!(self.writer, " {:04X}={}", token, name)?; } writeln!(self.writer, "DATA:")?; Ok(()) } /// Scan a path and emit quantum format directly pub fn scan(&mut self, path: &Path) -> Result<()> { self.write_header()?; self.scan_recursive(path, 0)?; self.write_summary()?; Ok(()) } fn scan_recursive(&mut self, path: &Path, depth: usize) -> Result<()> { let metadata = fs::metadata(path)?; // Emit quantum entry if metadata.is_dir() { self.emit_directory(path, &metadata, depth)?; // Update parent context let old_perms = self.parent_perms; self.parent_perms = Self::get_permissions(&metadata); // Scan children let mut entries: Vec<_> = fs::read_dir(path)?.filter_map(|e| e.ok()).collect(); // Sort for consistent output entries.sort_by_key(|e| e.file_name()); for (i, entry) in entries.iter().enumerate() { let child_path = entry.path(); self.scan_recursive(&child_path, depth + 1)?; // Emit traversal code if i < entries.len() - 1 { self.writer.write_all(&[TRAVERSE_SAME])?; } } // Restore parent context self.parent_perms = old_perms; // Emit back traversal if not at root if depth > 0 { self.writer.write_all(&[TRAVERSE_BACK])?; } self.total_dirs += 1; } else { self.emit_file(path, &metadata)?; self.total_files += 1; self.total_size += metadata.len(); } Ok(()) } fn emit_directory(&mut self, path: &Path, metadata: &fs::Metadata, depth: usize) -> Result<()> { let mut header = HDR_IS_DIR; let mut data = Vec::new(); // Size (for directories, this is the entry size) header |= HDR_HAS_SIZE; data.extend(&self.encode_size(metadata.len())); // Permissions if different let perms = Self::get_permissions(metadata); if perms != self.parent_perms { header |= HDR_HAS_PERMS; let delta = perms ^ self.parent_perms; data.push((delta >> 8) as u8); data.push(delta as u8); } // Emit header and data self.writer.write_all(&[header])?; self.writer.write_all(&data)?; // Emit name (tokenized if possible) self.emit_name(path)?; // Emit traversal code if depth == 0 { // Root directory self.writer.write_all(&[TRAVERSE_DEEPER])?; } Ok(()) } fn emit_file(&mut self, path: &Path, metadata: &fs::Metadata) -> Result<()> { let mut header = 0u8; let mut data = Vec::new(); // Size header |= HDR_HAS_SIZE; data.extend(&self.encode_size(metadata.len())); // Permissions if different let perms = Self::get_permissions(metadata); if perms != self.parent_perms { header |= HDR_HAS_PERMS; let delta = perms ^ self.parent_perms; data.push((delta >> 8) as u8); data.push(delta as u8); } // Emit header and data self.writer.write_all(&[header])?; self.writer.write_all(&data)?; // Emit name self.emit_name(path)?; Ok(()) } fn emit_name(&mut self, path: &Path) -> Result<()> { let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); // Check for exact token match if let Some(&token) = self.token_map.get(name) { self.writer.write_all(&token.to_le_bytes())?; return Ok(()); } // Check for extension token if let Some(dot_pos) = name.rfind('.') { let ext = &name[dot_pos..]; if let Some(&token) = self.token_map.get(ext) { // Write base name + extension token self.writer.write_all(&name.as_bytes()[..dot_pos])?; self.writer.write_all(&token.to_le_bytes())?; return Ok(()); } } // No token found - consider adding dynamically for frequently seen patterns // For now, just write the raw name self.writer.write_all(name.as_bytes())?; Ok(()) } fn encode_size(&self, size: u64) -> Vec<u8> { // Size-based tokenization match size { 0 => vec![TOKEN_SIZE_ZERO as u8, (TOKEN_SIZE_ZERO >> 8) as u8], 1..=1024 => vec![ TOKEN_SIZE_TINY as u8, (TOKEN_SIZE_TINY >> 8) as u8, size as u8, ], 1025..=102400 => { let kb = (size / 1024) as u16; vec![ TOKEN_SIZE_SMALL as u8, (TOKEN_SIZE_SMALL >> 8) as u8, kb as u8, (kb >> 8) as u8, ] } _ => { // For larger sizes, use standard encoding match size { 0..=255 => vec![0x00, size as u8], 256..=65535 => { let bytes = (size as u16).to_le_bytes(); vec![0x01, bytes[0], bytes[1]] } _ => { let bytes = (size as u32).to_le_bytes(); vec![0x02, bytes[0], bytes[1], bytes[2], bytes[3]] } } } } } fn write_summary(&mut self) -> Result<()> { writeln!(self.writer, "\nSUMMARY:")?; writeln!(self.writer, "FILES: {}", self.total_files)?; writeln!(self.writer, "DIRS: {}", self.total_dirs)?; writeln!(self.writer, "SIZE: {}", self.total_size)?; Ok(()) } } // PermissionsExt import removed - not currently used // Will be re-added when permission handling is implemented

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/8b-is/smart-tree'

If you have feedback or need assistance with the MCP directory API, please join our Discord server