Skip to main content
Glama
by 8b-is
mem8_binary.rs16.8 kB
// MEM8 Binary Format - "The REAL wave-based memory!" 🌊 // Proper binary .m8 format with wave interference and temporal encoding // "No more JSON pretenders!" - Hue use anyhow::{Context, Result}; use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; /// Magic bytes for .m8 files const M8_MAGIC: &[u8; 4] = b"MEM8"; /// Version of the .m8 format const M8_VERSION: u8 = 1; /// Block size (4KB for efficiency) const BLOCK_SIZE: usize = 4096; /// Header for .m8 files #[repr(C, packed)] pub struct M8Header { magic: [u8; 4], version: u8, flags: u8, block_count: u32, identity_freq: f64, temporal_phase: f64, crc32: u32, } impl M8Header { fn to_bytes(&self) -> Vec<u8> { let mut bytes = Vec::with_capacity(std::mem::size_of::<Self>()); bytes.extend_from_slice(&self.magic); bytes.push(self.version); bytes.push(self.flags); bytes.extend_from_slice(&self.block_count.to_le_bytes()); bytes.extend_from_slice(&self.identity_freq.to_le_bytes()); bytes.extend_from_slice(&self.temporal_phase.to_le_bytes()); bytes.extend_from_slice(&self.crc32.to_le_bytes()); bytes } fn from_bytes(bytes: &[u8]) -> Result<Self> { if bytes.len() < std::mem::size_of::<Self>() { anyhow::bail!("Invalid header size"); } let mut cursor = 0; let mut magic = [0u8; 4]; magic.copy_from_slice(&bytes[cursor..cursor + 4]); cursor += 4; let version = bytes[cursor]; cursor += 1; let flags = bytes[cursor]; cursor += 1; let block_count = u32::from_le_bytes(bytes[cursor..cursor + 4].try_into()?); cursor += 4; let identity_freq = f64::from_le_bytes(bytes[cursor..cursor + 8].try_into()?); cursor += 8; let temporal_phase = f64::from_le_bytes(bytes[cursor..cursor + 8].try_into()?); cursor += 8; let crc32 = u32::from_le_bytes(bytes[cursor..cursor + 4].try_into()?); Ok(Self { magic, version, flags, block_count, identity_freq, temporal_phase, crc32, }) } } /// Memory block in .m8 format #[repr(C)] 
#[derive(Debug, Clone)] pub struct M8Block { /// Block index (even = consciousness, odd = context) pub index: u64, /// Wave signature (16 bytes) pub wave_signature: [u8; 16], /// Temporal timestamp (microseconds since epoch) pub timestamp: u64, /// Importance score (0.0 to 1.0 encoded as u16) pub importance: u16, /// Token ID (dynamic tokenization) pub token_id: u16, /// Previous block hash (for chaining) pub prev_hash: [u8; 32], /// Content length pub content_len: u32, /// Content (variable length, padded to BLOCK_SIZE) pub content: Vec<u8>, } /// Token mapping for dynamic tokenization pub struct TokenMap { /// String to token ID str_to_token: HashMap<String, u16>, /// Token ID to string token_to_str: HashMap<u16, String>, /// Next available token ID next_id: u16, /// Reserved tokens (0x80 = node_modules, etc) reserved: HashMap<u16, String>, } impl Default for TokenMap { fn default() -> Self { Self::new() } } impl TokenMap { pub fn new() -> Self { let mut reserved = HashMap::new(); reserved.insert(0x80, "node_modules".to_string()); reserved.insert(0x81, ".git".to_string()); reserved.insert(0x82, "target".to_string()); reserved.insert(0x83, "dist".to_string()); reserved.insert(0x84, "build".to_string()); reserved.insert(0x90, ".rs".to_string()); reserved.insert(0x91, ".py".to_string()); reserved.insert(0x92, ".js".to_string()); reserved.insert(0x93, ".ts".to_string()); reserved.insert(0xFFFE, "Claude".to_string()); reserved.insert(0xFFFF, "Hue".to_string()); let mut str_to_token = HashMap::new(); let mut token_to_str = HashMap::new(); for (&id, value) in &reserved { str_to_token.insert(value.clone(), id); token_to_str.insert(id, value.clone()); } Self { str_to_token, token_to_str, next_id: 0x100, // Start after reserved range reserved, } } /// Get or create token for string pub fn get_token(&mut self, s: &str) -> u16 { if let Some(&token) = self.str_to_token.get(s) { return token; } let token = self.next_id; self.next_id += 1; 
self.str_to_token.insert(s.to_string(), token); self.token_to_str.insert(token, s.to_string()); token } /// Decode token to string pub fn decode_token(&self, token: u16) -> Option<&str> { self.token_to_str.get(&token).map(|s| s.as_str()) } } /// MEM8 Binary File - append-only wave-based memory pub struct M8BinaryFile { path: PathBuf, file: File, header: M8Header, tokens: TokenMap, /// Current position for backwards reading read_position: u64, /// Cache of important blocks for quick access importance_index: Vec<(u64, f32)>, // (block_offset, importance) } impl M8BinaryFile { /// Create new .m8 file pub fn create(path: impl AsRef<Path>) -> Result<Self> { let path = path.as_ref().to_path_buf(); let mut file = OpenOptions::new() .create(true) .write(true) .read(true) .truncate(true) .open(&path) .context("Failed to create .m8 file")?; let header = M8Header { magic: *M8_MAGIC, version: M8_VERSION, flags: 0, block_count: 0, identity_freq: 440.0, // A440 Hz by default temporal_phase: 0.0, crc32: 0, }; // Write header file.write_all(&header.to_bytes())?; Ok(Self { path, file, header, tokens: TokenMap::new(), read_position: std::mem::size_of::<M8Header>() as u64, importance_index: Vec::new(), }) } /// Open existing .m8 file pub fn open(path: impl AsRef<Path>) -> Result<Self> { let path = path.as_ref().to_path_buf(); let mut file = OpenOptions::new() .read(true) .write(true) .open(&path) .context("Failed to open .m8 file")?; // Read header let mut header_bytes = vec![0u8; std::mem::size_of::<M8Header>()]; file.read_exact(&mut header_bytes)?; let header = M8Header::from_bytes(&header_bytes)?; // Verify magic if header.magic != *M8_MAGIC { anyhow::bail!("Invalid .m8 file (bad magic)"); } // Seek to end for backwards reading let file_size = file.seek(SeekFrom::End(0))?; let mut m8 = Self { path, file, header, tokens: TokenMap::new(), read_position: file_size, importance_index: Vec::new(), }; // Build importance index m8.build_importance_index()?; Ok(m8) } /// Append memory 
block (never modifies existing blocks) pub fn append_block(&mut self, content: &[u8], importance: f32) -> Result<()> { let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_micros() as u64; let wave_sig = self.generate_wave_signature(content); let block = M8Block { index: self.header.block_count as u64, wave_signature: wave_sig, timestamp, importance: (importance * 65535.0) as u16, token_id: 0, // TODO: tokenize content prev_hash: [0; 32], // TODO: compute hash content_len: content.len() as u32, content: content.to_vec(), }; // Seek to end self.file.seek(SeekFrom::End(0))?; // Write block self.write_block(&block)?; // Update header self.header.block_count += 1; self.update_header()?; // Update importance index let offset = self.file.stream_position()?; self.importance_index.push((offset, importance)); Ok(()) } /// Read backwards from end (most recent first) pub fn read_backwards(&mut self) -> Result<Option<M8Block>> { if self.read_position <= std::mem::size_of::<M8Header>() as u64 { return Ok(None); } // Seek to previous block self.read_position -= BLOCK_SIZE as u64; self.file.seek(SeekFrom::Start(self.read_position))?; // Read block self.read_block() } /// Read blocks by importance (user keywords boost importance) pub fn read_by_importance(&mut self, keywords: &[String]) -> Result<Vec<M8Block>> { let mut blocks = Vec::new(); // Sort by importance self.importance_index .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); // Extract offsets to avoid holding any borrow of self while reading blocks let offsets: Vec<u64> = self.importance_index.iter().map(|(off, _)| *off).collect(); for offset in offsets { self.file.seek(SeekFrom::Start(offset))?; if let Some(block) = self.read_block()? 
{ // Check if block contains keywords let content_str = String::from_utf8_lossy(&block.content); let has_keyword = keywords.iter().any(|kw| content_str.contains(kw)); if has_keyword || blocks.len() < 10 { blocks.push(block); } if blocks.len() >= 20 { break; } } } Ok(blocks) } /// Generate wave signature from content fn generate_wave_signature(&self, content: &[u8]) -> [u8; 16] { use sha2::{Digest, Sha256}; let mut hasher = Sha256::new(); hasher.update(content); hasher.update(self.header.identity_freq.to_le_bytes()); let hash = hasher.finalize(); let mut signature = [0u8; 16]; signature.copy_from_slice(&hash[..16]); signature } /// Write block to file fn write_block(&mut self, block: &M8Block) -> Result<()> { // Create fixed-size block buffer let mut buffer = vec![0u8; BLOCK_SIZE]; let mut cursor = 0; // Write block fields buffer[cursor..cursor + 8].copy_from_slice(&block.index.to_le_bytes()); cursor += 8; buffer[cursor..cursor + 16].copy_from_slice(&block.wave_signature); cursor += 16; buffer[cursor..cursor + 8].copy_from_slice(&block.timestamp.to_le_bytes()); cursor += 8; buffer[cursor..cursor + 2].copy_from_slice(&block.importance.to_le_bytes()); cursor += 2; buffer[cursor..cursor + 2].copy_from_slice(&block.token_id.to_le_bytes()); cursor += 2; buffer[cursor..cursor + 32].copy_from_slice(&block.prev_hash); cursor += 32; buffer[cursor..cursor + 4].copy_from_slice(&block.content_len.to_le_bytes()); cursor += 4; // Copy content (up to remaining space) let content_space = BLOCK_SIZE - cursor; let content_to_copy = block.content.len().min(content_space); buffer[cursor..cursor + content_to_copy].copy_from_slice(&block.content[..content_to_copy]); self.file.write_all(&buffer)?; Ok(()) } /// Read block from file fn read_block(&mut self) -> Result<Option<M8Block>> { let mut buffer = vec![0u8; BLOCK_SIZE]; match self.file.read_exact(&mut buffer) { Ok(_) => {} Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None), Err(e) => return Err(e.into()), } let 
mut cursor = 0; let index = u64::from_le_bytes(buffer[cursor..cursor + 8].try_into()?); cursor += 8; let mut wave_signature = [0u8; 16]; wave_signature.copy_from_slice(&buffer[cursor..cursor + 16]); cursor += 16; let timestamp = u64::from_le_bytes(buffer[cursor..cursor + 8].try_into()?); cursor += 8; let importance = u16::from_le_bytes(buffer[cursor..cursor + 2].try_into()?); cursor += 2; let token_id = u16::from_le_bytes(buffer[cursor..cursor + 2].try_into()?); cursor += 2; let mut prev_hash = [0u8; 32]; prev_hash.copy_from_slice(&buffer[cursor..cursor + 32]); cursor += 32; let content_len = u32::from_le_bytes(buffer[cursor..cursor + 4].try_into()?); cursor += 4; let content = buffer[cursor..cursor + content_len as usize].to_vec(); Ok(Some(M8Block { index, wave_signature, timestamp, importance, token_id, prev_hash, content_len, content, })) } /// Update header in file fn update_header(&mut self) -> Result<()> { self.file.seek(SeekFrom::Start(0))?; self.file.write_all(&self.header.to_bytes())?; self.file.flush()?; Ok(()) } /// Build importance index from file fn build_importance_index(&mut self) -> Result<()> { self.file .seek(SeekFrom::Start(std::mem::size_of::<M8Header>() as u64))?; loop { let offset = self.file.stream_position()?; match self.read_block()? { Some(block) => { let importance = block.importance as f32 / 65535.0; self.importance_index.push((offset, importance)); } None => break, } } Ok(()) } } /// Convert existing JSON .m8 files to proper binary format pub fn convert_json_to_binary(json_path: &Path, binary_path: &Path) -> Result<()> { use std::fs; // Read and decompress if needed let data = fs::read(json_path)?; let json_str = if data.starts_with(b"\x78\x9c") || data.starts_with(b"\x78\xda") { // zlib compressed use flate2::read::ZlibDecoder; let mut decoder = ZlibDecoder::new(&data[..]); let mut decompressed = String::new(); decoder.read_to_string(&mut decompressed)?; decompressed } else { String::from_utf8(data)? 
}; // Parse JSON let contexts: serde_json::Value = serde_json::from_str(&json_str)?; // Create binary .m8 file let mut m8_file = M8BinaryFile::create(binary_path)?; // Convert contexts to blocks if let Some(contexts_array) = contexts.get("contexts").and_then(|c| c.as_array()) { for context in contexts_array { let content = serde_json::to_vec(context)?; let importance = context.get("score").and_then(|s| s.as_f64()).unwrap_or(0.5) as f32; m8_file.append_block(&content, importance)?; } } Ok(()) } #[cfg(test)] mod tests { use super::*; use tempfile::tempdir; #[test] fn test_m8_binary_format() { let dir = tempdir().unwrap(); let path = dir.path().join("test.m8"); // Create file let mut m8 = M8BinaryFile::create(&path).unwrap(); // Append some blocks m8.append_block(b"First memory", 0.8).unwrap(); m8.append_block(b"Second memory", 0.5).unwrap(); m8.append_block(b"Important memory", 1.0).unwrap(); // Reopen and read backwards let mut m8 = M8BinaryFile::open(&path).unwrap(); // Should read "Important memory" first (most recent) let block = m8.read_backwards().unwrap().unwrap(); assert_eq!(&block.content, b"Important memory"); // Read by importance let important_blocks = m8.read_by_importance(&["Important".to_string()]).unwrap(); assert!(!important_blocks.is_empty()); } #[test] fn test_tokenization() { let mut tokens = TokenMap::new(); // Reserved tokens assert_eq!(tokens.get_token("node_modules"), 0x80); assert_eq!(tokens.get_token(".rs"), 0x90); assert_eq!(tokens.get_token("Claude"), 0xFFFE); // Dynamic tokens let token1 = tokens.get_token("custom_string"); let token2 = tokens.get_token("another_string"); assert!(token1 >= 0x100); assert!(token2 > token1); // Decode assert_eq!(tokens.decode_token(0x80), Some("node_modules")); assert_eq!(tokens.decode_token(token1), Some("custom_string")); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/8b-is/smart-tree'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.