//! 📖 SmartRead - Context-Aware File Reading
//!
//! This module implements intelligent file reading that focuses on relevant
//! sections based on task context, achieving 70-90% token reduction while
//! maintaining all necessary information for the user's current task.
use super::context::ContextAnalyzer;
use super::{RelevanceScore, SmartResponse, TaskContext, TokenSavings};
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
/// 📖 Smart file reader with context awareness
pub struct SmartReader {
#[allow(dead_code)]
context_analyzer: ContextAnalyzer,
}
/// 📄 A relevant section of a file
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSection {
/// Section content
pub content: String,
/// Line range (start, end)
pub line_range: (usize, usize),
/// Section type (function, class, comment, etc.)
pub section_type: SectionType,
/// Relevance score for this section
pub relevance: RelevanceScore,
}
/// 🏷️ Types of file sections we can identify
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SectionType {
Function,
Class,
Struct,
Enum,
Import,
Comment,
Documentation,
Configuration,
Test,
Error,
Unknown,
}
/// 📊 Smart read response with context-aware results
pub type SmartReadResponse = SmartResponse<FileSection>;
impl SmartReader {
/// Create a new smart reader
pub fn new() -> Self {
Self {
context_analyzer: ContextAnalyzer::new(),
}
}
/// 📖 Read file with context awareness
pub fn read_contextual(&self, path: &Path, context: &TaskContext) -> Result<SmartReadResponse> {
// Read the full file
let content = fs::read_to_string(path)
.map_err(|e| anyhow!("Failed to read file {}: {}", path.display(), e))?;
// Split into sections
let sections = self.identify_sections(&content, path)?;
// Score sections by relevance
let scored_sections = self.score_sections(§ions, context)?;
// Filter and categorize by relevance
let (primary, secondary) = self.categorize_by_relevance(&scored_sections, context);
// Calculate token savings
let original_tokens = self.estimate_tokens(&content);
let compressed_tokens = self.estimate_tokens_for_sections(&primary)
+ self.estimate_tokens_for_sections(&secondary);
let token_savings = TokenSavings::new(original_tokens, compressed_tokens, "smart-read");
// Generate context summary
let context_summary = self.generate_context_summary(&primary, &secondary, context);
// Generate suggestions
let suggestions = self.generate_suggestions(&primary, &secondary, context);
Ok(SmartReadResponse {
primary,
secondary,
context_summary,
token_savings,
suggestions,
})
}
/// 🔍 Identify sections within file content
fn identify_sections(&self, content: &str, path: &Path) -> Result<Vec<FileSection>> {
let lines: Vec<&str> = content.lines().collect();
let mut sections = Vec::new();
// Determine file type from extension
let extension = path
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("")
.to_lowercase();
match extension.as_str() {
"rs" => self.identify_rust_sections(&lines, &mut sections)?,
"py" => self.identify_python_sections(&lines, &mut sections)?,
"js" | "ts" => self.identify_javascript_sections(&lines, &mut sections)?,
"json" => self.identify_json_sections(&lines, &mut sections)?,
"yaml" | "yml" => self.identify_yaml_sections(&lines, &mut sections)?,
"md" => self.identify_markdown_sections(&lines, &mut sections)?,
_ => self.identify_generic_sections(&lines, &mut sections)?,
}
Ok(sections)
}
/// 🦀 Identify Rust code sections
fn identify_rust_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
let mut current_section: Option<(usize, SectionType, Vec<String>)> = None;
for (i, line) in lines.iter().enumerate() {
let trimmed = line.trim();
// Function definitions
if trimmed.starts_with("fn ")
|| trimmed.starts_with("pub fn ")
|| trimmed.starts_with("async fn ")
|| trimmed.starts_with("pub async fn ")
{
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Function, vec![line.to_string()]));
}
// Struct definitions
else if trimmed.starts_with("struct ") || trimmed.starts_with("pub struct ") {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Struct, vec![line.to_string()]));
}
// Enum definitions
else if trimmed.starts_with("enum ") || trimmed.starts_with("pub enum ") {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Enum, vec![line.to_string()]));
}
// Impl blocks
else if trimmed.starts_with("impl ") {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Class, vec![line.to_string()]));
}
// Use statements
else if trimmed.starts_with("use ") {
if current_section.is_none()
|| current_section.as_ref().unwrap().1 != SectionType::Import
{
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Import, vec![line.to_string()]));
} else if let Some((_, _, ref mut content)) = current_section {
content.push(line.to_string());
}
}
// Documentation comments
else if trimmed.starts_with("///") || trimmed.starts_with("//!") {
if current_section.is_none()
|| current_section.as_ref().unwrap().1 != SectionType::Documentation
{
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Documentation, vec![line.to_string()]));
} else if let Some((_, _, ref mut content)) = current_section {
content.push(line.to_string());
}
}
// Test functions
else if trimmed.contains("#[test]") || trimmed.contains("#[tokio::test]") {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Test, vec![line.to_string()]));
}
// Continue current section
else if let Some((_, _, ref mut content)) = current_section {
content.push(line.to_string());
// End section on closing brace at start of line
if trimmed == "}" {
self.finish_current_section(&mut current_section, sections);
}
}
}
// Finish any remaining section
self.finish_current_section(&mut current_section, sections);
Ok(())
}
/// 🐍 Identify Python code sections
fn identify_python_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
let mut current_section: Option<(usize, SectionType, Vec<String>)> = None;
for (i, line) in lines.iter().enumerate() {
let trimmed = line.trim();
// Function definitions
if trimmed.starts_with("def ") || trimmed.starts_with("async def ") {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Function, vec![line.to_string()]));
}
// Class definitions
else if trimmed.starts_with("class ") {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Class, vec![line.to_string()]));
}
// Import statements
else if trimmed.starts_with("import ") || trimmed.starts_with("from ") {
if current_section.is_none()
|| current_section.as_ref().unwrap().1 != SectionType::Import
{
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Import, vec![line.to_string()]));
} else if let Some((_, _, ref mut content)) = current_section {
content.push(line.to_string());
}
}
// Continue current section
else if let Some((_, _, ref mut content)) = current_section {
content.push(line.to_string());
}
}
self.finish_current_section(&mut current_section, sections);
Ok(())
}
/// 🟨 Identify JavaScript/TypeScript sections
fn identify_javascript_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
// Similar pattern to Rust but with JS/TS syntax
self.identify_generic_sections(lines, sections)
}
/// 📄 Identify JSON sections
fn identify_json_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
// For JSON, treat the whole file as a configuration section
let content = lines.join("\n");
sections.push(FileSection {
content,
line_range: (0, lines.len()),
section_type: SectionType::Configuration,
relevance: RelevanceScore {
score: 0.7,
reasons: vec!["JSON configuration file".to_string()],
focus_matches: vec![],
},
});
Ok(())
}
/// 📄 Identify YAML sections
fn identify_yaml_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
// For YAML, treat as configuration
let content = lines.join("\n");
sections.push(FileSection {
content,
line_range: (0, lines.len()),
section_type: SectionType::Configuration,
relevance: RelevanceScore {
score: 0.7,
reasons: vec!["YAML configuration file".to_string()],
focus_matches: vec![],
},
});
Ok(())
}
/// 📝 Identify Markdown sections
fn identify_markdown_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
let mut current_section: Option<(usize, SectionType, Vec<String>)> = None;
for (i, line) in lines.iter().enumerate() {
let trimmed = line.trim();
// Headers start new sections
if trimmed.starts_with('#') {
self.finish_current_section(&mut current_section, sections);
current_section = Some((i, SectionType::Documentation, vec![line.to_string()]));
}
// Continue current section
else if let Some((_, _, ref mut content)) = current_section {
content.push(line.to_string());
}
// Start new section if no current section
else {
current_section = Some((i, SectionType::Documentation, vec![line.to_string()]));
}
}
self.finish_current_section(&mut current_section, sections);
Ok(())
}
/// 📄 Identify generic file sections
fn identify_generic_sections(
&self,
lines: &[&str],
sections: &mut Vec<FileSection>,
) -> Result<()> {
// For unknown file types, create one section with the entire content
let content = lines.join("\n");
sections.push(FileSection {
content,
line_range: (0, lines.len()),
section_type: SectionType::Unknown,
relevance: RelevanceScore {
score: 0.5,
reasons: vec!["Generic file content".to_string()],
focus_matches: vec![],
},
});
Ok(())
}
/// ✅ Finish current section and add to sections list
fn finish_current_section(
&self,
current_section: &mut Option<(usize, SectionType, Vec<String>)>,
sections: &mut Vec<FileSection>,
) {
if let Some((start_line, section_type, content)) = current_section.take() {
let end_line = start_line + content.len();
sections.push(FileSection {
content: content.join("\n"),
line_range: (start_line, end_line),
section_type,
relevance: RelevanceScore {
score: 0.5, // Will be updated by scoring
reasons: vec![],
focus_matches: vec![],
},
});
}
}
/// 📊 Score sections by relevance to context
fn score_sections(
&self,
sections: &[FileSection],
context: &TaskContext,
) -> Result<Vec<FileSection>> {
let mut scored_sections = Vec::new();
for section in sections {
let mut relevance_score: f32 = 0.0;
let mut reasons = Vec::new();
let mut focus_matches = Vec::new();
// Score based on section type
relevance_score += match section.section_type {
SectionType::Function => 0.8,
SectionType::Class | SectionType::Struct => 0.7,
SectionType::Import => 0.4,
SectionType::Configuration => 0.6,
SectionType::Test => 0.5,
SectionType::Documentation => 0.3,
_ => 0.5,
};
// Score based on content matching focus areas
let content_lower = section.content.to_lowercase();
for focus_area in &context.focus_areas {
for keyword in focus_area.keywords() {
if content_lower.contains(keyword) {
relevance_score += 0.2;
reasons.push(format!("Contains '{}' keyword", keyword));
if !focus_matches.contains(focus_area) {
focus_matches.push(focus_area.clone());
}
}
}
}
// Normalize score
relevance_score = relevance_score.min(1.0);
let mut scored_section = section.clone();
scored_section.relevance = RelevanceScore {
score: relevance_score,
reasons,
focus_matches,
};
scored_sections.push(scored_section);
}
Ok(scored_sections)
}
/// 🏷️ Categorize sections by relevance threshold
fn categorize_by_relevance(
&self,
sections: &[FileSection],
context: &TaskContext,
) -> (Vec<FileSection>, Vec<FileSection>) {
let mut primary = Vec::new();
let mut secondary = Vec::new();
for section in sections {
if section.relevance.score >= context.relevance_threshold {
primary.push(section.clone());
} else if section.relevance.score >= context.relevance_threshold * 0.7 {
secondary.push(section.clone());
}
// Sections below 70% of threshold are filtered out
}
// Sort by relevance score (highest first)
primary.sort_by(|a, b| b.relevance.score.partial_cmp(&a.relevance.score).unwrap());
secondary.sort_by(|a, b| b.relevance.score.partial_cmp(&a.relevance.score).unwrap());
(primary, secondary)
}
/// 🧮 Estimate token count for content
fn estimate_tokens(&self, content: &str) -> usize {
// Rough estimation: ~4 characters per token
content.len() / 4
}
/// 🧮 Estimate token count for sections
fn estimate_tokens_for_sections(&self, sections: &[FileSection]) -> usize {
sections
.iter()
.map(|s| self.estimate_tokens(&s.content))
.sum()
}
/// 📝 Generate context summary
fn generate_context_summary(
&self,
primary: &[FileSection],
secondary: &[FileSection],
context: &TaskContext,
) -> String {
format!(
"SmartRead analyzed file for task: '{}'. Found {} high-relevance sections and {} medium-relevance sections. Focus areas: {:?}",
context.task,
primary.len(),
secondary.len(),
context.focus_areas
)
}
/// 💡 Generate proactive suggestions
fn generate_suggestions(
&self,
primary: &[FileSection],
secondary: &[FileSection],
_context: &TaskContext,
) -> Vec<String> {
let mut suggestions = Vec::new();
if primary.is_empty() {
suggestions.push("No highly relevant sections found. Consider adjusting the task context or relevance threshold.".to_string());
}
if secondary.len() > 10 {
suggestions.push("Many medium-relevance sections found. Consider using a more specific task context.".to_string());
}
// Suggest related tools based on section types
let has_functions = primary
.iter()
.any(|s| s.section_type == SectionType::Function);
let has_tests = primary.iter().any(|s| s.section_type == SectionType::Test);
if has_functions && !has_tests {
suggestions.push("Consider using find_tests to locate related test files.".to_string());
}
suggestions
}
}
impl Default for SmartReader {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
// use std::path::PathBuf; // Commented out as unused
#[test]
fn test_rust_section_identification() {
let reader = SmartReader::new();
let lines = vec![
"use std::collections::HashMap;",
"",
"/// This is a test function",
"pub fn test_function() {",
" println!(\"Hello\");",
"}",
];
let mut sections = Vec::new();
reader
.identify_rust_sections(&lines, &mut sections)
.unwrap();
assert_eq!(sections.len(), 3); // Import, documentation, and function sections
assert_eq!(sections[0].section_type, SectionType::Import);
assert_eq!(sections[1].section_type, SectionType::Documentation);
assert_eq!(sections[2].section_type, SectionType::Function);
}
}