# CodeGraph Configuration File
# Copy this to .codegraph.toml or ~/.codegraph/config.toml and customize
# ============================================================================
# Embedding Configuration
# ============================================================================
[embedding]
# Provider: "auto", "onnx", "ollama", "openai", or "lmstudio"
# "auto" will detect available models automatically
# "lmstudio" recommended for MLX + Flash Attention 2 (macOS)
provider = "lmstudio"
# Model path or identifier
# For ONNX: Absolute path to model directory (auto-detected from HuggingFace cache)
# For Ollama: Model name (e.g., "all-minilm:latest")
# For LM Studio: Model name (e.g., "jinaai/jina-embeddings-v4")
# For OpenAI: Model name (e.g., "text-embedding-3-small")
# Recommended: jinaai/jina-embeddings-v4 (2048-dim, optimized for code)
model = "jinaai/jina-embeddings-v4"
# LM Studio URL (default port 1234)
lmstudio_url = "http://localhost:1234"
# Ollama URL (only used if provider is "ollama")
ollama_url = "http://localhost:11434"
# OpenAI API key (only used if provider is "openai")
# Can also be set via OPENAI_API_KEY environment variable
# openai_api_key = "sk-..."
# Embedding dimension (must match your model's output size):
# - 2048 for jina-embeddings-v4
# - 384 for all-MiniLM
# - 1024 for qwen3-embedding:0.6b
# Check your model card and what the schema supports.
# Model suggestions:
# - all-minilm:latest via Ollama for speed
# - embeddinggemma or qwen3-embedding (0.6b / 4b / 8b) for local SOTA
# - jinaai/jina-embeddings-v4 for SOTA accuracy
dimension = 2048
# Batch size for embedding generation (GPU optimization)
batch_size = 64
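#
# Example: an alternative all-local embedding setup via Ollama, using the
# values noted above (illustrative sketch only; verify the model name and
# its dimension for whatever you actually pull):
# provider = "ollama"
# model = "all-minilm:latest"
# ollama_url = "http://localhost:11434"
# dimension = 384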
# ============================================================================
# LLM Configuration (for insights generation)
# ============================================================================
[llm]
# Enable LLM insights (false = context-only mode for agents like Claude/GPT-4)
# Set to false for maximum speed if using an external agent
enabled = false
# LLM provider: "ollama", "lmstudio", "anthropic", "openai", "xai", or "openai-compatible"
# - "lmstudio": Local LLMs via LM Studio (recommended for MLX + Flash Attention 2 on macOS)
# - "ollama": Local LLMs via Ollama
# - "anthropic": Anthropic Claude API (requires API key)
# - "openai": OpenAI GPT API (requires API key)
# - "xai": xAI Grok API (2M context, $0.50-$1.50/M tokens, requires API key)
# - "openai-compatible": Any OpenAI-compatible API endpoint
provider = "lmstudio"
# LLM model identifier
# For LM Studio: lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF
# For Ollama: Model name (e.g., "qwen2.5-coder:14b", "codellama:13b")
# For Anthropic: Model name (e.g., "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022")
# For OpenAI: Model name (e.g., "gpt-4o", "gpt-4o-mini", "gpt-4-turbo")
# For xAI: Model name (e.g., "grok-4-fast", "grok-4-turbo")
# For OpenAI-compatible: Custom model name
# Recommended: DeepSeek Coder v2 Lite Instruct Q4_K_M (local), or Claude 3.5 Sonnet (cloud)
model = "lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF"
# LM Studio URL (only used if provider is "lmstudio")
lmstudio_url = "http://localhost:1234"
# Ollama URL (only used if provider is "ollama")
ollama_url = "http://localhost:11434"
# OpenAI-compatible base URL (only used if provider is "openai-compatible")
# Example: "http://localhost:1234/v1" for LM Studio OpenAI endpoint
# openai_compatible_url = "http://localhost:1234/v1"
# Anthropic API key (only used if provider is "anthropic")
# Can also be set via ANTHROPIC_API_KEY environment variable
# anthropic_api_key = "sk-ant-..."
# OpenAI API key (used when provider is "openai"; some "openai-compatible" endpoints also expect it)
# Can also be set via OPENAI_API_KEY environment variable
# openai_api_key = "sk-..."
# xAI API key (only used if provider is "xai")
# Can also be set via XAI_API_KEY environment variable
# xai_api_key = "xai-..."
# xAI base URL (only used if provider is "xai")
# Default: "https://api.x.ai/v1"
# xai_base_url = "https://api.x.ai/v1"
# Context window size (tokens)
# DeepSeek Coder v2 Lite: 32768 tokens
# Claude 3.5 Sonnet: 200000 tokens
# GPT-4o: 128000 tokens
# Grok-4 (xAI): 2000000 tokens (2M!)
context_window = 32000
# Temperature for generation (0.0 = deterministic, 2.0 = very creative)
temperature = 0.1
# Maximum tokens to generate in responses (legacy parameter, use max_completion_token for Responses API)
max_tokens = 4096
# Maximum output tokens for Responses API and reasoning models
# If not set, falls back to max_tokens
# max_completion_token = 4096
# Reasoning effort for reasoning models (o1, o3, o4-mini, GPT-5)
# Options: "minimal", "low", "medium", "high"
# Higher effort = more reasoning tokens = better quality but slower and more expensive
# Only applies to reasoning models, ignored by standard models
# reasoning_effort = "medium"
# Request timeout in seconds
timeout_secs = 120
# Use legacy Chat Completions API instead of modern Responses API
# Only needed for old providers that don't support Responses API
# Default: false (use Responses API)
# Note: Ollama always uses its native API regardless of this setting
# use_completions_api = false
# Insights mode: "context-only", "balanced", or "deep"
# - context-only: Return context only (fastest, for agents)
# - balanced: Process top 10 files with LLM (good speed/quality)
# - deep: Process all reranked files (comprehensive)
insights_mode = "context-only"
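#
# Example: a cloud setup with Anthropic, using the model and context-window
# figures listed above (sketch only; set ANTHROPIC_API_KEY in your environment
# rather than committing a key to this file):
# enabled = true
# provider = "anthropic"
# model = "claude-3-5-sonnet-20241022"
# context_window = 200000
# insights_mode = "balanced"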
# ============================================================================
# Agent Architecture Configuration (AutoAgents Experimental)
# ============================================================================
# Note: Requires building with --features autoagents-experimental
# Agent architecture: "react" or "lats"
# - "react": Fast single-pass reasoning (default)
# - "lats": Language Agent Tree Search for higher quality (requires --features autoagents-lats)
# agent_architecture = "react"
# LATS-specific configuration (only used when agent_architecture = "lats")
# Note: All LATS features require --features autoagents-lats at build time
# Selection phase: Fast, cheap model for choosing best nodes to explore
# lats_selection_provider = "openai"
# lats_selection_model = "gpt-4o-mini"
# Expansion phase: Reasoning model for generating high-quality next steps
# lats_expansion_provider = "anthropic"
# lats_expansion_model = "claude-3-5-sonnet-20241022"
# Evaluation phase: Specialized model for accurate state assessment
# lats_evaluation_provider = "openai"
# lats_evaluation_model = "o1-preview"
# Algorithm tuning
# lats_beam_width = 3 # Number of best paths to keep during search (default: 3)
# lats_max_depth = 5 # Maximum search depth (default: 5)
# ============================================================================
# Performance Configuration
# ============================================================================
[performance]
# Number of worker threads (defaults to CPU count)
num_threads = 0 # 0 = auto-detect
# Cache size in MB
cache_size_mb = 512
# Enable GPU acceleration (requires CUDA/Metal support)
enable_gpu = false
# Maximum concurrent requests for embedding/LLM
max_concurrent_requests = 4
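#
# Example: settings for a GPU-equipped workstation (illustrative figures,
# not benchmarks; tune to your own hardware):
# num_threads = 0
# cache_size_mb = 2048
# enable_gpu = true
# max_concurrent_requests = 8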
# ============================================================================
# Logging Configuration
# ============================================================================
[logging]
# Log level: "trace", "debug", "info", "warn", "error"
# Use "warn" during indexing for clean TUI output (recommended)
# Use "info" for development/debugging
level = "warn"
# Log format: "pretty", "json", "compact"
format = "pretty"