
CodeGraph CLI MCP Server

by Jakedismo
.codegraph.toml.example (7.59 kB)
# CodeGraph Configuration File
# Copy this to .codegraph.toml or ~/.codegraph/config.toml and customize

# ============================================================================
# Embedding Configuration
# ============================================================================
[embedding]

# Provider: "auto", "onnx", "ollama", "openai", or "lmstudio"
# "auto" will detect available models automatically
# "lmstudio" recommended for MLX + Flash Attention 2 (macOS)
provider = "lmstudio"

# Model path or identifier
# For ONNX: Absolute path to model directory (auto-detected from HuggingFace cache)
# For Ollama: Model name (e.g., "all-minilm:latest")
# For LM Studio: Model name (e.g., "jinaai/jina-embeddings-v4")
# For OpenAI: Model name (e.g., "text-embedding-3-small")
# Recommended: jinaai/jina-embeddings-v4 (2048-dim, optimized for code)
model = "jinaai/jina-embeddings-v4"

# LM Studio URL (default port 1234)
lmstudio_url = "http://localhost:1234"

# Ollama URL (only used if provider is "ollama")
ollama_url = "http://localhost:11434"

# OpenAI API key (only used if provider is "openai")
# Can also be set via OPENAI_API_KEY environment variable
# openai_api_key = "sk-..."

# Embedding dimension (2048 for jina-embeddings-v4, 384 for all-MiniLM, 1024 for qwen3-embedding:0.6b, etc.;
# check your model and what's available in the schema)
# all-minilm:latest (Ollama, for speed)
# embeddinggemma, or qwen3-embedding:0.6b/4b/8b, for local SOTA
# Jina for SOTA accuracy
dimension = 2048

# Batch size for embedding generation (GPU optimization)
batch_size = 64

# ============================================================================
# LLM Configuration (for insights generation)
# ============================================================================
[llm]

# Enable LLM insights (false = context-only mode for agents like Claude/GPT-4)
# Set to false for maximum speed if using an external agent
enabled = false

# LLM provider: "ollama", "lmstudio", "anthropic", "openai", "xai", or "openai-compatible"
# - "lmstudio": Local LLMs via LM Studio (recommended for MLX + Flash Attention 2 on macOS)
# - "ollama": Local LLMs via Ollama
# - "anthropic": Anthropic Claude API (requires API key)
# - "openai": OpenAI GPT API (requires API key)
# - "xai": xAI Grok API (2M context, $0.50-$1.50/M tokens, requires API key)
# - "openai-compatible": Any OpenAI-compatible API endpoint
provider = "lmstudio"

# LLM model identifier
# For LM Studio: lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF
# For Ollama: Model name (e.g., "qwen2.5-coder:14b", "codellama:13b")
# For Anthropic: Model name (e.g., "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022")
# For OpenAI: Model name (e.g., "gpt-4o", "gpt-4o-mini", "gpt-4-turbo")
# For xAI: Model name (e.g., "grok-4-fast", "grok-4-turbo")
# For OpenAI-compatible: Custom model name
# Recommended: DeepSeek Coder v2 Lite Instruct Q4_K_M (local), or Claude 3.5 Sonnet (cloud)
model = "lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF"

# LM Studio URL (only used if provider is "lmstudio")
lmstudio_url = "http://localhost:1234"

# Ollama URL (only used if provider is "ollama")
ollama_url = "http://localhost:11434"

# OpenAI-compatible base URL (only used if provider is "openai-compatible")
# Example: "http://localhost:1234/v1" for LM Studio OpenAI endpoint
# openai_compatible_url = "http://localhost:1234/v1"

# Anthropic API key (only used if provider is "anthropic")
# Can also be set via ANTHROPIC_API_KEY environment variable
# anthropic_api_key = "sk-ant-..."

# OpenAI API key (only used if provider is "openai" or some "openai-compatible" endpoints)
# Can also be set via OPENAI_API_KEY environment variable
# openai_api_key = "sk-..."

# xAI API key (only used if provider is "xai")
# Can also be set via XAI_API_KEY environment variable
# xai_api_key = "xai-..."

# xAI base URL (only used if provider is "xai")
# Default: "https://api.x.ai/v1"
# xai_base_url = "https://api.x.ai/v1"

# Context window size (tokens)
# DeepSeek Coder v2 Lite: 32768 tokens
# Claude 3.5 Sonnet: 200000 tokens
# GPT-4o: 128000 tokens
# Grok-4 (xAI): 2000000 tokens (2M!)
context_window = 32000

# Temperature for generation (0.0 = deterministic, 2.0 = very creative)
temperature = 0.1

# Maximum tokens to generate in responses (legacy parameter, use max_completion_token for Responses API)
max_tokens = 4096

# Maximum output tokens for Responses API and reasoning models
# If not set, falls back to max_tokens
# max_completion_token = 4096

# Reasoning effort for reasoning models (o1, o3, o4-mini, GPT-5)
# Options: "minimal", "low", "medium", "high"
# Higher effort = more reasoning tokens = better quality, but slower and more expensive
# Only applies to reasoning models, ignored by standard models
# reasoning_effort = "medium"

# Request timeout in seconds
timeout_secs = 120

# Use legacy Chat Completions API instead of modern Responses API
# Only needed for old providers that don't support the Responses API
# Default: false (use Responses API)
# Note: Ollama always uses its native API regardless of this setting
# use_completions_api = false

# Insights mode: "context-only", "balanced", or "deep"
# - context-only: Return context only (fastest, for agents)
# - balanced: Process top 10 files with LLM (good speed/quality)
# - deep: Process all reranked files (comprehensive)
insights_mode = "context-only"

# ============================================================================
# Agent Architecture Configuration (AutoAgents Experimental)
# ============================================================================
# Note: Requires building with --features autoagents-experimental

# Agent architecture: "react" or "lats"
# - "react": Fast single-pass reasoning (default)
# - "lats": Language Agent Tree Search for higher quality (requires --features autoagents-lats)
# agent_architecture = "react"

# LATS-specific configuration (only used when agent_architecture = "lats")
# Note: All LATS features require --features autoagents-lats at build time

# Selection phase: Fast, cheap model for choosing best nodes to explore
# lats_selection_provider = "openai"
# lats_selection_model = "gpt-4o-mini"

# Expansion phase: Reasoning model for generating high-quality next steps
# lats_expansion_provider = "anthropic"
# lats_expansion_model = "claude-3-5-sonnet-20241022"

# Evaluation phase: Specialized model for accurate state assessment
# lats_evaluation_provider = "openai"
# lats_evaluation_model = "o1-preview"

# Algorithm tuning
# lats_beam_width = 3  # Number of best paths to keep during search (default: 3)
# lats_max_depth = 5   # Maximum search depth (default: 5)

# ============================================================================
# Performance Configuration
# ============================================================================
[performance]

# Number of worker threads (defaults to CPU count)
num_threads = 0  # 0 = auto-detect

# Cache size in MB
cache_size_mb = 512

# Enable GPU acceleration (requires CUDA/Metal support)
enable_gpu = false

# Maximum concurrent requests for embedding/LLM
max_concurrent_requests = 4

# ============================================================================
# Logging Configuration
# ============================================================================
[logging]

# Log level: "trace", "debug", "info", "warn", "error"
# Use "warn" during indexing for clean TUI output (recommended)
# Use "info" for development/debugging
level = "warn"

# Log format: "pretty", "json", "compact"
format = "pretty"
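
To use this example, copy it to one of the locations named in the file's header comment; API keys referenced in the comments can also be supplied as environment variables instead of being written into the config. A minimal sketch (placeholder key values, exact variable names taken from the comments above):

# Project-local config
cp .codegraph.toml.example .codegraph.toml

# ...or a user-global config
mkdir -p ~/.codegraph
cp .codegraph.toml.example ~/.codegraph/config.toml

# Cloud providers can read keys from the environment instead of the file
export OPENAI_API_KEY="sk-..."
export ANTHROPIC_API_KEY="sk-ant-..."
export XAI_API_KEY="xai-..."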

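For a fully local, Ollama-only setup, a minimal variant might look like the sketch below. It only changes keys already present in the example (model and dimension follow the all-MiniLM notes in the embedding comments); treat it as an illustration, not a tested configuration.

[embedding]
provider = "ollama"
model = "all-minilm:latest"              # fast local embeddings via Ollama
ollama_url = "http://localhost:11434"
dimension = 384                          # all-MiniLM models are 384-dimensional
batch_size = 64

[llm]
enabled = false                          # context-only mode for external agents
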

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.