server.py (13 kB)
import sys
import traceback
import os
import json
import logging
from typing import Any, Dict, List, Optional, Union

from mcp.server.fastmcp import FastMCP
import httpx

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(sys.stderr)
    ]
)

# Print startup message
logging.info("Starting LMStudio bridge server...")

try:
    # ===== Configuration =====
    # Load from environment variables with defaults
    LMSTUDIO_HOST = os.getenv("LMSTUDIO_HOST", "127.0.0.1")
    LMSTUDIO_PORT = os.getenv("LMSTUDIO_PORT", "1234")
    LMSTUDIO_API_URL = f"http://{LMSTUDIO_HOST}:{LMSTUDIO_PORT}/v1"
    DEBUG = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")

    # Set more verbose logging if debug mode is enabled
    if DEBUG:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.debug("Debug mode enabled")

    logging.info(f"Configured LM Studio API URL: {LMSTUDIO_API_URL}")

    # Initialize FastMCP server
    mcp = FastMCP("lmstudio-bridge")

    # ===== Helper Functions =====
    async def call_lmstudio_api(endpoint: str, payload: Dict[str, Any], timeout: float = 60.0) -> Dict[str, Any]:
        """Unified API communication function with better error handling"""
        headers = {
            "Content-Type": "application/json",
            "User-Agent": "claude-lmstudio-bridge/1.0"
        }

        url = f"{LMSTUDIO_API_URL}/{endpoint}"
        logging.debug(f"Making request to {url}")
        logging.debug(f"Payload: {json.dumps(payload, indent=2)}")

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url,
                    json=payload,
                    headers=headers,
                    timeout=timeout
                )

                # Better error handling with specific error messages
                if response.status_code != 200:
                    error_message = f"LM Studio API error: {response.status_code}"
                    try:
                        error_json = response.json()
                        if "error" in error_json:
                            if isinstance(error_json["error"], dict) and "message" in error_json["error"]:
                                error_message += f" - {error_json['error']['message']}"
                            else:
                                error_message += f" - {error_json['error']}"
                    except Exception:
                        error_message += f" - {response.text[:100]}"

                    logging.error(f"Error response: {error_message}")
                    return {"error": error_message}

                result = response.json()
                logging.debug(f"Response received: {json.dumps(result, indent=2, default=str)[:200]}...")
                return result
        except httpx.RequestError as e:
            logging.error(f"Request error: {str(e)}")
            return {"error": f"Connection error: {str(e)}"}
        except Exception as e:
            logging.error(f"Unexpected error: {str(e)}")
            return {"error": f"Unexpected error: {str(e)}"}

    def prepare_chat_messages(messages_input: Union[str, List, Dict]) -> List[Dict[str, str]]:
        """Convert various input formats to what LMStudio expects"""
        try:
            # If messages_input is a string
            if isinstance(messages_input, str):
                # Try to parse it as JSON
                try:
                    parsed = json.loads(messages_input)
                    if isinstance(parsed, list):
                        return parsed
                    else:
                        # If it's parsed but not a list, make it a user message
                        return [{"role": "user", "content": messages_input}]
                except json.JSONDecodeError:
                    # If not valid JSON, assume it's a simple message
                    return [{"role": "user", "content": messages_input}]
            # If it's a list already
            elif isinstance(messages_input, list):
                return messages_input
            # If it's a dict, assume it's a single message
            elif isinstance(messages_input, dict) and "content" in messages_input:
                if "role" not in messages_input:
                    messages_input["role"] = "user"
                return [messages_input]
            # If it's some other format, convert to string and make it a user message
            else:
                return [{"role": "user", "content": str(messages_input)}]
        except Exception as e:
            logging.error(f"Error preparing chat messages: {str(e)}")
            # Fallback to simplest format
            return [{"role": "user", "content": str(messages_input)}]

    # ===== MCP Tools =====
    @mcp.tool()
    async def check_lmstudio_connection() -> str:
        """Check if the LM Studio server is running and accessible.

        Returns:
            Connection status and model information
        """
        try:
            # Try to get the server status via models endpoint
            async with httpx.AsyncClient() as client:
                response = await client.get(f"{LMSTUDIO_API_URL}/models", timeout=5.0)

                if response.status_code == 200:
                    models_data = response.json()
                    if "data" in models_data and len(models_data["data"]) > 0:
                        active_model = models_data["data"][0]["id"]
                        return f"✅ Connected to LM Studio. Active model: {active_model}"
                    else:
                        return "✅ Connected to LM Studio but no models are currently loaded"
                else:
                    return f"❌ LM Studio returned an error: {response.status_code}"
        except Exception as e:
            return f"❌ Failed to connect to LM Studio: {str(e)}"

    @mcp.tool()
    async def list_lmstudio_models() -> str:
        """List available LLM models in LM Studio.

        Returns:
            A formatted list of available models with their details.
        """
        logging.info("list_lmstudio_models function called")

        try:
            # Use the API helper function
            models_response = await call_lmstudio_api("models", {}, timeout=10.0)

            # Check for errors from the API helper
            if "error" in models_response:
                return f"Error listing models: {models_response['error']}"

            if not models_response or "data" not in models_response:
                return "No models found or unexpected response format."

            models = models_response["data"]
            model_info = []

            for model in models:
                model_info.append(f"ID: {model.get('id', 'Unknown')}")
                model_info.append(f"Name: {model.get('name', 'Unknown')}")
                if model.get('description'):
                    model_info.append(f"Description: {model.get('description')}")
                model_info.append("---")

            if not model_info:
                return "No models available in LM Studio."

            return "\n".join(model_info)
        except Exception as e:
            logging.error(f"Unexpected error in list_lmstudio_models: {str(e)}")
            traceback.print_exc(file=sys.stderr)
            return f"Unexpected error: {str(e)}"

    @mcp.tool()
    async def generate_text(
        prompt: str,
        model_id: str = "",
        max_tokens: int = 1000,
        temperature: float = 0.7
    ) -> str:
        """Generate text using a local LLM in LM Studio.

        Args:
            prompt: The text prompt to send to the model
            model_id: ID of the model to use (leave empty for default model)
            max_tokens: Maximum number of tokens in the response (default: 1000)
            temperature: Randomness of the output (0-1, default: 0.7)

        Returns:
            The generated text from the local LLM
        """
        logging.info("generate_text function called")

        try:
            # Validate inputs
            if not prompt or not prompt.strip():
                return "Error: Prompt cannot be empty."

            if max_tokens < 1:
                return "Error: max_tokens must be a positive integer."

            if temperature < 0 or temperature > 1:
                return "Error: temperature must be between 0 and 1."

            # Prepare payload
            payload = {
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "stream": False
            }

            # Add model if specified
            if model_id and model_id.strip():
                payload["model"] = model_id.strip()

            # Make request to LM Studio API using the helper function
            response = await call_lmstudio_api("completions", payload)

            # Check for errors from the API helper
            if "error" in response:
                return f"Error generating text: {response['error']}"

            # Extract and return the generated text
            if "choices" in response and len(response["choices"]) > 0:
                return response["choices"][0].get("text", "")

            return "No response generated."
        except Exception as e:
            logging.error(f"Unexpected error in generate_text: {str(e)}")
            traceback.print_exc(file=sys.stderr)
            return f"Unexpected error: {str(e)}"

    @mcp.tool()
    async def chat_completion(
        messages: str,
        model_id: str = "",
        max_tokens: int = 1000,
        temperature: float = 0.7
    ) -> str:
        """Generate a chat completion using a local LLM in LM Studio.

        Args:
            messages: JSON string of messages in the format [{"role": "user", "content": "Hello"}, ...]
                or a simple text string which will be treated as a user message
            model_id: ID of the model to use (leave empty for default model)
            max_tokens: Maximum number of tokens in the response (default: 1000)
            temperature: Randomness of the output (0-1, default: 0.7)

        Returns:
            The generated text from the local LLM
        """
        logging.info("chat_completion function called")

        try:
            # Standardize message format using the helper function
            messages_formatted = prepare_chat_messages(messages)
            logging.debug(f"Formatted messages: {json.dumps(messages_formatted, indent=2)}")

            # Validate inputs
            if not messages_formatted:
                return "Error: At least one message is required."

            if max_tokens < 1:
                return "Error: max_tokens must be a positive integer."

            if temperature < 0 or temperature > 1:
                return "Error: temperature must be between 0 and 1."

            # Prepare payload
            payload = {
                "messages": messages_formatted,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "stream": False
            }

            # Add model if specified
            if model_id and model_id.strip():
                payload["model"] = model_id.strip()

            # Make request to LM Studio API using the helper function
            response = await call_lmstudio_api("chat/completions", payload)

            # Check for errors from the API helper
            if "error" in response:
                return f"Error generating chat completion: {response['error']}"

            # Extract and return the generated text
            if "choices" in response and len(response["choices"]) > 0:
                choice = response["choices"][0]
                if "message" in choice and "content" in choice["message"]:
                    return choice["message"]["content"]

            return "No response generated."
        except Exception as e:
            logging.error(f"Unexpected error in chat_completion: {str(e)}")
            traceback.print_exc(file=sys.stderr)
            return f"Unexpected error: {str(e)}"

    if __name__ == "__main__":
        logging.info("Starting server with stdio transport...")
        # Initialize and run the server
        mcp.run(transport='stdio')

except Exception as e:
    logging.critical(f"CRITICAL ERROR: {str(e)}")
    logging.critical("Traceback:")
    traceback.print_exc(file=sys.stderr)
    sys.exit(1)
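For a quick standalone check of the endpoint the bridge talks to, the same chat/completions request can be sent directly with httpx. This is a minimal sketch, assuming LM Studio is running locally on the default 127.0.0.1:1234 with a model loaded; the file name, prompt, and parameter values are illustrative only and not part of server.py.

# test_lmstudio_endpoint.py - hypothetical helper, not part of server.py
# Sends the same payload shape that chat_completion() builds and prints the reply.
import asyncio
import httpx

async def main() -> None:
    payload = {
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "max_tokens": 100,
        "temperature": 0.7,
        "stream": False,
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "http://127.0.0.1:1234/v1/chat/completions",  # matches the default LMSTUDIO_API_URL
            json=payload,
            timeout=60.0,
        )
        response.raise_for_status()
        data = response.json()
        # server.py reads choices[0].message.content from this same response shape.
        print(data["choices"][0]["message"]["content"])

if __name__ == "__main__":
    asyncio.run(main())

If this prints a completion, the bridge's check_lmstudio_connection and chat_completion tools should work against the same LM Studio instance.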

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/infinitimeless/claude-lmstudio-bridge'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.