Skip to main content
Glama
pro_image_service.py16.3 kB
"""Gemini 3 Pro Image specialized service for high-quality generation.""" import base64 import logging from typing import Any from fastmcp.utilities.types import Image as MCPImage from ..config.settings import MediaResolution, ProImageConfig, ThinkingLevel from ..core.progress_tracker import ProgressContext from ..utils.image_utils import validate_image_format from .gemini_client import GeminiClient from .image_storage_service import ImageStorageService class ProImageService: """Service for high-quality image generation using Gemini 3 Pro Image model.""" def __init__( self, gemini_client: GeminiClient, config: ProImageConfig, storage_service: ImageStorageService | None = None, ): self.gemini_client = gemini_client self.config = config self.storage_service = storage_service self.logger = logging.getLogger(__name__) def generate_images( self, prompt: str, n: int = 1, resolution: str = "high", thinking_level: ThinkingLevel | None = None, enable_grounding: bool | None = None, media_resolution: MediaResolution | None = None, negative_prompt: str | None = None, system_instruction: str | None = None, input_images: list[tuple[str, str]] | None = None, use_storage: bool = True, ) -> tuple[list[MCPImage], list[dict[str, Any]]]: """ Generate high-quality images using Gemini 3 Pro Image. 
Features: - Up to 4K resolution support - Google Search grounding for factual accuracy - Advanced reasoning with configurable thinking levels - Professional-grade outputs Args: prompt: Main generation prompt n: Number of images to generate resolution: Output resolution ('high', '4k', '2k', '1k') thinking_level: Reasoning depth (LOW or HIGH) enable_grounding: Enable Google Search grounding media_resolution: Vision processing detail level negative_prompt: Optional constraints to avoid system_instruction: Optional system-level guidance input_images: List of (base64, mime_type) tuples for conditioning use_storage: Store images and return resource links with thumbnails Returns: Tuple of (image_blocks_or_resource_links, metadata_list) """ # Apply defaults from config if thinking_level is None: thinking_level = self.config.default_thinking_level if enable_grounding is None: enable_grounding = self.config.enable_search_grounding if media_resolution is None: media_resolution = self.config.default_media_resolution with ProgressContext( "pro_image_generation", f"Generating {n} high-quality image(s) with Gemini 3 Pro...", {"prompt": prompt[:100], "count": n, "resolution": resolution} ) as progress: progress.update(5, "Configuring Pro model parameters...") self.logger.info( f"Pro generation: prompt='{prompt[:50]}...', n={n}, " f"resolution={resolution}, thinking={thinking_level.value}, " f"grounding={enable_grounding}" ) progress.update(10, "Preparing generation request...") # Build content with Pro-optimized prompt contents = [] # System instruction (optional) if system_instruction: contents.append(system_instruction) elif enable_grounding: # Add grounding hint for Pro model contents.append( "Use real-world knowledge and current information " "to create accurate, detailed images." 
) # Enhanced prompt for Pro model enhanced_prompt = self._enhance_prompt_for_pro( prompt, resolution, negative_prompt ) contents.append(enhanced_prompt) # Add input images if provided (Pro benefits from images-first) if input_images: images_b64, mime_types = zip(*input_images, strict=False) image_parts = self.gemini_client.create_image_parts( list(images_b64), list(mime_types) ) # Pro model: place images before text for better context contents = image_parts + contents progress.update(20, "Sending requests to Gemini 3 Pro API...") # Generate images all_images = [] all_metadata = [] for i in range(n): try: progress.update( 20 + (i * 70 // n), f"Generating high-quality image {i + 1}/{n}..." ) # Build generation config for Pro model gen_config = { "thinking_level": thinking_level.value, } # Add Pro-specific parameters if self.config.supports_media_resolution: gen_config["media_resolution"] = media_resolution.value # Note: Grounding is controlled via prompt/system instruction # The API may not expose enable_grounding as a direct parameter # depending on SDK version response = self.gemini_client.generate_content( contents, config=gen_config ) images = self.gemini_client.extract_images(response) for j, image_bytes in enumerate(images): # Pro metadata metadata = { "model": self.config.model_name, "model_tier": "pro", "response_index": i + 1, "image_index": j + 1, "resolution": resolution, "thinking_level": thinking_level.value, "media_resolution": media_resolution.value, "grounding_enabled": enable_grounding, "mime_type": f"image/{self.config.default_image_format}", "synthid_watermark": True, "prompt": prompt, "enhanced_prompt": enhanced_prompt, "negative_prompt": negative_prompt, } # Storage handling if use_storage and self.storage_service: stored_info = self.storage_service.store_image( image_bytes, f"image/{self.config.default_image_format}", metadata ) thumbnail_b64 = self.storage_service.get_thumbnail_base64( stored_info.id ) if thumbnail_b64: thumbnail_bytes = 
base64.b64decode(thumbnail_b64) thumbnail_image = MCPImage(data=thumbnail_bytes, format="jpeg") all_images.append(thumbnail_image) metadata.update({ "storage_id": stored_info.id, "full_image_uri": f"file://images/{stored_info.id}", "full_path": stored_info.full_path, "thumbnail_uri": f"file://images/{stored_info.id}/thumbnail", "size_bytes": stored_info.size_bytes, "thumbnail_size_bytes": stored_info.thumbnail_size_bytes, "width": stored_info.width, "height": stored_info.height, "expires_at": stored_info.expires_at, "is_stored": True, }) all_metadata.append(metadata) self.logger.info( f"Generated Pro image {i + 1}.{j + 1} - " f"stored as {stored_info.id} " f"({stored_info.size_bytes} bytes, {stored_info.width}x{stored_info.height})" ) else: # Direct return without storage mcp_image = MCPImage( data=image_bytes, format=self.config.default_image_format ) all_images.append(mcp_image) all_metadata.append(metadata) self.logger.info( f"Generated Pro image {i + 1}.{j + 1} " f"(size: {len(image_bytes)} bytes)" ) except Exception as e: self.logger.error(f"Failed to generate Pro image {i + 1}: {e}") # Continue with other images rather than failing completely continue progress.update(100, f"Generated {len(all_images)} high-quality image(s)") if not all_images: self.logger.warning("No images were generated by Pro model") return all_images, all_metadata def edit_image( self, instruction: str, base_image_b64: str, mime_type: str = "image/png", thinking_level: ThinkingLevel | None = None, media_resolution: MediaResolution | None = None, use_storage: bool = True, ) -> tuple[list[MCPImage], int]: """ Edit images with Pro model's enhanced understanding. 
Benefits: - Better context understanding - Higher quality edits - Maintains fine details Args: instruction: Natural language editing instruction base_image_b64: Base64 encoded source image mime_type: MIME type of source image thinking_level: Reasoning depth media_resolution: Vision processing detail level use_storage: Store edited images and return resource links Returns: Tuple of (edited_images_or_resource_links, count) """ # Apply defaults if thinking_level is None: thinking_level = self.config.default_thinking_level if media_resolution is None: media_resolution = self.config.default_media_resolution with ProgressContext( "pro_image_editing", "Editing image with Gemini 3 Pro...", {"instruction": instruction[:100]} ) as progress: try: progress.update(10, "Configuring Pro editing parameters...") self.logger.info( f"Pro edit: instruction='{instruction[:50]}...', " f"thinking={thinking_level.value}" ) # Validate image validate_image_format(mime_type) progress.update(20, "Preparing edit request...") # Enhanced instruction for Pro model enhanced_instruction = ( f"{instruction}\n\n" "Maintain the original image's quality and style. " "Make precise, high-quality edits." 
) # Create parts image_parts = self.gemini_client.create_image_parts( [base_image_b64], [mime_type] ) contents = [*image_parts, enhanced_instruction] progress.update(40, "Sending edit request to Gemini 3 Pro API...") # Generate edited image with Pro config gen_config = { "thinking_level": thinking_level.value, "media_resolution": media_resolution.value, } response = self.gemini_client.generate_content( contents, config=gen_config ) image_bytes_list = self.gemini_client.extract_images(response) progress.update(70, "Processing edited images...") mcp_images = [] for i, image_bytes in enumerate(image_bytes_list): metadata = { "model": self.config.model_name, "model_tier": "pro", "instruction": instruction, "thinking_level": thinking_level.value, "media_resolution": media_resolution.value, "source_mime_type": mime_type, "result_mime_type": f"image/{self.config.default_image_format}", "synthid_watermark": True, "edit_index": i + 1, } if use_storage and self.storage_service: stored_info = self.storage_service.store_image( image_bytes, f"image/{self.config.default_image_format}", metadata ) thumbnail_b64 = self.storage_service.get_thumbnail_base64( stored_info.id ) if thumbnail_b64: thumbnail_bytes = base64.b64decode(thumbnail_b64) thumbnail_image = MCPImage(data=thumbnail_bytes, format="jpeg") mcp_images.append(thumbnail_image) self.logger.info( f"Edited image {i + 1} with Pro - stored as {stored_info.id} " f"({stored_info.size_bytes} bytes)" ) else: mcp_image = MCPImage( data=image_bytes, format=self.config.default_image_format ) mcp_images.append(mcp_image) self.logger.info( f"Edited image {i + 1} with Pro (size: {len(image_bytes)} bytes)" ) progress.update( 100, f"Successfully edited image with Pro, generated {len(mcp_images)} result(s)" ) return mcp_images, len(mcp_images) except Exception as e: self.logger.error(f"Failed to edit image with Pro: {e}") raise def _enhance_prompt_for_pro( self, prompt: str, resolution: str, negative_prompt: str | None ) -> str: """ 
Enhance prompt to leverage Pro model capabilities. Pro model benefits from: - Narrative, descriptive prompts - Specific composition/lighting details - Quality and detail emphasis """ enhanced = prompt # Pro model benefits from narrative prompts if len(prompt) < 50: enhanced = ( f"Create a high-quality, detailed image: {prompt}. " "Pay attention to composition, lighting, and fine details." ) # Resolution hints for 4K/high-res if resolution in ["4k", "high", "2k"]: if "text" in prompt.lower() or "diagram" in prompt.lower(): enhanced += " Ensure text is sharp and clearly readable at high resolution." if resolution == "4k": enhanced += " Render at maximum 4K quality with exceptional detail." # Negative constraints if negative_prompt: enhanced += f"\n\nAvoid: {negative_prompt}" return enhanced

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/zhongweili/nanobanana-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.