ingest_v2.py
"""Documentation indexing and search for PrestaShop MCP server - Version 2. This version supports indexing ALL documentation types from prestashop-docs folder. """ import json import logging import sqlite3 from pathlib import Path from typing import Dict, List, Optional from .config import CATEGORIES, DB_PATH, DOCS_PATH, INDEXING_CONFIG, DOC_TYPES from .parsers.base_parser import ParserRegistry logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def create_database_v2(): """Create SQLite database with enhanced schema for all doc types.""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() # Main documentation table with new columns cursor.execute( """ CREATE TABLE IF NOT EXISTS prestashop_docs ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, title TEXT NOT NULL, category TEXT NOT NULL, subcategory TEXT, doc_type TEXT NOT NULL, path TEXT NOT NULL, origin TEXT, location TEXT, content TEXT NOT NULL, metadata TEXT, version TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """ ) # FTS5 full-text search table with doc_type cursor.execute( """ CREATE VIRTUAL TABLE IF NOT EXISTS prestashop_docs_fts USING fts5( name, title, category, subcategory, doc_type, origin, location, content, content='prestashop_docs', content_rowid='id', tokenize='porter unicode61' ) """ ) # Specialized hooks table (backward compatibility) cursor.execute( """ CREATE TABLE IF NOT EXISTS hooks ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL, type TEXT NOT NULL, origin TEXT NOT NULL, locations TEXT NOT NULL, description TEXT, aliases TEXT, github_refs TEXT, code_examples TEXT, doc_id INTEGER, FOREIGN KEY(doc_id) REFERENCES prestashop_docs(id) ) """ ) # New: Domain commands/queries table cursor.execute( """ CREATE TABLE IF NOT EXISTS domain_references ( id INTEGER PRIMARY KEY AUTOINCREMENT, entity TEXT NOT NULL, name TEXT NOT NULL, type TEXT NOT NULL, description TEXT, parameters TEXT, code_examples TEXT, doc_id INTEGER, FOREIGN KEY(doc_id) REFERENCES prestashop_docs(id) ) """ ) # New: Components table (forms, grids, etc.) 
cursor.execute( """ CREATE TABLE IF NOT EXISTS components ( id INTEGER PRIMARY KEY AUTOINCREMENT, component_type TEXT NOT NULL, name TEXT NOT NULL, category TEXT, description TEXT, options TEXT, code_examples TEXT, doc_id INTEGER, FOREIGN KEY(doc_id) REFERENCES prestashop_docs(id) ) """ ) # New: API resources table cursor.execute( """ CREATE TABLE IF NOT EXISTS api_resources ( id INTEGER PRIMARY KEY AUTOINCREMENT, api_type TEXT NOT NULL, resource_name TEXT NOT NULL, methods TEXT, schema TEXT, authentication TEXT, doc_id INTEGER, FOREIGN KEY(doc_id) REFERENCES prestashop_docs(id) ) """ ) # Create indexes cursor.execute( "CREATE INDEX IF NOT EXISTS idx_category ON prestashop_docs(category)" ) cursor.execute( "CREATE INDEX IF NOT EXISTS idx_subcategory ON prestashop_docs(subcategory)" ) cursor.execute( "CREATE INDEX IF NOT EXISTS idx_doc_type ON prestashop_docs(doc_type)" ) cursor.execute("CREATE INDEX IF NOT EXISTS idx_origin ON prestashop_docs(origin)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_version ON prestashop_docs(version)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_hook_type ON hooks(type)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_hook_origin ON hooks(origin)") cursor.execute( "CREATE INDEX IF NOT EXISTS idx_domain_entity ON domain_references(entity)" ) cursor.execute( "CREATE INDEX IF NOT EXISTS idx_component_type ON components(component_type)" ) cursor.execute( "CREATE INDEX IF NOT EXISTS idx_api_resource ON api_resources(resource_name)" ) conn.commit() conn.close() logger.info(f"Database created at {DB_PATH}") def migrate_database(): """Migrate from old schema to new schema if needed.""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() try: # Check if doc_type column exists cursor.execute("PRAGMA table_info(prestashop_docs)") columns = [row[1] for row in cursor.fetchall()] if "doc_type" not in columns: logger.info("Migrating database schema...") # Add missing columns cursor.execute("ALTER TABLE prestashop_docs ADD COLUMN doc_type TEXT") cursor.execute("ALTER TABLE prestashop_docs ADD COLUMN version TEXT") cursor.execute( "ALTER TABLE prestashop_docs ADD COLUMN updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP" ) # Update existing records cursor.execute("UPDATE prestashop_docs SET doc_type = 'hook'") # Recreate FTS5 table with new schema cursor.execute("DROP TABLE IF EXISTS prestashop_docs_fts") cursor.execute( """ CREATE VIRTUAL TABLE prestashop_docs_fts USING fts5( name, title, category, subcategory, doc_type, origin, location, content, content='prestashop_docs', content_rowid='id', tokenize='porter unicode61' ) """ ) # Repopulate FTS5 cursor.execute( """ INSERT INTO prestashop_docs_fts (rowid, name, title, category, subcategory, doc_type, origin, location, content) SELECT id, name, title, category, subcategory, doc_type, origin, location, content FROM prestashop_docs """ ) conn.commit() logger.info("Database migration completed") except sqlite3.Error as e: logger.error(f"Migration error: {e}") conn.rollback() finally: conn.close() def clear_database_v2(): """Clear all data from database tables.""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() # Clear all tables tables = [ "api_resources", "components", "domain_references", "hooks", "prestashop_docs", "prestashop_docs_fts", ] for table in tables: try: cursor.execute(f"DELETE FROM {table}") except sqlite3.Error: pass # Table might not exist yet conn.commit() conn.close() logger.info("Database cleared") def should_skip_file(file_path: Path) -> bool: """Check if file should be skipped. 
Args: file_path: Path to check Returns: True if file should be skipped """ path_str = str(file_path) # Check skip patterns from config for pattern in INDEXING_CONFIG["skip_patterns"]: # Convert glob pattern to simple check pattern_simple = pattern.replace("**/", "").replace("/**", "") if pattern_simple in path_str: return True # Skip non-markdown if file_path.suffix != ".md": return True # Skip _index.md files (they're usually just navigation) if file_path.name == "_index.md": return True return False def index_all_documentation( parser_registry: ParserRegistry, force: bool = False ) -> Dict[str, int]: """Index all PrestaShop documentation from all categories. Args: parser_registry: Parser registry with registered parsers force: Force re-indexing Returns: Dictionary with counts per category """ if not DOCS_PATH.exists(): logger.error(f"Documentation path not found: {DOCS_PATH}") return {} conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() category_counts = {} total_indexed = 0 total_errors = 0 for category_name, category_path in CATEGORIES.items(): full_path = DOCS_PATH / category_path if not full_path.exists(): logger.warning(f"Category path not found: {full_path}") continue logger.info(f"Indexing category: {category_name}") md_files = list(full_path.rglob("*.md")) logger.info(f"Found {len(md_files)} markdown files in {category_name}") indexed_count = 0 for i, md_file in enumerate(md_files, 1): # Skip certain files if should_skip_file(md_file): continue # Parse file try: parsed_data = parser_registry.parse_file(md_file) if not parsed_data: continue # Insert into main docs table cursor.execute( """ INSERT INTO prestashop_docs ( name, title, category, subcategory, doc_type, path, origin, location, content, metadata, version ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( parsed_data.get("name", ""), parsed_data.get("title", ""), parsed_data.get("category", category_name), parsed_data.get("subcategory", ""), parsed_data.get("doc_type", DOC_TYPES["general"]), parsed_data.get("path", ""), parsed_data.get("origin", ""), parsed_data.get("location", ""), parsed_data.get("content", ""), json.dumps(parsed_data.get("metadata", {})), parsed_data.get("version", ""), ), ) doc_id = cursor.lastrowid # Insert into FTS5 cursor.execute( """ INSERT INTO prestashop_docs_fts ( rowid, name, title, category, subcategory, doc_type, origin, location, content ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( doc_id, parsed_data.get("name", ""), parsed_data.get("title", ""), parsed_data.get("category", category_name), parsed_data.get("subcategory", ""), parsed_data.get("doc_type", DOC_TYPES["general"]), parsed_data.get("origin", ""), parsed_data.get("location", ""), parsed_data.get("content", ""), ), ) # Insert into specialized tables based on doc_type doc_type = parsed_data.get("doc_type", "") metadata = parsed_data.get("metadata", {}) # Populate domain_references table for CQRS references if doc_type == DOC_TYPES["reference"] and "references" in metadata: entity = metadata.get("entity", "") references = metadata.get("references", []) for ref in references: try: cursor.execute( """ INSERT INTO domain_references ( entity, name, type, description, parameters, code_examples, doc_id ) VALUES (?, ?, ?, ?, ?, ?, ?) 
""", ( entity, ref.get("name", ""), ref.get("type", "command"), ref.get("description", ""), json.dumps(ref.get("parameters", [])), json.dumps([]), # Code examples per reference not extracted yet doc_id, ), ) except Exception as e: logger.error(f"Error inserting domain reference {ref.get('name', '')}: {e}") # Populate components table for form types, grid columns, etc. if doc_type == DOC_TYPES["component"]: component_type = metadata.get("component_type", "component") options = metadata.get("options", []) try: cursor.execute( """ INSERT INTO components ( component_type, name, category, description, options, code_examples, doc_id ) VALUES (?, ?, ?, ?, ?, ?, ?) """, ( component_type, parsed_data.get("name", ""), parsed_data.get("category", ""), metadata.get("description", ""), json.dumps(options), json.dumps(metadata.get("code_examples", [])), doc_id, ), ) except Exception as e: logger.error(f"Error inserting component {parsed_data.get('name', '')}: {e}") indexed_count += 1 total_indexed += 1 # Progress logging if i % 50 == 0: logger.info(f" Progress: {i}/{len(md_files)} files...") conn.commit() # Commit in batches except Exception as e: logger.error(f"Error indexing {md_file}: {e}") total_errors += 1 continue category_counts[category_name] = indexed_count logger.info(f"Indexed {indexed_count} files from {category_name}") # Commit after each category conn.commit() conn.close() logger.info(f"Total indexed: {total_indexed} files") logger.info(f"Total errors: {total_errors} files") return category_counts def index_documentation_v2(force: bool = False) -> int: """Index all PrestaShop documentation - new version. Args: force: Force re-indexing Returns: Total number of documents indexed """ # Create/migrate database create_database_v2() migrate_database() # Clear if force re-index if force: logger.info("Clearing existing documentation...") clear_database_v2() # Check if already indexed conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute("SELECT COUNT(*) FROM prestashop_docs") count = cursor.fetchone()[0] conn.close() if count > 0 and not force: logger.info(f"Documentation already indexed ({count} documents)") return count # Create parser registry and register parsers # Order matters: more specific parsers first, general parser last from .parsers.hooks import HookParser from .parsers.domain_reference_parser import DomainReferenceParser from .parsers.component_parser import ComponentParser from .parsers.guide_parser import GuideParser from .parsers.general_parser import GeneralParser parser_registry = ParserRegistry() parser_registry.register(HookParser()) # Most specific parser_registry.register(DomainReferenceParser()) # CQRS commands/queries parser_registry.register(ComponentParser()) # Form types, grid columns parser_registry.register(GuideParser()) # Installation guides, tutorials parser_registry.register(GeneralParser()) # Fallback for everything else # Index all documentation category_counts = index_all_documentation(parser_registry, force=force) # Calculate total total = sum(category_counts.values()) logger.info(f"\n=== Indexing Complete ===") logger.info(f"Total documents indexed: {total}") for category, count in sorted(category_counts.items()): logger.info(f" {category}: {count}") return total if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description="Index PrestaShop documentation (Version 2)" ) parser.add_argument( "--force", action="store_true", help="Force re-indexing of all documentation" ) args = parser.parse_args() count = 
index_documentation_v2(force=args.force) print(f"\nIndexed {count} documents")
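
For reference, below is a minimal sketch of how the index built by index_documentation_v2 could be queried afterwards. It assumes ingestion has already run and that the database file is reachable at some path (the "prestashop_docs.db" filename and the sample query string are illustrative only; the real location is whatever DB_PATH resolves to in .config). Because prestashop_docs_fts is declared as an external-content FTS5 table over prestashop_docs, selecting name, title, and doc_type from it transparently reads those columns from the base table.

# Sketch only: querying the FTS5 index produced by ingest_v2.py.
# The db_path value and search phrase below are hypothetical examples.
import sqlite3


def search_docs(db_path: str, query: str, limit: int = 5):
    """Return (name, title, doc_type) rows ranked by FTS5 relevance."""
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute(
        """
        SELECT name, title, doc_type
        FROM prestashop_docs_fts
        WHERE prestashop_docs_fts MATCH ?
        ORDER BY rank
        LIMIT ?
        """,
        (query, limit),
    )
    rows = cursor.fetchall()
    conn.close()
    return rows


if __name__ == "__main__":
    for name, title, doc_type in search_docs("prestashop_docs.db", "hooks AND display"):
        print(f"{doc_type:>10}  {name}  ({title})")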
