"""Documentation indexing and search for PrestaShop MCP server."""
import json
import logging
import sqlite3
from pathlib import Path
from typing import Dict, List, Optional
from .config import CATEGORIES, DB_PATH, DOCS_PATH
from .parsers.hooks import parse_hook_file
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_database():
"""Create SQLite database with FTS5 tables."""
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# Main documentation table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS prestashop_docs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
title TEXT NOT NULL,
category TEXT NOT NULL,
subcategory TEXT,
path TEXT NOT NULL,
origin TEXT,
location TEXT,
content TEXT NOT NULL,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
# FTS5 full-text search table
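# content='prestashop_docs' makes this an external-content table: FTS5 stores only the
# search index and reads column values from prestashop_docs when needed (e.g. for
# snippet()), so the index must be kept in sync manually, as index_hooks() and
# clear_database() do below.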
cursor.execute(
"""
CREATE VIRTUAL TABLE IF NOT EXISTS prestashop_docs_fts USING fts5(
name, title, category, subcategory, origin, location, content,
content='prestashop_docs',
content_rowid='id'
)
"""
)
# Specialized hooks table
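# List-like fields (aliases, github_refs, code_examples) are stored as JSON-encoded TEXT
# and decoded by the read helpers below; locations is stored as-is, as produced by
# parse_hook_file().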
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS hooks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
type TEXT NOT NULL,
origin TEXT NOT NULL,
locations TEXT NOT NULL,
description TEXT,
aliases TEXT,
github_refs TEXT,
code_examples TEXT,
doc_id INTEGER,
FOREIGN KEY(doc_id) REFERENCES prestashop_docs(id)
)
"""
)
# Create indexes
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_category ON prestashop_docs(category)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_subcategory ON prestashop_docs(subcategory)"
)
cursor.execute("CREATE INDEX IF NOT EXISTS idx_origin ON prestashop_docs(origin)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_hook_type ON hooks(type)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_hook_origin ON hooks(origin)")
conn.commit()
conn.close()
logger.info(f"Database created at {DB_PATH}")
def clear_database():
"""Clear all data from database tables."""
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("DELETE FROM hooks")
cursor.execute("DELETE FROM prestashop_docs")
cursor.execute("DELETE FROM prestashop_docs_fts")
conn.commit()
conn.close()
logger.info("Database cleared")
def index_hooks(force: bool = False) -> int:
"""Index all PrestaShop hook files.
Args:
force: Accepted for API symmetry; clearing and re-indexing are driven by index_documentation() via clear_database()
Returns:
Number of hooks indexed
"""
hooks_path = DOCS_PATH / CATEGORIES["hooks"]
if not hooks_path.exists():
logger.error(f"Hooks directory not found: {hooks_path}")
return 0
# Find all hook markdown files
hook_files = list(hooks_path.glob("*.md"))
logger.info(f"Found {len(hook_files)} hook files to index")
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
indexed_count = 0
for i, hook_file in enumerate(hook_files, 1):
hook_data = parse_hook_file(hook_file)
if not hook_data:
continue
try:
# Insert into main docs table
cursor.execute(
"""
INSERT INTO prestashop_docs (
name, title, category, subcategory, path, origin, location, content, metadata
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
hook_data["name"],
hook_data["name"],
"hooks",
hook_data["type"],
hook_data["path"],
hook_data["origin"],
hook_data["locations"],
hook_data["content"],
json.dumps(
{
"aliases": hook_data["aliases"],
"github_refs": hook_data["github_refs"],
"code_examples": hook_data["code_examples"],
}
),
),
)
doc_id = cursor.lastrowid
# Insert into FTS5 table
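# Use doc_id as the FTS rowid so the external-content index stays aligned with the
# prestashop_docs row that holds the original text.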
cursor.execute(
"""
INSERT INTO prestashop_docs_fts (
rowid, name, title, category, subcategory, origin, location, content
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
doc_id,
hook_data["name"],
hook_data["name"],
"hooks",
hook_data["type"],
hook_data["origin"],
hook_data["locations"],
hook_data["content"],
),
)
# Insert into hooks table
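# INSERT OR REPLACE upserts on the UNIQUE constraint on hooks.name, so re-running the
# indexer updates existing hook rows instead of duplicating them.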
cursor.execute(
"""
INSERT OR REPLACE INTO hooks (
name, type, origin, locations, description, aliases,
github_refs, code_examples, doc_id
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
hook_data["name"],
hook_data["type"],
hook_data["origin"],
hook_data["locations"],
hook_data["description"],
json.dumps(hook_data["aliases"]),
json.dumps(hook_data["github_refs"]),
json.dumps(hook_data["code_examples"]),
doc_id,
),
)
indexed_count += 1
if i % 50 == 0:
logger.info(f"Indexed {i}/{len(hook_files)} hooks...")
except sqlite3.Error as e:
logger.error(f"Error indexing {hook_file.name}: {e}")
continue
conn.commit()
conn.close()
logger.info(f"Successfully indexed {indexed_count} hooks")
return indexed_count
def search_hooks(
queries: List[str],
hook_type: Optional[str] = None,
origin: Optional[str] = None,
limit: int = 10,
) -> List[Dict]:
"""Search PrestaShop hooks using FTS5.
Args:
queries: List of search terms
hook_type: Filter by hook type (display, action)
origin: Filter by origin (core, module, theme)
limit: Maximum number of results
Returns:
List of matching hooks with metadata
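Example:
search_hooks(["displayHeader", "header"], hook_type="display", limit=5)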
"""
if not queries:
return []
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
# Build search query
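# Join terms with OR so any of them can match. FTS5 treats each term as query syntax,
# so callers may need to double-quote terms that contain operators or punctuation.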
search_query = " OR ".join(queries)
# Build WHERE clause for filters
where_clauses = ["prestashop_docs_fts MATCH ?"]
params = [search_query]
if hook_type:
where_clauses.append("hooks.type = ?")
params.append(hook_type)
if origin:
where_clauses.append("hooks.origin = ?")
params.append(origin)
where_clause = " AND ".join(where_clauses)
params.append(limit)
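# snippet() argument 6 selects the 'content' column (the seventh column declared in the
# FTS5 table); ORDER BY rank returns the best matches first using FTS5's default bm25
# ranking.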
sql = f"""
SELECT
hooks.name,
hooks.type,
hooks.origin,
hooks.locations,
hooks.description,
hooks.aliases,
hooks.github_refs,
prestashop_docs.path,
snippet(prestashop_docs_fts, 6, '<mark>', '</mark>', '...', 32) as snippet
FROM prestashop_docs_fts
JOIN prestashop_docs ON prestashop_docs.id = prestashop_docs_fts.rowid
JOIN hooks ON hooks.doc_id = prestashop_docs.id
WHERE {where_clause}
ORDER BY rank
LIMIT ?
"""
cursor.execute(sql, params)
results = []
for row in cursor.fetchall():
results.append(
{
"name": row["name"],
"type": row["type"],
"origin": row["origin"],
"locations": row["locations"],
"description": row["description"],
"aliases": json.loads(row["aliases"]) if row["aliases"] else [],
"github_refs": (
json.loads(row["github_refs"]) if row["github_refs"] else []
),
"path": row["path"],
"snippet": row["snippet"],
}
)
conn.close()
return results
def get_hook(hook_name: str) -> Optional[Dict]:
"""Get complete documentation for a specific hook.
Args:
hook_name: Name of the hook
Returns:
Hook documentation dict or None if not found
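Example:
get_hook("displayHeader")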
"""
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute(
"""
SELECT
hooks.*,
prestashop_docs.content,
prestashop_docs.path
FROM hooks
JOIN prestashop_docs ON hooks.doc_id = prestashop_docs.id
WHERE hooks.name = ?
""",
(hook_name,),
)
row = cursor.fetchone()
conn.close()
if not row:
return None
return {
"name": row["name"],
"type": row["type"],
"origin": row["origin"],
"locations": row["locations"],
"description": row["description"],
"aliases": json.loads(row["aliases"]) if row["aliases"] else [],
"github_refs": json.loads(row["github_refs"]) if row["github_refs"] else [],
"code_examples": (
json.loads(row["code_examples"]) if row["code_examples"] else []
),
"content": row["content"],
"path": row["path"],
}
def list_hooks(
hook_type: Optional[str] = None, origin: Optional[str] = None
) -> List[Dict]:
"""List all hooks with optional filters.
Args:
hook_type: Filter by type (display, action)
origin: Filter by origin (core, module, theme)
Returns:
List of hooks
"""
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
where_clauses = []
params = []
if hook_type:
where_clauses.append("type = ?")
params.append(hook_type)
if origin:
where_clauses.append("origin = ?")
params.append(origin)
where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
cursor.execute(
f"""
SELECT name, type, origin, locations, description
FROM hooks
{where_clause}
ORDER BY name
""",
params,
)
results = [dict(row) for row in cursor.fetchall()]
conn.close()
return results
def index_documentation(force: bool = False) -> int:
"""Index all PrestaShop documentation.
Args:
force: Force re-indexing
Returns:
Total number of documents indexed
"""
# Create the database schema if it doesn't exist
create_database()
# Clear existing data when a re-index is forced
if force:
logger.info("Clearing existing documentation...")
clear_database()
# Check if already indexed
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM prestashop_docs")
count = cursor.fetchone()[0]
conn.close()
if count > 0 and not force:
logger.info(f"Documentation already indexed ({count} documents)")
return count
# Index hooks (MVP - only hooks for now)
hooks_count = index_hooks(force=force)
return hooks_count
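# CLI entry point, e.g. `python -m <package>.ingest --force`; the exact module path
# depends on the package this file lives in, which isn't shown here.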
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Index PrestaShop documentation")
parser.add_argument(
"--force", action="store_true", help="Force re-indexing of all documentation"
)
args = parser.parse_args()
count = index_documentation(force=args.force)
print(f"Indexed {count} documents")