fetcher.py
import json
import re
import time
from pathlib import Path
from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://4x.ant.design"
OVERVIEW_PATH = "/components/overview-cn/"

# Use the project-root cache directory if available to reuse previously cached HTML.
CACHE_DIR = Path(__file__).resolve().parent.parent / "cache"
CACHE_DIR.mkdir(exist_ok=True)

HEADERS = {"User-Agent": "Mozilla/5.0 (MCP Antd Fetcher)"}

EXPORT_DIR = Path(__file__).parent / "exports"
EXPORT_DIR.mkdir(exist_ok=True)


class FetchError(Exception):
    pass


def fetch_url(url: str, *, force: bool = False, sleep: float = 0.5) -> str:
    """Fetch a URL, caching the HTML on disk; `force=True` bypasses the cache."""
    cache_file = CACHE_DIR / (re.sub(r'[^a-zA-Z0-9]+', '_', url) + '.html')
    if cache_file.exists() and not force:
        return cache_file.read_text(encoding='utf-8', errors='ignore')
    resp = requests.get(url, headers=HEADERS, timeout=15)
    if resp.status_code != 200:
        raise FetchError(f"Failed {url} status={resp.status_code}")
    text = resp.text
    cache_file.write_text(text, encoding='utf-8')
    time.sleep(sleep)  # throttle only on real network fetches
    return text


def parse_overview(html: str) -> List[Dict[str, Any]]:
    """Parse the overview page into a deduplicated list of component index entries."""
    soup = BeautifulSoup(html, 'lxml')
    components: List[Dict[str, Any]] = []

    def normalize_name(raw: str) -> str:
        # Keep only the leading ASCII name, e.g. "Button 按钮" -> "Button".
        raw = raw.strip()
        m = re.match(r'([A-Za-z0-9]+)', raw)
        if m:
            return m.group(1)
        return raw.split()[0] if raw else raw

    # Component cards on the overview page.
    for card in soup.select('.components-overview-card'):
        title_el = card.select_one('.components-overview-title')
        if not title_el:
            continue
        full_title = title_el.get_text(strip=True)
        name = normalize_name(full_title)
        link_el = card.find_parent('a') or card.select_one('a[href]')
        href = link_el['href'] if link_el and link_el.has_attr('href') else None
        desc = ''
        url = BASE_URL + href if href and href.startswith('/') else href
        components.append({
            'name': name,
            'display_name': full_title,
            'url': url,
            'description': desc,
        })

    # Sidebar menu links as a fallback source of component pages.
    for li in soup.select('ul.ant-menu li a[href*="/components/"]'):
        spans_text = ''.join(span.get_text(strip=True) for span in li.select('span')) or li.get_text(strip=True)
        eng_name = normalize_name(spans_text)
        href = li.get('href')
        url = BASE_URL + href if href and href.startswith('/') else href
        components.append({
            'name': eng_name,
            'display_name': spans_text,
            'url': url,
            'description': ''
        })

    def is_valid(c: Dict[str, Any]) -> bool:
        u = c.get('url')
        if not u or not isinstance(u, str) or '/components/' not in u:
            return False
        if not (u.startswith('http://') or u.startswith('https://')):
            return False
        return True

    # Deduplicate by lowercased name, preferring the entry with the longer display name.
    cleaned: Dict[str, Dict[str, Any]] = {}
    for c in components:
        if not is_valid(c):
            continue
        key = c['name'].lower()
        if key not in cleaned or len(c.get('display_name', '')) > len(cleaned[key].get('display_name', '')):
            cleaned[key] = c
    return list(cleaned.values())


def parse_component(html: str) -> Dict[str, Any]:
    """Parse a component detail page: title, intro, API tables, and code examples."""
    soup = BeautifulSoup(html, 'lxml')
    data: Dict[str, Any] = {}
    h1 = soup.select_one('h1')
    if h1:
        data['title'] = h1.get_text(strip=True)
    intro_parts = []
    for p in soup.select('p')[:5]:
        text = p.get_text(strip=True)
        if text:
            intro_parts.append(text)
    data['intro'] = intro_parts

    props_tables: List[Dict[str, Any]] = []
    event_tables: List[Dict[str, Any]] = []
    method_tables: List[Dict[str, Any]] = []
    other_tables: List[Dict[str, Any]] = []

    def classify(header: List[str]) -> str:
        # Loose substring heuristics over the joined header text.
        h_join = ' '.join(header)
        if any(k in h_join for k in ["事件", "回调", "listener", "on"]):
            return 'events'
        if any(k in h_join for k in ["方法", "method", "函数"]):
            return 'methods'
        prop_keywords = ["参数", "属性", "属性名", "名称", "配置项", "参数名", "字段",
                         "Prop", "Property", "选项", "可配置项"]
        if any(pk in h_join for pk in prop_keywords):
            return 'props'
        if any(k in h_join for k in ["类型", "默认", "必填", "必选", "可选值"]) and 'API' in h_join:
            return 'props'
        if 'API' in h_join and any(k in h_join for k in ["类型", "默认", "参数"]):
            return 'props'
        return 'other'

    for tbl in soup.select('table'):
        header = [th.get_text(strip=True) for th in tbl.select('thead tr th')]
        rows_struct = []
        for tr in tbl.select('tbody tr'):
            cells = [td.get_text('\n', strip=True) for td in tr.select('td')]
            if cells:
                if header and len(header) == len(cells):
                    row_dict = {header[i]: cells[i] for i in range(len(header))}
                else:
                    row_dict = {'cells': cells}
                rows_struct.append(row_dict)
        table_obj = {'header': header, 'rows': rows_struct}
        kind = classify(header)
        if kind == 'props':
            props_tables.append(table_obj)
        elif kind == 'events':
            event_tables.append(table_obj)
        elif kind == 'methods':
            method_tables.append(table_obj)
        else:
            other_tables.append(table_obj)

    data['props'] = props_tables
    data['events'] = event_tables
    data['methods'] = method_tables
    data['other_tables'] = other_tables
    data['table_summary'] = {
        'props': len(props_tables),
        'events': len(event_tables),
        'methods': len(method_tables),
        'other': len(other_tables),
    }

    examples = []
    for code in soup.select('pre code'):
        content = code.get_text('\n', strip=False)
        if content:
            examples.append(content)
    data['examples'] = examples

    # Map Chinese/English table headers onto a small set of canonical keys.
    header_synonyms = {
        '参数': 'name', '属性': 'name', '属性名': 'name', '名称': 'name', '配置项': 'name',
        '参数名': 'name', '字段': 'name', 'Prop': 'name', 'Property': 'name', '可配置项': 'name',
        '说明': 'description', '描述': 'description', '备注': 'description', '含义': 'description',
        '类型': 'type', 'Type': 'type', '数据类型': 'type',
        '默认值': 'default', '默认': 'default', '缺省值': 'default',
        '版本': 'version', 'Since': 'version',
        '可选值': 'options', '选项': 'options', '可选': 'options', '枚举': 'options',
        '是否必填': 'required', '必填': 'required', '必选': 'required', '是否必选': 'required',
    }

    props_flat: List[Dict[str, Any]] = []

    def normalize_row(row: Dict[str, Any], header: List[str]) -> Dict[str, Any]:
        normalized: Dict[str, Any] = {'raw': row}
        if 'cells' in row:
            for i, h in enumerate(header):
                key = header_synonyms.get(h, h)
                cells = row['cells']
                if i < len(cells):
                    normalized[key] = cells[i]
        else:
            for k, v in row.items():
                key = header_synonyms.get(k, k)
                normalized[key] = v
        name = normalized.get('name')
        if name:
            normalized['name'] = name.split('\n')[0].strip()
        # Coerce "required" markers such as 是/否 into booleans.
        req_val = normalized.get('required')
        if isinstance(req_val, str):
            if any(token in req_val for token in ['是', '必填', '必选', 'true', '必须']):
                normalized['required'] = True
            elif any(token in req_val for token in ['否', '可选', 'false', '选填']):
                normalized['required'] = False
        return normalized

    for tbl in props_tables:
        header = tbl['header']
        for row in tbl['rows']:
            props_flat.append(normalize_row(row, header))
    data['props_flat'] = props_flat
    return data


def build_component_index(force: bool = False) -> List[Dict[str, Any]]:
    html = fetch_url(BASE_URL + OVERVIEW_PATH, force=force)
    return parse_overview(html)


def get_component_detail(url: str, force: bool = False) -> Dict[str, Any]:
    html = fetch_url(url, force=force)
    data = parse_component(html)
    data['source_url'] = url
    return data


def export_all_components(*, force: bool = False, filepath: str | None = None, validate: bool = True) -> Dict[str, Any]:
    """Fetch every indexed component and dump the parsed details to a single JSON file."""
    index = build_component_index(force=force)
    all_details = []
    for comp in index:
        try:
            if not comp.get('url'):
                raise FetchError('Missing URL')
            detail = get_component_detail(comp['url'], force=force)
            detail['name'] = comp['name']
            all_details.append(detail)
        except Exception as e:
            all_details.append({'name': comp.get('name'), 'error': str(e)})
    # Count failures before optionally filtering them out, so the report stays accurate.
    error_count = sum(1 for d in all_details if 'error' in d)
    if validate:
        all_details = [d for d in all_details if 'error' not in d]
    export_path = filepath or str(EXPORT_DIR / 'antd_components_all.json')
    with open(export_path, 'w', encoding='utf-8') as f:
        json.dump({'generated_at': time.time(), 'count': len(all_details), 'components': all_details},
                  f, ensure_ascii=False, indent=2)
    return {'filepath': export_path, 'count': len(all_details), 'errors': error_count}
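
A minimal usage sketch of the module's public functions, assuming the file above is importable as a module named fetcher (the import path is an assumption, not something the file itself establishes):

# usage_sketch.py — hypothetical example, not part of the original file
from fetcher import build_component_index, get_component_detail, export_all_components

# Build the component index from the cached (or freshly fetched) overview page.
index = build_component_index()
print(len(index), "components indexed")

# Look up a single component and inspect its parsed tables.
button = next(c for c in index if c['name'].lower() == 'button')
detail = get_component_detail(button['url'])
print(detail.get('title'), detail.get('table_summary'))

# Export everything to exports/antd_components_all.json.
result = export_all_components()
print(result['filepath'], result['count'], result['errors'])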
