Skip to main content
Glama

MCP Starter for Puch AI

by Kulraj69
parsing.py (5.92 kB)
"""Parsing helpers for brand/domain analytics over raw LLM answer text.

Covers: domain extraction, brand-mention extraction, naive keyword-window
sentiment, share-of-voice computation, first-position detection, and
heuristic brand auto-detection.
"""

import re
from collections import Counter
from typing import Any, Dict, List, Optional, Set

# Keyword lexicons driving the naive window-based sentiment heuristic.
POSITIVE_WORDS = {
    "best", "recommended", "affordable", "powerful", "reliable", "intuitive",
    "robust", "excellent", "great", "good", "top", "leading", "premium",
    "efficient", "fast", "easy", "simple", "comprehensive", "complete",
}

NEGATIVE_WORDS = {
    "expensive", "buggy", "limited", "poor", "bad", "slow", "complicated",
    "difficult", "unreliable", "outdated", "weak", "basic", "incomplete",
    "problematic", "frustrating", "confusing", "overpriced",
}


def extract_domains(raw_answer: str) -> List[Dict[str, Any]]:
    """Extract registered domains mentioned in *raw_answer*.

    Both full URLs and bare domain-looking tokens are reduced to their
    registered domain (e.g. ``sub.example.co.uk`` -> ``example.co.uk``).

    Returns:
        List of ``{"domain": str, "count": int}`` dicts sorted by
        descending occurrence count.
    """
    # Imported lazily so the rest of this module remains importable and
    # usable even when the third-party tldextract package is unavailable.
    import tldextract

    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    domain_pattern = r'\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b'

    domains: Set[str] = set()
    # URLs and standalone domain tokens both funnel through tldextract.
    candidates = re.findall(url_pattern, raw_answer) + re.findall(domain_pattern, raw_answer)
    for candidate in candidates:
        try:
            extracted = tldextract.extract(candidate)
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            continue
        if extracted.domain and extracted.suffix:
            domains.add(f"{extracted.domain}.{extracted.suffix}")

    # Count occurrences.  The \b guards fix an overcount bug where one
    # extracted domain matched as a substring of another (e.g. "oo.com"
    # matching inside every "foo.com").
    domain_counts: Counter = Counter()
    for domain in domains:
        bounded = r"\b" + re.escape(domain) + r"\b"
        domain_counts[domain] = len(re.findall(bounded, raw_answer, re.IGNORECASE))

    return [{"domain": d, "count": c} for d, c in domain_counts.most_common()]


def extract_mentions(raw_answer: str, brand_set: Set[str]) -> List[Dict[str, Any]]:
    """Locate case-insensitive brand mentions in *raw_answer*.

    Returns:
        One ``{"brand", "count", "first_index"}`` dict per brand that
        appears at least once, sorted by earliest first occurrence.
    """
    mentions: List[Dict[str, Any]] = []
    for brand in brand_set:
        matches = list(re.finditer(re.escape(brand), raw_answer, re.IGNORECASE))
        if matches:
            mentions.append({
                "brand": brand,
                "count": len(matches),
                "first_index": matches[0].start(),
            })
    mentions.sort(key=lambda m: m["first_index"])
    return mentions


def naive_sentiment(raw_answer: str, brand: str) -> Dict[str, float]:
    """Score sentiment for *brand* from +/-40-char windows around each mention.

    Each window contributes at most one hit per lexicon word.  Returns a
    ``{"positive", "neutral", "negative"}`` dict of ratios rounded to three
    places; a brand with no mentions is fully neutral.
    """
    positive_count = 0
    negative_count = 0
    total_contexts = 0

    for match in re.finditer(re.escape(brand), raw_answer, re.IGNORECASE):
        start = max(0, match.start() - 40)
        end = min(len(raw_answer), match.end() + 40)
        # Tokenize on word boundaries: the previous substring test produced
        # false positives such as "bad" matching inside "badge".
        context_words = set(re.findall(r"[a-z]+", raw_answer[start:end].lower()))
        positive_count += len(context_words & POSITIVE_WORDS)
        negative_count += len(context_words & NEGATIVE_WORDS)
        total_contexts += 1

    if total_contexts == 0:
        return {"positive": 0.0, "neutral": 1.0, "negative": 0.0}

    denom = positive_count + negative_count + 1  # +1 smoothing avoids 0-division
    positive_ratio = positive_count / denom
    negative_ratio = negative_count / denom
    neutral_ratio = 1.0 - positive_ratio - negative_ratio
    return {
        "positive": round(positive_ratio, 3),
        "neutral": round(max(0, neutral_ratio), 3),
        "negative": round(negative_ratio, 3),
    }


def compute_sov(mentions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Compute each brand's share of voice (fraction of all mention counts).

    ``by_platform`` is left empty for the caller to fill with
    platform-specific breakdowns.  Returns ``[]`` for empty input.
    """
    if not mentions:
        return []
    total_mentions = sum(m["count"] for m in mentions)
    return [
        {
            "brand": m["brand"],
            "overall": round(m["count"] / total_mentions, 3),
            "by_platform": {},  # Will be filled by platform-specific data
        }
        for m in mentions
    ]


def compute_first_position(mentions: List[Dict[str, Any]]) -> Optional[str]:
    """Return the brand mentioned earliest, or ``None`` if there are none."""
    if not mentions:
        return None
    # min() replaces a full sort: only the earliest mention is needed.
    return min(mentions, key=lambda m: m["first_index"])["brand"]


def auto_detect_brands(raw_answer: str, max_brands: int = 10) -> List[str]:
    """Heuristically detect brand names as capitalized (proper-noun) tokens.

    Filters a small stop-list of common capitalized English words and tokens
    shorter than 3 characters, then returns up to *max_brands* of the most
    frequent remaining candidates, most common first.
    """
    words = re.findall(r"\b[A-Z][a-zA-Z0-9]*\b", raw_answer)
    common_words = {
        "The", "This", "That", "These", "Those", "And", "Or", "But", "For",
        "With", "From", "About", "When", "Where", "How", "Why", "What",
        "Which", "Who", "You", "Your", "We", "Our", "They", "Their",
        "He", "She", "It", "Its",
    }
    candidates = [w for w in words if w not in common_words and len(w) > 2]
    return [brand for brand, _ in Counter(candidates).most_common(max_brands)]

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Kulraj69/mcp-llm'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.