notebooklm-mcp

MIT License

581

page-utils.ts•14.8 kB

/**
 * Page utilities for extracting responses from NotebookLM web UI
 *
 * This module provides functions to:
 * - Extract latest assistant responses from the page
 * - Wait for new responses with streaming detection
 * - Detect placeholders and loading states
 * - Snapshot existing responses for comparison
 *
 * Based on the Python implementation from page_utils.py
 */

import type { Page } from 'patchright';
import { log } from './logger.js';

// ============================================================================
// Constants
// ============================================================================

/**
 * CSS selectors to find assistant response elements
 * Ordered by priority (most specific first)
 */
const RESPONSE_SELECTORS = [
  '.to-user-container .message-text-content',
  "[data-message-author='bot']",
  "[data-message-author='assistant']",
  "[data-message-role='assistant']",
  "[data-author='assistant']",
  "[data-renderer*='assistant']",
  "[data-automation-id='response-text']",
  "[data-automation-id='assistant-response']",
  "[data-automation-id='chat-response']",
  "[data-testid*='assistant']",
  "[data-testid*='response']",
  "[aria-live='polite']",
  "[role='listitem'][data-message-author]",
];

/**
 * Text snippets that indicate a placeholder/loading state
 * (matched case-insensitively as substrings)
 */
const PLACEHOLDER_SNIPPETS = [
  'antwort wird erstellt',
  'answer wird erstellt',
  'answer is being created',
  'answer is being generated',
  'creating answer',
  'generating answer',
  'wird erstellt',
  'getting the context', // NotebookLM initial loading message
  'loading',
  'please wait',
];

/**
 * Maximum length (in characters, after trimming) of a text that may still be
 * classified as a placeholder.
 *
 * The snippets above are common words ("loading", "please wait"). Without a
 * length cap, any real answer that merely mentions one of them would be
 * treated as a loading message on every poll and never be returned.
 * NOTE(review): 100 is a heuristic; short real answers containing a snippet
 * are still classified as placeholders.
 */
const PLACEHOLDER_MAX_LENGTH = 100;

// ============================================================================
// Helper Functions
// ============================================================================

/**
 * Simple string hash function (for efficient comparison)
 *
 * Computes `hash * 31 + charCode` per UTF-16 code unit, truncated to a signed
 * 32-bit integer. Not collision-free: two different texts can share a hash.
 */
function hashString(str: string): number {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    const char = str.charCodeAt(i);
    hash = (hash << 5) - hash + char;
    hash = hash & hash; // Convert to 32bit integer
  }
  return hash;
}

/**
 * Check if text is a placeholder/loading message
 *
 * Only short texts (at most PLACEHOLDER_MAX_LENGTH characters after trimming)
 * are considered, so a long answer that happens to contain a word such as
 * "loading" is not mistaken for a loading indicator.
 */
function isPlaceholder(text: string): boolean {
  const lower = text.trim().toLowerCase();
  if (lower.length > PLACEHOLDER_MAX_LENGTH) {
    return false;
  }
  return PLACEHOLDER_SNIPPETS.some((snippet) => lower.includes(snippet));
}

// ============================================================================
// Main Functions
// ============================================================================

/**
 * Snapshot the latest response text currently visible
 * Returns null if no response found
 */
export async function snapshotLatestResponse(page: Page): Promise<string | null> {
  // An empty known-hash set means every non-empty text counts as "new".
  return await extractLatestText(page, new Set(), false, 0);
}

/**
 * Snapshot ALL existing assistant response texts
 * Used to capture visible responses BEFORE submitting a new question
 *
 * Best-effort: containers that fail to read are skipped, and a failure of the
 * top-level query is logged as a warning; the texts collected so far are
 * returned in either case.
 *
 * @param page Page to read responses from
 * @returns Trimmed, non-empty response texts in DOM order
 */
export async function snapshotAllResponses(page: Page): Promise<string[]> {
  const allTexts: string[] = [];
  const primarySelector = '.to-user-container';

  try {
    const containers = await page.$$(primarySelector);
    if (containers.length > 0) {
      for (const container of containers) {
        try {
          const textElement = await container.$('.message-text-content');
          if (textElement) {
            const text = await textElement.innerText();
            if (text && text.trim()) {
              allTexts.push(text.trim());
            }
          }
        } catch {
          // Element may have been detached mid-read; skip it.
          continue;
        }
      }
      log.info(`📸 [SNAPSHOT] Captured ${allTexts.length} existing responses`);
    }
  } catch (error) {
    log.warning(`⚠️ [SNAPSHOT] Failed to snapshot responses: ${error}`);
  }

  return allTexts;
}

/**
 * Count the number of visible assistant response elements
 *
 * Selectors from RESPONSE_SELECTORS are tried in order; the count comes from
 * the first selector that yields at least one visible element.
 *
 * @param page Page to inspect
 * @returns Number of visible elements for the first matching selector, or 0
 */
export async function countResponseElements(page: Page): Promise<number> {
  let count = 0;

  for (const selector of RESPONSE_SELECTORS) {
    try {
      const elements = await page.$$(selector);
      if (elements.length > 0) {
        // Count only visible elements
        for (const el of elements) {
          try {
            const isVisible = await el.isVisible();
            if (isVisible) {
              count++;
            }
          } catch {
            continue;
          }
        }

        // If we found elements with this selector, stop trying others
        if (count > 0) {
          break;
        }
      }
    } catch {
      continue;
    }
  }

  return count;
}

/**
 * Wait for a new assistant response with streaming detection
 *
 * This function:
 * 1. Polls the page for new response text
 * 2. Detects streaming (text changes) vs. complete (text stable)
 * 3. Requires text to be stable for 8 consecutive polls before returning
 * 4. Ignores placeholders, question echoes, and known responses
 *
 * @param page Page instance to poll
 * @param options Options for waiting
 * @param options.question Question that was asked; a candidate equal to it
 *   (case-insensitive, trimmed) is treated as an echo and ignored
 * @param options.timeoutMs Overall deadline in milliseconds (default 120000)
 * @param options.pollIntervalMs Delay between polls in milliseconds (default 1000)
 * @param options.ignoreTexts Texts already on the page that must not be
 *   returned (e.g. the result of snapshotAllResponses)
 * @param options.debug Enable debug logging
 * @returns The new response text, or null if timeout
 */
export async function waitForLatestAnswer(
  page: Page,
  options: {
    question?: string;
    timeoutMs?: number;
    pollIntervalMs?: number;
    ignoreTexts?: string[];
    debug?: boolean;
  } = {}
): Promise<string | null> {
  const {
    question = '',
    timeoutMs = 120000,
    pollIntervalMs = 1000,
    ignoreTexts = [],
    debug = false,
  } = options;

  const deadline = Date.now() + timeoutMs;
  const sanitizedQuestion = question.trim().toLowerCase();

  // Track ALL known texts as HASHES (memory efficient!)
  const knownHashes = new Set<number>();
  for (const text of ignoreTexts) {
    if (typeof text === 'string' && text.trim()) {
      knownHashes.add(hashString(text.trim()));
    }
  }

  if (debug) {
    log.debug(`🔍 [DEBUG] Waiting for NEW answer. Ignoring ${knownHashes.size} known responses`);
  }

  let pollCount = 0;
  let lastCandidate: string | null = null;
  let stableCount = 0; // Track how many times we see the same text
  const requiredStablePolls = 8; // Text must be stable for 8 consecutive polls (~8 seconds)

  while (Date.now() < deadline) {
    pollCount++;

    // Extract latest NEW text
    const candidate = await extractLatestText(page, knownHashes, debug, pollCount);
    const normalized = candidate?.trim() ?? '';

    if (!normalized) {
      await page.waitForTimeout(pollIntervalMs);
      continue;
    }

    const lower = normalized.toLowerCase();

    // Check if it's a placeholder
    if (isPlaceholder(lower)) {
      if (debug) {
        log.debug(
          `🔍 [DEBUG] Found placeholder: "${normalized.substring(0, 50)}..." - continuing...`
        );
      }
      // Placeholders are re-polled on a short fixed delay rather than pollIntervalMs.
      await page.waitForTimeout(250);
      continue;
    }

    // DEBUG: Log the candidate text to see what we're getting
    if (debug && normalized !== lastCandidate) {
      log.debug(
        `🔍 [DEBUG] New candidate text (${normalized.length} chars): "${normalized.substring(0, 100)}..."`
      );
    }

    // Check if it's the question echo
    if (lower === sanitizedQuestion) {
      if (debug) {
        log.debug('🔍 [DEBUG] Found question echo, ignoring');
      }
      knownHashes.add(hashString(normalized)); // Mark as seen
      await page.waitForTimeout(pollIntervalMs);
      continue;
    }

    // ========================================
    // STREAMING DETECTION: Check if text is stable
    // ========================================
    if (normalized === lastCandidate) {
      // Text hasn't changed - it's stable
      stableCount++;
      if (debug && stableCount === requiredStablePolls) {
        log.debug(`✅ [DEBUG] Text stable for ${stableCount} polls (${normalized.length} chars)`);
      }
    } else {
      // Text changed - streaming in progress
      if (debug && lastCandidate) {
        log.debug(
          `🔄 [DEBUG] Text changed (${normalized.length} chars, was ${lastCandidate.length})`
        );
      }
      stableCount = 1;
      lastCandidate = normalized;
    }

    // Only return once text is stable
    if (stableCount >= requiredStablePolls) {
      if (debug) {
        log.debug(`✅ [DEBUG] Returning stable answer (${normalized.length} chars)`);
      }
      return normalized;
    }

    await page.waitForTimeout(pollIntervalMs);
  }

  if (debug) {
    log.debug(`⏱️ [DEBUG] Timeout after ${pollCount} polls`);
  }
  return null;
}

/**
 * Extract the latest NEW response text from the page
 * Uses hash-based comparison for efficiency
 *
 * Strategy:
 * 1. Primary selector (`.to-user-container`): if any containers exist, return
 *    the first one whose text hash is not known, else null (no fallback).
 * 2. If no containers exist or the primary query throws, try each entry of
 *    RESPONSE_SELECTORS.
 * 3. As a last resort, collect visible candidate texts via in-page JavaScript
 *    and return the last one that is not known.
 *
 * @param page Page instance to read from
 * @param knownHashes Set of hashes of already-seen response texts
 * @param debug Enable debug logging
 * @param pollCount Current poll number (for conditional logging)
 * @returns First NEW response text found, or null
 */
async function extractLatestText(
  page: Page,
  knownHashes: Set<number>,
  debug: boolean,
  pollCount: number
): Promise<string | null> {
  // Try the primary selector first (most specific for NotebookLM)
  const primarySelector = '.to-user-container';

  try {
    const containers = await page.$$(primarySelector);
    const totalContainers = containers.length;

    if (totalContainers > 0) {
      // Early exit if no new containers possible.
      // This check only applies when containers exist: with zero containers the
      // comparison is always true and would make the fallbacks below unreachable.
      // NOTE(review): the count comparison assumes every known hash corresponds
      // to a container currently on the page - confirm against callers.
      if (totalContainers <= knownHashes.size) {
        if (debug && pollCount % 5 === 0) {
          log.dim(
            `⏭️ [EXTRACT] No new containers (${totalContainers} total, ${knownHashes.size} known)`
          );
        }
        return null;
      }

      // Only log every 5th poll to reduce noise
      if (debug && pollCount % 5 === 0) {
        log.dim(`🔍 [EXTRACT] Scanning ${totalContainers} containers (${knownHashes.size} known)`);
      }

      let skipped = 0;
      let empty = 0;

      // Scan ALL containers to find the FIRST with NEW text
      for (let idx = 0; idx < containers.length; idx++) {
        const container = containers[idx];
        try {
          const textElement = await container.$('.message-text-content');
          if (textElement) {
            const text = await textElement.innerText();
            if (text && text.trim()) {
              // Hash-based comparison (faster & less memory)
              const textHash = hashString(text.trim());
              if (!knownHashes.has(textHash)) {
                log.success(
                  `✅ [EXTRACT] Found NEW text in container[${idx}]: ${text.trim().length} chars`
                );
                return text.trim();
              } else {
                skipped++;
              }
            } else {
              empty++;
            }
          }
        } catch {
          continue;
        }
      }

      // Only log summary if debug enabled
      if (debug && pollCount % 5 === 0) {
        log.dim(`⏭️ [EXTRACT] No NEW text (skipped ${skipped} known, ${empty} empty)`);
      }
      return null; // Don't fall through to fallback!
    }

    if (debug) {
      log.warning('⚠️ [EXTRACT] No containers found');
    }
  } catch (error) {
    log.error(`❌ [EXTRACT] Primary selector failed: ${error}`);
  }

  // Fallback: Try other selectors (only if primary selector failed/found nothing)
  if (debug) {
    log.dim('🔄 [EXTRACT] Trying fallback selectors...');
  }

  for (const selector of RESPONSE_SELECTORS) {
    try {
      const elements = await page.$$(selector);
      if (elements.length === 0) continue;

      // Scan ALL elements to find the first with NEW text
      for (const element of elements) {
        try {
          // Prefer full container text when available
          let container = element;
          try {
            const closest = await element.evaluateHandle((el) => {
              return el.closest(
                '[data-message-author], [data-message-role], [data-author], ' +
                  "[data-testid*='assistant'], [data-automation-id*='response'], article, section"
              );
            });
            if (closest) {
              // asElement() is null when closest() found no ancestor.
              container = closest.asElement() || element;
            }
          } catch {
            container = element;
          }

          const text = await container.innerText();
          if (text && text.trim() && !knownHashes.has(hashString(text.trim()))) {
            return text.trim();
          }
        } catch {
          continue;
        }
      }
    } catch {
      continue;
    }
  }

  // Final fallback: JavaScript evaluation
  try {
    // Collect every visible candidate text in the page; filtering against
    // knownHashes happens below on the Node side so that this path honors the
    // "NEW text only" contract like the selector-based paths above.
    const fallbackTexts = await page.evaluate((): string[] => {
      // @ts-expect-error - DOM types available in browser context
      const unique = new Set<Element>();
      // @ts-expect-error - DOM types available in browser context
      const isVisible = (el: Element): boolean => {
        // @ts-expect-error - DOM types available in browser context
        if (!el || !(el as HTMLElement).isConnected) return false;
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 || rect.height === 0) return false;
        // @ts-expect-error - window available in browser context
        const style = window.getComputedStyle(el as HTMLElement);
        if (
          style.visibility === 'hidden' ||
          style.display === 'none' ||
          parseFloat(style.opacity || '1') === 0
        ) {
          return false;
        }
        return true;
      };

      const selectors = [
        '[data-message-author]',
        '[data-message-role]',
        '[data-author]',
        "[data-renderer*='assistant']",
        "[data-testid*='assistant']",
        "[data-automation-id*='response']",
      ];

      const candidates: string[] = [];
      for (const selector of selectors) {
        // @ts-expect-error - document available in browser context
        for (const el of document.querySelectorAll(selector)) {
          if (!isVisible(el)) continue;
          if (unique.has(el)) continue;
          unique.add(el);
          // @ts-expect-error - DOM types available in browser context
          const text = (el as HTMLElement).innerText || (el as HTMLElement).textContent || '';
          if (!text.trim()) continue;
          candidates.push(text.trim());
        }
      }

      return candidates;
    });

    // Walk from the last candidate backwards and return the first unknown one.
    for (let i = fallbackTexts.length - 1; i >= 0; i--) {
      const text = fallbackTexts[i];
      if (typeof text !== 'string') continue;
      const trimmed = text.trim();
      if (trimmed && !knownHashes.has(hashString(trimmed))) {
        return trimmed;
      }
    }
  } catch {
    // Ignore evaluation errors
  }

  return null;
}

// ============================================================================
// Exports
// ============================================================================

export default {
  snapshotLatestResponse,
  snapshotAllResponses,
  countResponseElements,
  waitForLatestAnswer,
};

Latest Blog Posts

The 50MB Markdown Files That Broke Our Server
By punkpeye on December 3, 2025.
react
react-router
node-js
OpenTelemetry for Model Context Protocol (MCP) Analytics and Agent Observability
By Om-Shree-0709 on November 29, 2025.
observability
mcp
opentelemetry
Securing Enterprise AI Agents with Unique Identities in the Model Context Protocol (MCP)
By Om-Shree-0709 on November 27, 2025.

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/roomi-fields/notebooklm-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.