import { chromium, type Browser } from 'playwright';
import type { APIDiscoveryResult, APIEndpoint, NetworkRequest } from '../types/index.js';
import { Validators } from '../utils/validators.js';
/**
 * Discovers and probes HTTP APIs by loading a page in headless Chromium and
 * recording the network traffic it generates.
 *
 * A single browser instance is launched lazily and reused across calls;
 * call {@link APIScraper.close} when finished to release it.
 */
export class APIScraper {
  /** Navigation timeout (ms) applied when the caller does not supply one. */
  private static readonly DEFAULT_TIMEOUT_MS = 30000;

  /** Extra wait (ms) after network idle so late requests are still recorded. */
  private static readonly SETTLE_DELAY_MS = 2000;

  /**
   * In-flight or completed browser launch. Caching the promise (rather than
   * the resolved browser) ensures concurrent callers share one launch instead
   * of each starting their own Chromium process.
   */
  private browserPromise: Promise<Browser> | null = null;

  /**
   * Get or create the shared browser instance.
   *
   * @returns The shared headless Chromium browser.
   * @throws Whatever `chromium.launch` rejects with; a failed launch is not
   *   cached, so a later call retries.
   */
  private getBrowser(): Promise<Browser> {
    if (!this.browserPromise) {
      const launching = chromium.launch({ headless: true });
      this.browserPromise = launching;
      // Forget a failed launch so the next call can retry. The rejection is
      // still delivered to every caller awaiting the returned promise.
      launching.catch(() => {
        if (this.browserPromise === launching) {
          this.browserPromise = null;
        }
      });
    }
    return this.browserPromise;
  }

  /**
   * Close the shared browser, if one was launched (or is being launched).
   * Safe to call repeatedly; a later scrape launches a new browser.
   */
  async close(): Promise<void> {
    const pending = this.browserPromise;
    if (!pending) {
      return;
    }
    // Clear first so calls made while closing start a fresh browser.
    this.browserPromise = null;

    let browser: Browser;
    try {
      browser = await pending;
    } catch {
      // The launch itself failed, so there is nothing to close.
      return;
    }
    await browser.close();
  }

  /**
   * Heuristic used by endpoint discovery: a request is "API-like" when its URL
   * contains `/api/`, ends with `.json`, carries a query string, or uses any
   * method other than GET.
   */
  private static looksLikeApiCall(requestUrl: string, method: string): boolean {
    return (
      requestUrl.includes('/api/') ||
      requestUrl.endsWith('.json') ||
      requestUrl.includes('?') ||
      method !== 'GET'
    );
  }

  /**
   * Load `url` in a fresh page and record the requests it issues.
   *
   * Each response is correlated with its originating request object, so
   * repeated requests to the same URL each receive their own status and
   * timing. Requests that never receive a response keep `status: 0`.
   *
   * @param url - Page to navigate to (already validated by the caller).
   * @param timeout - Navigation timeout in ms; `undefined` uses the default.
   * @param shouldCapture - Predicate selecting which requests to record.
   * @returns Recorded requests, in the order they were issued.
   */
  private async captureRequests(
    url: string,
    timeout: number | undefined,
    shouldCapture: (requestUrl: string, method: string) => boolean
  ): Promise<NetworkRequest[]> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();
    const captured: NetworkRequest[] = [];
    // Keyed by the Playwright request object so a response can be matched to
    // the exact request that produced it.
    const tracked = new Map<object, { entry: NetworkRequest; startedAt: number }>();

    try {
      page.on('request', (request) => {
        const requestUrl = request.url();
        const method = request.method();
        if (!shouldCapture(requestUrl, method)) {
          return;
        }
        const entry: NetworkRequest = {
          url: requestUrl,
          method,
          status: 0,
          statusText: '',
          headers: request.headers(),
          requestHeaders: request.headers(),
        };
        captured.push(entry);
        tracked.set(request, { entry, startedAt: Date.now() });
      });

      page.on('response', (response) => {
        const match = tracked.get(response.request());
        if (!match) {
          return;
        }
        match.entry.status = response.status();
        match.entry.statusText = response.statusText();
        match.entry.type = response.headers()['content-type'] || '';
        // Elapsed time for this request alone, not since navigation began.
        match.entry.responseTime = Date.now() - match.startedAt;
      });

      await page.goto(url, {
        waitUntil: 'networkidle',
        // `??` rather than `||`: an explicit 0 means "no timeout" in Playwright.
        timeout: timeout ?? APIScraper.DEFAULT_TIMEOUT_MS,
      });
      // Give requests fired just after network idle a chance to be recorded.
      await page.waitForTimeout(APIScraper.SETTLE_DELAY_MS);

      return captured;
    } finally {
      await page.close();
    }
  }

  /**
   * Discover API endpoints by monitoring the network requests a page makes.
   *
   * Only API-like requests are considered (see `looksLikeApiCall`). Endpoints
   * are de-duplicated per HTTP method and full URL, keeping the most recent
   * observation.
   *
   * @param url - Page to load.
   * @param options - `timeout`: navigation timeout in ms (default 30000).
   * @returns Discovered endpoints, the page origin as `baseUrl`, and the
   *   authentication scheme detected from request headers.
   * @throws Error `'Invalid URL'` when `url` fails validation.
   */
  async discoverAPIEndpoints(url: string, options?: { timeout?: number }): Promise<APIDiscoveryResult> {
    if (!Validators.isValidUrl(url)) {
      throw new Error('Invalid URL');
    }

    const requests = await this.captureRequests(url, options?.timeout, APIScraper.looksLikeApiCall);

    // Keyed by method + URL so e.g. GET and POST on one URL stay distinct.
    const uniqueEndpoints = new Map<string, APIEndpoint>();
    for (const request of requests) {
      let parsed: URL;
      try {
        parsed = new URL(request.url);
      } catch {
        // Skip anything that is not a parseable URL.
        continue;
      }

      // One entry per distinct query key, even if the key is repeated.
      const parameters: NonNullable<APIEndpoint['parameters']> = [];
      const seenKeys = new Set<string>();
      parsed.searchParams.forEach((_value, key) => {
        if (seenKeys.has(key)) {
          return;
        }
        seenKeys.add(key);
        parameters.push({
          name: key,
          type: 'string',
          required: false,
          location: 'query',
        });
      });

      uniqueEndpoints.set(`${request.method} ${request.url}`, {
        method: request.method as APIEndpoint['method'],
        path: parsed.pathname,
        fullUrl: request.url,
        parameters: parameters.length > 0 ? parameters : undefined,
        statusCode: request.status,
        headers: request.headers,
      });
    }

    return {
      endpoints: [...uniqueEndpoints.values()],
      baseUrl: new URL(url).origin,
      authentication: this.detectAuthentication(requests),
    };
  }

  /**
   * Record every network request made while loading a page.
   *
   * @param url - Page to load.
   * @param options - `timeout`: navigation timeout in ms (default 30000).
   * @returns All requests in issue order. `status`, `statusText`, `type` and
   *   `responseTime` (ms from request to response) are filled in when a
   *   response arrived.
   * @throws Error `'Invalid URL'` when `url` fails validation.
   */
  async analyzeNetworkRequests(url: string, options?: { timeout?: number }): Promise<NetworkRequest[]> {
    if (!Validators.isValidUrl(url)) {
      throw new Error('Invalid URL');
    }
    return this.captureRequests(url, options?.timeout, () => true);
  }

  /**
   * Build a minimal schema (base URL plus method/path/parameters per endpoint)
   * from the endpoints discovered on a page.
   *
   * This is a simplified version: it does not inspect response bodies.
   *
   * @param url - Page to load for discovery.
   * @param _endpoint - Currently unused.
   * @returns An object with `baseUrl` and an `endpoints` array.
   * @throws Error `'Invalid URL'` when `url` fails validation.
   */
  async extractAPISchema(url: string, _endpoint?: string): Promise<Record<string, unknown>> {
    const result = await this.discoverAPIEndpoints(url);

    const schema: Record<string, unknown> = {
      baseUrl: result.baseUrl,
      endpoints: result.endpoints.map((e) => ({
        method: e.method,
        path: e.path,
        parameters: e.parameters,
      })),
    };
    return schema;
  }

  /**
   * Send a single HTTP request to an endpoint and report the outcome.
   *
   * Any HTTP status is treated as a successful probe; only transport-level
   * failures throw.
   *
   * @param url - Endpoint to call.
   * @param options - Optional `method` (default `'GET'`), request `headers`,
   *   and request `body`.
   * @returns Status, status text, response headers, parsed body and elapsed
   *   time in ms.
   * @throws Error `'Invalid URL'` when `url` fails validation.
   * @throws Error `'Failed to test endpoint: …'` when the request cannot be
   *   completed.
   */
  async testAPIEndpoint(
    url: string,
    options?: { method?: string; headers?: Record<string, string>; body?: unknown }
  ): Promise<{
    status: number;
    statusText: string;
    headers: Record<string, string>;
    body?: unknown;
    responseTime: number;
  }> {
    // Same up-front validation as the browser-based methods.
    if (!Validators.isValidUrl(url)) {
      throw new Error('Invalid URL');
    }

    // Loaded lazily so axios is only required when this method is used.
    const axios = (await import('axios')).default;
    const startTime = Date.now();

    try {
      const response = await axios({
        url,
        // `||` is deliberate: an empty method string also falls back to GET.
        // The cast is kept because older axios typings only accept the `Method`
        // literal union rather than a plain string.
        method: (options?.method as any) || 'GET',
        headers: options?.headers,
        data: options?.body,
        // Never reject on HTTP status; the caller wants to see 4xx/5xx too.
        validateStatus: () => true,
      });

      return {
        status: response.status,
        statusText: response.statusText,
        // NOTE(review): axios may expose non-string values here (e.g. an array
        // for `set-cookie`); confirm callers tolerate that.
        headers: response.headers as Record<string, string>,
        body: response.data,
        responseTime: Date.now() - startTime,
      };
    } catch (error) {
      throw new Error(`Failed to test endpoint: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Detect the authentication scheme from recorded request headers.
   *
   * Requests are examined in order; for each one the checks are Bearer token,
   * then Basic auth, then an API-key header (a header whose name contains both
   * "api" and "key"). Header names and the auth scheme are matched
   * case-insensitively. The first match wins.
   *
   * @param requests - Recorded requests whose `headers` hold request headers.
   * @returns The detected scheme, or `{ type: 'none' }` if nothing matched.
   */
  private detectAuthentication(requests: NetworkRequest[]): APIDiscoveryResult['authentication'] {
    for (const request of requests) {
      const headerEntries = Object.entries(request.headers || {});

      const authorization = headerEntries.find(([name]) => name.toLowerCase() === 'authorization')?.[1];
      if (typeof authorization === 'string') {
        if (/^bearer /i.test(authorization)) {
          return {
            type: 'bearer',
            location: 'header',
            name: 'Authorization',
          };
        }
        if (/^basic /i.test(authorization)) {
          return {
            type: 'basic',
            location: 'header',
            name: 'Authorization',
          };
        }
      }

      const apiKeyHeader = headerEntries.find(([name]) => {
        const lowered = name.toLowerCase();
        return lowered.includes('api') && lowered.includes('key');
      });
      if (apiKeyHeader) {
        return {
          type: 'api_key',
          location: 'header',
          // Reported exactly as the header name appeared on the request.
          name: apiKeyHeader[0],
        };
      }
    }

    return {
      type: 'none',
    };
  }
}