/**
 * Site Crawler Utility
 * Crawls the site discovering links and capturing errors
 */

import type { Page, BrowserContext } from '@playwright/test';

/** A single problem observed while a page was being crawled. */
export interface CrawlError {
  /** URL of the page being crawled when the problem was observed. */
  url: string;
  /** Source of the problem: console output, network, main-document status, or an exception. */
  type: 'console' | 'network' | 'broken-link' | 'page-error';
  message: string;
  details?: string;
  timestamp: Date;
}

/** Outcome of crawling one page. */
export interface CrawlResult {
  url: string;
  status: 'success' | 'error' | 'skipped';
  title?: string;
  errors: CrawlError[];
  /** Crawlable links discovered on the page (normalized and de-duplicated). */
  linksFound: string[];
  /** Wall-clock time spent on the page, in milliseconds. */
  duration: number;
}

/** Aggregated outcome of a whole crawl. */
export interface CrawlReport {
  startTime: Date;
  endTime: Date;
  totalPages: number;
  totalErrors: number;
  results: CrawlResult[];
  summary: {
    consoleErrors: number;
    networkErrors: number;
    brokenLinks: number;
    pageErrors: number;
  };
}

/** Tunables accepted by {@link SiteCrawler}; every field falls back to `DEFAULT_OPTIONS`. */
export interface CrawlerOptions {
  /** Maximum number of pages to crawl; 0 means unlimited. */
  maxPages?: number;
  /** Navigation timeout passed to `page.goto`, in milliseconds. */
  timeout?: number;
  /** URLs matching any of these patterns are never queued. */
  excludePatterns?: RegExp[];
  includeExternalLinks?: boolean;
  /** Wait for `networkidle` instead of `domcontentloaded` when navigating. */
  waitForNetworkIdle?: boolean;
  screenshotOnError?: boolean;
  screenshotDir?: string;
  verbose?: boolean;
}

const DEFAULT_OPTIONS: CrawlerOptions = {
  maxPages: 0, // 0 = unlimited
  timeout: 30000,
  excludePatterns: [
    /\.(pdf|zip|tar|gz|exe|dmg|pkg)$/i,
    /^mailto:/i,
    /^tel:/i,
    /^javascript:/i,
    /logout/i,
    /sign-out/i,
  ],
  includeExternalLinks: false,
  waitForNetworkIdle: true,
  screenshotOnError: false,
  screenshotDir: 'test-results/crawler-screenshots',
  verbose: false,
};

/** Maps each error type to the summary counter it increments. */
const SUMMARY_COUNTER: Record<CrawlError['type'], keyof CrawlReport['summary']> = {
  console: 'consoleErrors',
  network: 'networkErrors',
  'broken-link': 'brokenLinks',
  'page-error': 'pageErrors',
};

/** Icon printed in front of each error line of the text report. */
const ERROR_ICONS: Record<CrawlError['type'], string> = {
  console: '⚠️',
  network: '🌐',
  'broken-link': '🔴',
  'page-error': '💥',
};

/**
 * Breadth-first crawler that drives a single Playwright page through every
 * reachable internal link, recording console, network and page errors.
 */
export class SiteCrawler {
  private page: Page;
  private context: BrowserContext;
  private options: CrawlerOptions;
  private visited: Set<string> = new Set();
  /** Every URL ever placed on the queue; gives O(1) duplicate checks. */
  private queued: Set<string> = new Set();
  private queue: string[] = [];
  private results: CrawlResult[] = [];
  private baseUrl: string = '';
  private baseDomain: string = '';

  /**
   * @param page Page used for every navigation.
   * @param context Browser context the page belongs to (stored, not otherwise used here).
   * @param options Overrides merged on top of `DEFAULT_OPTIONS`.
   */
  constructor(page: Page, context: BrowserContext, options: Partial<CrawlerOptions> = {}) {
    this.page = page;
    this.context = context;
    this.options = { ...DEFAULT_OPTIONS, ...options };
  }

  /** Prints a prefixed message when the `verbose` option is on. */
  private log(message: string, ...args: unknown[]) {
    if (this.options.verbose) {
      console.log(`[Crawler] ${message}`, ...args);
    }
  }

  /**
   * Canonical form of a URL used for de-duplication: resolved against the
   * start URL, hash removed, trailing slash removed.
   *
   * @returns The normalized URL, or the input unchanged if it cannot be parsed.
   */
  private normalizeUrl(url: string): string {
    try {
      const parsed = new URL(url, this.baseUrl);
      // Remove hash and trailing slash for comparison
      parsed.hash = '';
      let normalized = parsed.href;
      // Only strip a slash that ends the *path*. When a query string is
      // present, a trailing "/" belongs to a parameter value (e.g. "?next=/")
      // and removing it would change the meaning of the URL.
      const endsWithPathSlash = parsed.search === '' && normalized.endsWith('/');
      if (endsWithPathSlash && normalized !== parsed.origin + '/') {
        normalized = normalized.slice(0, -1);
      }
      return normalized;
    } catch {
      return url;
    }
  }

  /**
   * Whether a URL belongs to the site under test: the host the crawl started
   * on, `lvh.me` or any of its subdomains, or a loopback host.
   */
  private isInternalUrl(url: string): boolean {
    try {
      const host = new URL(url, this.baseUrl).hostname;
      if (this.baseDomain !== '' && host === this.baseDomain) {
        return true;
      }
      // Require a label boundary so that e.g. "evillvh.me" is not treated as internal.
      return (
        host === 'lvh.me' ||
        host.endsWith('.lvh.me') ||
        host === 'localhost' ||
        host === '127.0.0.1'
      );
    } catch {
      return false;
    }
  }

  /** Decides whether a normalized URL should be added to the crawl queue. */
  private shouldCrawl(url: string): boolean {
    // Skip if already visited
    if (this.visited.has(url)) {
      return false;
    }

    // Skip if matches exclude patterns
    for (const pattern of this.options.excludePatterns || []) {
      // A global/sticky RegExp remembers where its last match ended; reset it
      // so caller-supplied patterns give the same answer on every call.
      pattern.lastIndex = 0;
      if (pattern.test(url)) {
        this.log(`Skipping excluded URL: ${url}`);
        return false;
      }
    }

    // Skip external links unless explicitly included
    if (!this.options.includeExternalLinks && !this.isInternalUrl(url)) {
      this.log(`Skipping external URL: ${url}`);
      return false;
    }

    return true;
  }

  /**
   * Collects the crawlable links of the currently loaded page.
   *
   * @returns Normalized, de-duplicated URLs that pass {@link shouldCrawl}.
   */
  private async extractLinks(): Promise<string[]> {
    const links = await this.page.evaluate(() => {
      const hrefs: string[] = [];
      document.querySelectorAll('a[href]').forEach(anchor => {
        const href = anchor.getAttribute('href');
        if (!href) {
          return;
        }
        // Resolve against the document the link lives in (honouring <base>),
        // not against the crawl's start URL, so relative links on nested
        // pages or other subdomains point where the browser would go.
        try {
          hrefs.push(new URL(href, document.baseURI).href);
        } catch {
          hrefs.push(href);
        }
      });
      return hrefs;
    });

    // Normalize and filter links
    const unique = new Set<string>();
    for (const link of links) {
      const normalized = this.normalizeUrl(link);
      if (this.shouldCrawl(normalized)) {
        unique.add(normalized);
      }
    }
    return [...unique];
  }

  /**
   * Subscribes to the page events that indicate problems and appends what
   * they report to `errors`.
   *
   * @returns A function that removes every listener that was added.
   */
  private attachErrorListeners(url: string, errors: CrawlError[]): () => void {
    const consoleHandler = (msg: {
      type: () => string;
      text: () => string;
      location: () => { url: string; lineNumber: number };
    }) => {
      const type = msg.type();
      if (type !== 'error' && type !== 'warning') {
        return;
      }
      const text = msg.text();
      // Filter out non-critical warnings
      if (text.includes('width(-1) and height(-1) of chart')) return; // Recharts initial render warning
      if (text.includes('WebSocket')) return; // WebSocket connection issues in dev
      if (text.includes('Cross-Origin-Opener-Policy')) return; // COOP header in dev environment
      if (text.includes('must use HTTPS') && text.includes('Stripe')) return; // Stripe dev mode warning

      // Show all backend errors (403, 404, 500) for debugging
      const location = msg.location();
      errors.push({
        url,
        type: 'console',
        message: text,
        details: `${type.toUpperCase()} at ${location.url}:${location.lineNumber}`,
        timestamp: new Date(),
      });
    };

    const pageErrorHandler = (error: Error) => {
      errors.push({
        url,
        type: 'page-error',
        message: error.message,
        details: error.stack,
        timestamp: new Date(),
      });
    };

    const requestFailedHandler = (request: {
      url: () => string;
      failure: () => { errorText: string } | null;
    }) => {
      const failedUrl = request.url();
      // Ignore some common non-critical failures
      if (failedUrl.includes('favicon.ico') || failedUrl.includes('hot-update')) {
        return;
      }
      // Ignore Stripe external requests (tracking/monitoring that gets cancelled)
      if (failedUrl.includes('stripe.com') || failedUrl.includes('stripe.network')) {
        return;
      }
      // Show all API failures for debugging
      errors.push({
        url,
        type: 'network',
        message: `Request failed: ${failedUrl}`,
        details: request.failure()?.errorText || 'Unknown error',
        timestamp: new Date(),
      });
    };

    const responseHandler = (response: { url: () => string; status: () => number }) => {
      const status = response.status();
      const responseUrl = response.url();
      // Track 4xx and 5xx responses (excluding some common benign ones)
      if (status >= 400 && !responseUrl.includes('favicon.ico')) {
        // Show all API errors (403, 404, 500) for debugging
        errors.push({
          url,
          type: 'network',
          message: `HTTP ${status}: ${responseUrl}`,
          details: `Response status ${status}`,
          timestamp: new Date(),
        });
      }
    };

    this.page.on('console', consoleHandler);
    this.page.on('pageerror', pageErrorHandler);
    this.page.on('requestfailed', requestFailedHandler);
    this.page.on('response', responseHandler);

    return () => {
      this.page.off('console', consoleHandler);
      this.page.off('pageerror', pageErrorHandler);
      this.page.off('requestfailed', requestFailedHandler);
      this.page.off('response', responseHandler);
    };
  }

  /**
   * Visits one URL, records the errors raised while it loads and queues the
   * links it contains.
   *
   * Never throws: a navigation failure is reported as a `page-error` entry on
   * the returned result.
   */
  private async crawlPage(url: string): Promise<CrawlResult> {
    const startTime = Date.now();
    const errors: CrawlError[] = [];
    const linksFound: string[] = [];

    this.log(`Crawling: ${url}`);

    // Set up error listeners
    const detachListeners = this.attachErrorListeners(url, errors);

    try {
      // Navigate to the page
      const response = await this.page.goto(url, {
        timeout: this.options.timeout,
        waitUntil: this.options.waitForNetworkIdle ? 'networkidle' : 'domcontentloaded',
      });

      if (!response) {
        errors.push({
          url,
          type: 'network',
          message: 'No response received',
          timestamp: new Date(),
        });
        return { url, status: 'error', errors, linksFound, duration: Date.now() - startTime };
      }

      const status = response.status();
      if (status >= 400) {
        errors.push({
          url,
          type: 'broken-link',
          message: `HTTP ${status}`,
          timestamp: new Date(),
        });
      }

      // Wait a bit for React to render and any async operations
      await this.page.waitForTimeout(500);

      // Get page title
      const title = await this.page.title();

      // Extract links
      const links = await this.extractLinks();
      linksFound.push(...links);

      // Add new links to queue
      for (const link of links) {
        if (!this.visited.has(link) && !this.queued.has(link)) {
          this.queued.add(link);
          this.queue.push(link);
        }
      }

      // Screenshot on error if enabled
      if (errors.length > 0 && this.options.screenshotOnError) {
        const filename = url.replace(/[^a-zA-Z0-9]/g, '_').substring(0, 100);
        await this.page.screenshot({
          path: `${this.options.screenshotDir}/${filename}.png`,
          fullPage: true,
        });
      }

      return {
        url,
        status: errors.length > 0 ? 'error' : 'success',
        title,
        errors,
        linksFound,
        duration: Date.now() - startTime,
      };
    } catch (error) {
      errors.push({
        url,
        type: 'page-error',
        message: error instanceof Error ? error.message : String(error),
        timestamp: new Date(),
      });
      return { url, status: 'error', errors, linksFound, duration: Date.now() - startTime };
    } finally {
      // Remove listeners
      detachListeners();
    }
  }

  /**
   * Crawls the site breadth-first starting at `startUrl`.
   *
   * State from any previous crawl on this instance is discarded. One progress
   * line per page is printed to the console.
   *
   * @param startUrl Absolute URL to start from.
   * @returns Per-page results plus error totals.
   * @throws TypeError if `startUrl` is not a valid absolute URL.
   */
  async crawl(startUrl: string): Promise<CrawlReport> {
    const startTime = new Date();
    this.baseUrl = startUrl;
    this.baseDomain = new URL(startUrl).hostname;
    this.queue = [this.normalizeUrl(startUrl)];
    this.queued = new Set(this.queue);
    this.visited.clear();
    this.results = [];

    console.log(`\n🕷️ Starting crawl from: ${startUrl}`);
    console.log(` Max pages: ${this.options.maxPages || 'unlimited'}`);
    console.log('');

    const maxPages = this.options.maxPages || 0; // 0 = unlimited
    const maxDisplay = maxPages === 0 ? '∞' : maxPages;

    while (this.queue.length > 0 && (maxPages === 0 || this.results.length < maxPages)) {
      const url = this.queue.shift();
      if (url === undefined || this.visited.has(url)) {
        continue;
      }
      this.visited.add(url);

      const result = await this.crawlPage(url);
      this.results.push(result);

      // Progress indicator
      const errorCount = result.errors.length;
      const statusIcon = result.status === 'success' ? '✓' : '✗';
      const errorInfo = errorCount > 0 ? ` (${errorCount} error${errorCount > 1 ? 's' : ''})` : '';
      console.log(` ${statusIcon} [${this.results.length}/${maxDisplay}] ${url}${errorInfo}`);
    }

    const endTime = new Date();

    // Calculate summary
    const summary = { consoleErrors: 0, networkErrors: 0, brokenLinks: 0, pageErrors: 0 };
    for (const result of this.results) {
      for (const error of result.errors) {
        summary[SUMMARY_COUNTER[error.type]]++;
      }
    }

    return {
      startTime,
      endTime,
      totalPages: this.results.length,
      totalErrors:
        summary.consoleErrors + summary.networkErrors + summary.brokenLinks + summary.pageErrors,
      results: this.results,
      summary,
    };
  }
}

/**
 * Renders a crawl report as human-readable text.
 *
 * @param report Report produced by {@link SiteCrawler.crawl}.
 * @returns Multi-line text: totals, per-type breakdown, then the errors of
 *          each failing page (details truncated to 200 characters).
 */
export function formatReport(report: CrawlReport): string {
  const duration = (report.endTime.getTime() - report.startTime.getTime()) / 1000;

  let output = '\n';
  output += '═'.repeat(60) + '\n';
  output += ' CRAWL REPORT\n';
  output += '═'.repeat(60) + '\n\n';

  output += `📊 Summary\n`;
  output += ` Pages crawled: ${report.totalPages}\n`;
  output += ` Total errors: ${report.totalErrors}\n`;
  output += ` Duration: ${duration.toFixed(1)}s\n\n`;

  output += `📋 Error Breakdown\n`;
  output += ` Console errors: ${report.summary.consoleErrors}\n`;
  output += ` Network errors: ${report.summary.networkErrors}\n`;
  output += ` Broken links: ${report.summary.brokenLinks}\n`;
  output += ` Page errors: ${report.summary.pageErrors}\n\n`;

  // List pages with errors
  const pagesWithErrors = report.results.filter(r => r.errors.length > 0);

  if (pagesWithErrors.length > 0) {
    output += '─'.repeat(60) + '\n';
    output += ' ERROR DETAILS\n';
    output += '─'.repeat(60) + '\n\n';

    for (const result of pagesWithErrors) {
      output += `🔗 ${result.url}\n`;
      output += ` Title: ${result.title || 'N/A'}\n`;

      for (const error of result.errors) {
        const icon = ERROR_ICONS[error.type];
        output += ` ${icon} [${error.type.toUpperCase()}] ${error.message}\n`;
        if (error.details) {
          output += ` Details: ${error.details.substring(0, 200)}\n`;
        }
      }
      output += '\n';
    }
  } else {
    output += '✅ No errors found!\n\n';
  }

  output += '═'.repeat(60) + '\n';
  return output;
}

// Authentication helpers for different user types

/** Login details for one kind of test user. */
export interface UserCredentials {
  username: string;
  password: string;
  loginUrl: string;
  /** Human-readable role; also used to find the matching quick-login button. */
  description: string;
}

// NOTE(review): credentials are hard-coded in source. Presumably these are
// local development fixtures only — confirm that none of them is valid
// against a shared or production environment.
export const TEST_USERS: Record<string, UserCredentials> = {
  platformSuperuser: {
    username: 'poduck@gmail.com',
    password: 'starry12',
    loginUrl: 'http://platform.lvh.me:5173/platform/login',
    description: 'Platform Superuser',
  },
  businessOwner: {
    username: 'owner@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Business Owner',
  },
  businessManager: {
    username: 'manager@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Business Manager',
  },
  businessStaff: {
    username: 'staff@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Staff Member',
  },
  customer: {
    username: 'customer@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Customer',
  },
};

/**
 * Logs the given user in through the UI.
 *
 * Prefers a quick-login button whose accessible name contains the user's
 * description and falls back to the email/password form.
 *
 * @param page Page to drive.
 * @param user Credentials and login URL.
 * @returns `true` when the page ends up somewhere other than the login page
 *          or the bare `:5173` root; `false` otherwise, including when any
 *          step throws (the error is logged, not rethrown).
 */
export async function loginAsUser(page: Page, user: UserCredentials): Promise<boolean> {
  console.log(`\n🔐 Logging in as ${user.description}...`);
  console.log(` Login URL: ${user.loginUrl}`);

  try {
    await page.goto(user.loginUrl);
    await page.waitForLoadState('networkidle');
    await page.waitForTimeout(1000);

    // Check if already logged in (dashboard visible)
    const isDashboard =
      page.url().includes('/dashboard') ||
      (await page
        .getByRole('heading', { name: /dashboard/i })
        .isVisible()
        .catch(() => false));

    if (isDashboard) {
      console.log(` ✓ Already logged in. Current URL: ${page.url()}`);
      return true;
    }

    // Try quick login buttons first (dev mode)
    // Escape the description so characters such as "(" or "+" are matched
    // literally instead of being interpreted as regex syntax.
    const descriptionPattern = user.description.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const quickLoginButton = page.getByRole('button', {
      name: new RegExp(descriptionPattern, 'i'),
    });
    const hasQuickLogin = await quickLoginButton.isVisible().catch(() => false);

    if (hasQuickLogin) {
      console.log(` Using quick login button for ${user.description}...`);
      await quickLoginButton.click();
    } else {
      // Fall back to form login
      let emailInput = page.locator('#email');
      let passwordInput = page.locator('#password');

      let formFound = true;
      try {
        await emailInput.waitFor({ timeout: 10000 });
      } catch {
        formFound = false;
      }

      if (!formFound) {
        // No #email field appeared; try locating the inputs by placeholder.
        emailInput = page.getByPlaceholder(/enter your email/i);
        passwordInput = page.getByPlaceholder(/password/i);
        await emailInput.waitFor({ timeout: 5000 });
      }

      await emailInput.fill(user.username);
      await passwordInput.fill(user.password);
      await page.getByRole('button', { name: /^sign in$/i }).click();
    }

    // Wait for navigation after login
    await page.waitForLoadState('networkidle');
    await page.waitForTimeout(2000);

    // Verify we're logged in (not on login page anymore)
    const currentUrl = page.url();
    const isLoggedIn =
      !currentUrl.includes('/login') &&
      !currentUrl.endsWith(':5173/') &&
      !currentUrl.endsWith(':5173');

    if (isLoggedIn) {
      console.log(` ✓ Logged in successfully. Current URL: ${currentUrl}`);
    } else {
      console.log(` ✗ Login may have failed. Current URL: ${currentUrl}`);
    }
    return isLoggedIn;
  } catch (error) {
    console.error(` ✗ Login failed:`, error);
    return false;
  }
}