#!/usr/bin/env tsx
/**
 * Site Crawler Script
 *
 * Crawls the SmoothSchedule site looking for:
 * - Broken links
 * - Console errors
 * - Network failures
 * - Page errors
 *
 * Usage:
 *   npx tsx scripts/crawl-site.ts [options]
 *
 * Options:
 *   --public           Crawl public marketing site only (default)
 *   --platform         Crawl platform dashboard (requires login)
 *   --tenant           Crawl tenant dashboard as Business Owner (requires login)
 *   --tenant-staff     Crawl tenant dashboard as Staff Member (requires login)
 *   --customer         Crawl customer booking portal (public)
 *   --customer-portal  Crawl customer portal as a logged-in customer (requires login)
 *   --all              Crawl all areas
 *   --max-pages=N      Maximum pages to crawl per area (default: unlimited)
 *   --verbose          Show detailed logging
 *   --screenshots      Save screenshots on errors
 *
 * Examples:
 *   npx tsx scripts/crawl-site.ts --public
 *   npx tsx scripts/crawl-site.ts --all --max-pages=100
 *   npx tsx scripts/crawl-site.ts --platform --verbose
 */

import { chromium, Browser, BrowserContext, Page } from 'playwright';
import {
  SiteCrawler,
  CrawlerOptions,
  CrawlReport,
  formatReport,
  loginAsUser,
  TEST_USERS,
  UserCredentials,
} from '../tests/e2e/utils/crawler';
import * as fs from 'fs';
import * as path from 'path';

interface CrawlArea {
  name: string;
  startUrl: string;
  requiresAuth: boolean;
  user?: UserCredentials;
}

const CRAWL_AREAS: Record<string, CrawlArea> = {
  public: {
    name: 'Public Site',
    startUrl: 'http://lvh.me:5173',
    requiresAuth: false,
  },
  platform: {
    name: 'Platform Dashboard',
    startUrl: 'http://platform.lvh.me:5173/platform/login',
    requiresAuth: true,
    user: TEST_USERS.platformSuperuser,
  },
  tenant: {
    name: 'Tenant Dashboard (Owner)',
    startUrl: 'http://demo.lvh.me:5173/login',
    requiresAuth: true,
    user: TEST_USERS.businessOwner,
  },
  tenantStaff: {
    name: 'Tenant Dashboard (Staff)',
    startUrl: 'http://demo.lvh.me:5173/login',
    requiresAuth: true,
    user: TEST_USERS.businessStaff,
  },
  customer: {
    name: 'Customer Booking Portal (Public)',
    startUrl: 'http://demo.lvh.me:5173/book',
    requiresAuth: false, // Public booking page
  },
  customerPortal: {
    name: 'Customer Portal (Logged In)',
    startUrl: 'http://demo.lvh.me:5173/login',
    requiresAuth: true,
    user: TEST_USERS.customer,
  },
};

function parseArgs(): {
  areas: string[];
  maxPages: number;
  verbose: boolean;
  screenshots: boolean;
} {
  const args = process.argv.slice(2);
  const result = {
    areas: [] as string[],
    maxPages: 0, // 0 = unlimited
    verbose: false,
    screenshots: false,
  };

  for (const arg of args) {
    if (arg === '--public') result.areas.push('public');
    else if (arg === '--platform') result.areas.push('platform');
    else if (arg === '--tenant') result.areas.push('tenant');
    else if (arg === '--tenant-staff') result.areas.push('tenantStaff');
    else if (arg === '--customer') result.areas.push('customer');
    else if (arg === '--customer-portal') result.areas.push('customerPortal');
    else if (arg === '--all') result.areas = Object.keys(CRAWL_AREAS);
    else if (arg.startsWith('--max-pages=')) result.maxPages = parseInt(arg.split('=')[1], 10);
    else if (arg === '--verbose') result.verbose = true;
    else if (arg === '--screenshots') result.screenshots = true;
    else if (arg === '--help' || arg === '-h') {
      console.log(`
Site Crawler - Find broken links and errors

Usage: npm run crawl -- [options]

Options:
  --public           Crawl public marketing site only (default)
  --platform         Crawl platform dashboard (requires login)
  --tenant           Crawl tenant dashboard as Business Owner
  --tenant-staff     Crawl tenant dashboard as Staff Member
  --customer         Crawl customer booking portal (public)
  --customer-portal  Crawl customer portal as a logged-in customer
  --all              Crawl all areas
  --max-pages=N      Maximum pages to crawl per area (default: unlimited)
  --verbose          Show detailed logging
  --screenshots      Save screenshots on errors

Examples:
  npm run crawl -- --public
  npm run crawl -- --tenant --max-pages=50
  npm run crawl -- --all --verbose
`);
      process.exit(0);
    }
  }

  // Default to public only if no areas specified
  if (result.areas.length === 0) {
    result.areas = ['public'];
  }

  // Filter out areas that don't exist in CRAWL_AREAS
  result.areas = result.areas.filter(area => CRAWL_AREAS[area]);

  return result;
}

async function crawlArea(
  browser: Browser,
  area: CrawlArea,
  options: CrawlerOptions
): Promise<CrawlReport> {
  console.log(`\n${'═'.repeat(60)}`);
  console.log(` Crawling: ${area.name}`);
  console.log(` URL: ${area.startUrl}`);
  console.log(`${'═'.repeat(60)}`);

  const context = await browser.newContext({
    viewport: { width: 1280, height: 720 },
    ignoreHTTPSErrors: true,
  });
  const page = await context.newPage();

  try {
    // Login if required
    let crawlStartUrl = area.startUrl;
    if (area.requiresAuth && area.user) {
      const loggedIn = await loginAsUser(page, area.user);
      if (!loggedIn) {
        console.error(`Failed to login for ${area.name}. Skipping.`);
        return {
          startTime: new Date(),
          endTime: new Date(),
          totalPages: 0,
          totalErrors: 1,
          results: [],
          summary: {
            consoleErrors: 0,
            networkErrors: 0,
            brokenLinks: 0,
            pageErrors: 1,
          },
        };
      }
      // After login, start from the current URL (typically dashboard)
      crawlStartUrl = page.url();
    }

    // Create crawler and run
    const crawler = new SiteCrawler(page, context, options);
    const report = await crawler.crawl(crawlStartUrl);
    return report;
  } finally {
    await context.close();
  }
}

function saveReport(reports: Map<string, CrawlReport>): string {
  // Ensure output directory exists
  const outputDir = path.join(process.cwd(), 'test-results');
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const filename = path.join(outputDir, `crawl-report-${timestamp}.json`);

  const reportData = {
    timestamp: new Date().toISOString(),
    areas: Object.fromEntries(reports),
  };

  fs.writeFileSync(filename, JSON.stringify(reportData, null, 2));
  return filename;
}

async function main() {
  const { areas, maxPages, verbose, screenshots } = parseArgs();

  console.log('\nšŸ•·ļø SmoothSchedule Site Crawler');
  console.log('─'.repeat(40));
  console.log(`Areas to crawl: ${areas.join(', ')}`);
  console.log(`Max pages per area: ${maxPages}`);
  console.log(`Verbose: ${verbose}`);
  console.log(`Screenshots: ${screenshots}`);

  const browser = await chromium.launch({
    headless: true,
  });

  const options: CrawlerOptions = {
    maxPages,
    verbose,
    screenshotOnError: screenshots,
    screenshotDir: 'test-results/crawler-screenshots',
    timeout: 30000,
    waitForNetworkIdle: true,
  };

  // Ensure screenshot directory exists if enabled
  if (screenshots && !fs.existsSync(options.screenshotDir!)) {
    fs.mkdirSync(options.screenshotDir!, { recursive: true });
  }

  const reports = new Map<string, CrawlReport>();
  let totalErrors = 0;

  try {
    for (const areaKey of areas) {
      const area = CRAWL_AREAS[areaKey];
      if (!area) continue;

      const report = await crawlArea(browser, area, options);
      reports.set(areaKey, report);
      totalErrors += report.totalErrors;

      // Print report for this area
      console.log(formatReport(report));
    }

    // Save combined report
    const reportFile = saveReport(reports);
    console.log(`\nšŸ“„ Full report saved to: ${reportFile}`);

    // Final summary
    console.log('\n' + '═'.repeat(60));
    console.log(' FINAL SUMMARY');
    console.log('═'.repeat(60));
    let totalPages = 0;
    for (const [areaKey, report] of reports) {
      const area = CRAWL_AREAS[areaKey];
'āœ…' : 'āŒ'; console.log(`${icon} ${area.name}: ${report.totalPages} pages, ${report.totalErrors} errors`); totalPages += report.totalPages; } console.log('─'.repeat(60)); console.log(` Total: ${totalPages} pages crawled, ${totalErrors} errors found`); console.log('═'.repeat(60) + '\n'); // Exit with error code if errors found process.exit(totalErrors > 0 ? 1 : 0); } finally { await browser.close(); } } main().catch(error => { console.error('Crawler failed:', error); process.exit(1); });