Files
smoothschedule/frontend/tests/e2e/utils/crawler.ts
poduck 94e37a2522 Add Site Builder help docs and fix FloatingHelpButton paths
- Add HelpSiteBuilder.tsx with comprehensive documentation for the
  drag-and-drop page editor (components, publishing, settings)
- Fix FloatingHelpButton to use /dashboard/help/* paths on tenant sites
- Update HelpComprehensive and HelpAutomations to rename plugins to automations
- Add site-crawler utility with cross-subdomain redirect detection

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-16 22:42:46 -05:00

614 lines
18 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Site Crawler Utility
* Crawls the site discovering links and capturing errors
*/
import { Page, BrowserContext } from '@playwright/test';
/** A single problem recorded while one page was being crawled. */
export interface CrawlError {
/** URL of the page that was being crawled when the problem was recorded. */
url: string;
/**
 * Category of the problem:
 * - `console`: a browser console message of type error or warning
 * - `network`: a failed request, an HTTP response with status >= 400, or a navigation that returned no response
 * - `broken-link`: the crawled page itself answered with status >= 400
 * - `page-error`: an uncaught page exception, or an exception thrown while crawling the page
 * - `redirect`: the page ended up on a different lvh.me subdomain than the one requested
 */
type: 'console' | 'network' | 'broken-link' | 'page-error' | 'redirect';
/** Human-readable description of the problem. */
message: string;
/** Optional extra context (for example a stack trace, a source location or the final URL). */
details?: string;
/** When the problem was recorded. */
timestamp: Date;
}
/** Outcome of crawling a single URL. */
export interface CrawlResult {
/** The (normalized) URL that was crawled. */
url: string;
/**
 * `error` when at least one CrawlError was recorded or navigation failed, `success` otherwise.
 * NOTE(review): nothing in this file produces `skipped`; confirm whether other code relies on it.
 */
status: 'success' | 'error' | 'skipped';
/** Document title; absent when the page could not be loaded. */
title?: string;
/** Problems recorded while this page was being crawled. */
errors: CrawlError[];
/** Crawlable links discovered on this page (normalized and de-duplicated). */
linksFound: string[];
/** Wall-clock time spent on this page, in milliseconds. */
duration: number;
}
/** Aggregated outcome of one complete crawl, as returned by `SiteCrawler.crawl`. */
export interface CrawlReport {
/** When the crawl started. */
startTime: Date;
/** When the crawl finished. */
endTime: Date;
/** Number of pages that were crawled (one CrawlResult per page). */
totalPages: number;
/** Aggregate error count computed by the crawler from the per-page errors. */
totalErrors: number;
/** Per-page results, in the order the pages were crawled. */
results: CrawlResult[];
/** Number of recorded errors, broken down by CrawlError type. */
summary: {
/** Errors of type `console`. */
consoleErrors: number;
/** Errors of type `network`. */
networkErrors: number;
/** Errors of type `broken-link`. */
brokenLinks: number;
/** Errors of type `page-error`. */
pageErrors: number;
/** Errors of type `redirect`. */
redirects: number;
};
}
/** Tuning knobs for SiteCrawler; every field is optional and falls back to DEFAULT_OPTIONS. */
export interface CrawlerOptions {
/** Maximum number of pages to crawl; 0 means unlimited. */
maxPages?: number;
/** Navigation timeout in milliseconds, passed to `page.goto`. */
timeout?: number;
/** URLs matching any of these patterns are never queued for crawling. */
excludePatterns?: RegExp[];
/** When true, links that are not recognised as internal are crawled as well. */
includeExternalLinks?: boolean;
/** When true, navigation waits for `networkidle`; otherwise for `domcontentloaded`. */
waitForNetworkIdle?: boolean;
/** When true, a full-page screenshot is taken for every page that recorded errors. */
screenshotOnError?: boolean;
/** Directory the error screenshots are written to. */
screenshotDir?: string;
/** When true, the crawler prints `[Crawler]`-prefixed progress messages to the console. */
verbose?: boolean;
/** Detect when page redirects to a different subdomain (e.g., tenant to public) */
detectCrossSubdomainRedirects?: boolean;
}
/**
 * Defaults merged underneath the options passed to the SiteCrawler constructor.
 *
 * The exclude patterns are tested against the full, normalized URL. Normalization
 * strips the hash but keeps the query string, so the file-extension pattern
 * accepts an optional `?query` suffix; otherwise `report.pdf?download=1` would be
 * navigated to like a regular page.
 */
const DEFAULT_OPTIONS: CrawlerOptions = {
  maxPages: 0, // 0 = unlimited
  timeout: 30000,
  excludePatterns: [
    // Downloadable artifacts, with or without a query string.
    /\.(pdf|zip|tar|gz|exe|dmg|pkg)(\?.*)?$/i,
    // Non-HTTP schemes that cannot be navigated to.
    /^mailto:/i,
    /^tel:/i,
    /^javascript:/i,
    // Visiting these would end the authenticated session mid-crawl.
    /logout/i,
    /sign-out/i,
  ],
  includeExternalLinks: false,
  waitForNetworkIdle: true,
  screenshotOnError: false,
  screenshotDir: 'test-results/crawler-screenshots',
  verbose: false,
  detectCrossSubdomainRedirects: false,
};
/**
 * Breadth-first site crawler driven by a single Playwright page.
 *
 * Starting from one URL, it visits every crawlable link it discovers, and for
 * each page records console errors/warnings, uncaught page exceptions, failed
 * requests, HTTP responses with status >= 400 and (optionally) redirects to a
 * different lvh.me subdomain.
 */
export class SiteCrawler {
  private readonly page: Page;
  private readonly context: BrowserContext;
  private readonly options: CrawlerOptions;
  /** Normalized URLs that have already been taken off the queue and crawled. */
  private readonly visited: Set<string> = new Set();
  /** Every normalized URL placed on the queue during the current crawl (O(1) dedup). */
  private readonly enqueued: Set<string> = new Set();
  private queue: string[] = [];
  private results: CrawlResult[] = [];
  private baseUrl = '';
  private baseDomain = '';

  /**
   * @param page Playwright page used for all navigation.
   * @param context Browser context the page belongs to.
   * @param options Overrides merged on top of DEFAULT_OPTIONS.
   */
  constructor(page: Page, context: BrowserContext, options: Partial<CrawlerOptions> = {}) {
    this.page = page;
    this.context = context;
    this.options = { ...DEFAULT_OPTIONS, ...options };
  }

  /** Prints a `[Crawler]`-prefixed message, but only when the `verbose` option is set. */
  private log(message: string, ...args: unknown[]): void {
    if (this.options.verbose) {
      console.log(`[Crawler] ${message}`, ...args);
    }
  }

  /**
   * Resolves `url` against the start URL, drops the hash and removes a trailing
   * slash (except on the bare origin) so equivalent links compare equal.
   * Returns the input unchanged when it cannot be parsed.
   */
  private normalizeUrl(url: string): string {
    try {
      const parsed = new URL(url, this.baseUrl);
      parsed.hash = '';
      const href = parsed.href;
      if (href.endsWith('/') && href !== parsed.origin + '/') {
        return href.slice(0, -1);
      }
      return href;
    } catch {
      return url;
    }
  }

  /**
   * Reports whether `url` points at the site under test: the host the crawl
   * started from, lvh.me or one of its subdomains, localhost or 127.0.0.1.
   */
  private isInternalUrl(url: string): boolean {
    try {
      const host = new URL(url, this.baseUrl).hostname;
      // An empty baseDomain (crawl not started) must not match host-less URLs such as mailto:.
      if (this.baseDomain !== '' && host === this.baseDomain) {
        return true;
      }
      // Match on a label boundary so that e.g. "evillvh.me" is not treated as internal.
      return (
        host === 'lvh.me' ||
        host.endsWith('.lvh.me') ||
        host === 'localhost' ||
        host === '127.0.0.1'
      );
    } catch {
      return false;
    }
  }

  /**
   * Returns the left-most label of a `*.lvh.me` hostname, or null when the URL
   * has no lvh.me subdomain or cannot be parsed.
   */
  private getSubdomain(url: string): string | null {
    try {
      const hostname = new URL(url).hostname;
      if (hostname.endsWith('.lvh.me')) {
        const parts = hostname.split('.');
        if (parts.length >= 3) {
          return parts[0] ?? null;
        }
      }
      // No subdomain (e.g., lvh.me itself)
      return null;
    } catch {
      return null;
    }
  }

  /** Decides whether a normalized URL is eligible to be queued for crawling. */
  private shouldCrawl(url: string): boolean {
    if (this.visited.has(url)) {
      return false;
    }
    for (const pattern of this.options.excludePatterns ?? []) {
      // A caller-supplied pattern with the g or y flag keeps state between
      // test() calls; reset it so every URL is matched from the start.
      pattern.lastIndex = 0;
      if (pattern.test(url)) {
        this.log(`Skipping excluded URL: ${url}`);
        return false;
      }
    }
    if (!this.options.includeExternalLinks && !this.isInternalUrl(url)) {
      this.log(`Skipping external URL: ${url}`);
      return false;
    }
    return true;
  }

  /**
   * Collects the `href` of every anchor on the current page and returns the
   * crawlable ones, normalized and de-duplicated.
   */
  private async extractLinks(): Promise<string[]> {
    const hrefs = await this.page.evaluate(() =>
      Array.from(document.querySelectorAll('a[href]'))
        .map(anchor => anchor.getAttribute('href'))
        .filter((href): href is string => Boolean(href)),
    );
    const crawlable = new Set<string>();
    for (const href of hrefs) {
      const normalized = this.normalizeUrl(href);
      if (this.shouldCrawl(normalized)) {
        crawlable.add(normalized);
      }
    }
    return [...crawlable];
  }

  /**
   * Navigates to `url`, records every problem observed while the page loads,
   * and queues newly discovered links. Never throws: navigation failures are
   * reported as a `page-error` in the returned result.
   */
  private async crawlPage(url: string): Promise<CrawlResult> {
    const startTime = Date.now();
    const errors: CrawlError[] = [];
    const linksFound: string[] = [];
    this.log(`Crawling: ${url}`);

    // Listeners are scoped to this navigation and removed in `finally`.
    const consoleHandler = (msg: {
      type: () => string;
      text: () => string;
      location: () => { url: string; lineNumber: number };
    }) => {
      const type = msg.type();
      if (type !== 'error' && type !== 'warning') {
        return;
      }
      const text = msg.text();
      // Filter out non-critical warnings
      if (text.includes('width(-1) and height(-1) of chart')) return; // Recharts initial render warning
      if (text.includes('WebSocket')) return; // WebSocket connection issues in dev
      if (text.includes('Cross-Origin-Opener-Policy')) return; // COOP header in dev environment
      if (text.includes('must use HTTPS') && text.includes('Stripe')) return; // Stripe dev mode warning
      // Everything else, including backend errors (403, 404, 500), is reported.
      errors.push({
        url,
        type: 'console',
        message: text,
        details: `${type.toUpperCase()} at ${msg.location().url}:${msg.location().lineNumber}`,
        timestamp: new Date(),
      });
    };

    const pageErrorHandler = (error: Error) => {
      errors.push({
        url,
        type: 'page-error',
        message: error.message,
        details: error.stack,
        timestamp: new Date(),
      });
    };

    const requestFailedHandler = (request: {
      url: () => string;
      failure: () => { errorText: string } | null;
    }) => {
      const failedUrl = request.url();
      // Ignore some common non-critical failures
      if (failedUrl.includes('favicon.ico') || failedUrl.includes('hot-update')) {
        return;
      }
      // Ignore Stripe external requests (tracking/monitoring that gets cancelled)
      if (failedUrl.includes('stripe.com') || failedUrl.includes('stripe.network')) {
        return;
      }
      errors.push({
        url,
        type: 'network',
        message: `Request failed: ${failedUrl}`,
        details: request.failure()?.errorText || 'Unknown error',
        timestamp: new Date(),
      });
    };

    const responseHandler = (response: { url: () => string; status: () => number }) => {
      const status = response.status();
      const responseUrl = response.url();
      // Track 4xx and 5xx responses, except for the favicon.
      if (status >= 400 && !responseUrl.includes('favicon.ico')) {
        errors.push({
          url,
          type: 'network',
          message: `HTTP ${status}: ${responseUrl}`,
          details: `Response status ${status}`,
          timestamp: new Date(),
        });
      }
    };

    this.page.on('console', consoleHandler);
    this.page.on('pageerror', pageErrorHandler);
    this.page.on('requestfailed', requestFailedHandler);
    this.page.on('response', responseHandler);

    try {
      const response = await this.page.goto(url, {
        timeout: this.options.timeout,
        waitUntil: this.options.waitForNetworkIdle ? 'networkidle' : 'domcontentloaded',
      });

      if (!response) {
        errors.push({
          url,
          type: 'network',
          message: 'No response received',
          timestamp: new Date(),
        });
        return {
          url,
          status: 'error',
          errors,
          linksFound,
          duration: Date.now() - startTime,
        };
      }

      const status = response.status();
      if (status >= 400) {
        errors.push({
          url,
          type: 'broken-link',
          message: `HTTP ${status}`,
          timestamp: new Date(),
        });
      }

      // Check for cross-subdomain redirects
      if (this.options.detectCrossSubdomainRedirects) {
        const finalUrl = this.page.url();
        const requestedSubdomain = this.getSubdomain(url);
        const finalSubdomain = this.getSubdomain(finalUrl);
        if (requestedSubdomain && requestedSubdomain !== finalSubdomain) {
          errors.push({
            url,
            type: 'redirect',
            message: `Redirected from ${requestedSubdomain}.lvh.me to ${finalSubdomain || 'root'}.lvh.me`,
            details: `Final URL: ${finalUrl}`,
            timestamp: new Date(),
          });
        }
      }

      // Wait a bit for React to render and any async operations
      await this.page.waitForTimeout(500);

      const title = await this.page.title();

      const links = await this.extractLinks();
      linksFound.push(...links);

      // Queue each newly discovered link exactly once per crawl.
      for (const link of links) {
        if (!this.visited.has(link) && !this.enqueued.has(link)) {
          this.enqueued.add(link);
          this.queue.push(link);
        }
      }

      // Screenshot on error if enabled
      if (errors.length > 0 && this.options.screenshotOnError) {
        const filename = url.replace(/[^a-zA-Z0-9]/g, '_').substring(0, 100);
        await this.page.screenshot({
          path: `${this.options.screenshotDir}/${filename}.png`,
          fullPage: true,
        });
      }

      return {
        url,
        status: errors.length > 0 ? 'error' : 'success',
        title,
        errors,
        linksFound,
        duration: Date.now() - startTime,
      };
    } catch (error) {
      errors.push({
        url,
        type: 'page-error',
        message: error instanceof Error ? error.message : String(error),
        timestamp: new Date(),
      });
      return {
        url,
        status: 'error',
        errors,
        linksFound,
        duration: Date.now() - startTime,
      };
    } finally {
      this.page.off('console', consoleHandler);
      this.page.off('pageerror', pageErrorHandler);
      this.page.off('requestfailed', requestFailedHandler);
      this.page.off('response', responseHandler);
    }
  }

  /**
   * Crawls the site breadth-first starting at `startUrl` until the queue is
   * empty or `maxPages` pages have been crawled (0 = unlimited).
   *
   * @param startUrl Absolute URL to start from; also the base for resolving relative links.
   * @returns A report with per-page results and error counts. `totalErrors`
   *   counts every recorded error, including cross-subdomain redirects.
   * @throws TypeError when `startUrl` is not a valid absolute URL.
   */
  async crawl(startUrl: string): Promise<CrawlReport> {
    const startTime = new Date();
    this.baseUrl = startUrl;
    this.baseDomain = new URL(startUrl).hostname;

    const firstUrl = this.normalizeUrl(startUrl);
    this.queue = [firstUrl];
    this.visited.clear();
    this.enqueued.clear();
    this.enqueued.add(firstUrl);
    this.results = [];

    console.log(`\n🕷 Starting crawl from: ${startUrl}`);
    console.log(` Max pages: ${this.options.maxPages || 'unlimited'}`);
    console.log('');

    const maxPages = this.options.maxPages || 0; // 0 = unlimited
    const maxDisplay = maxPages === 0 ? '∞' : maxPages;

    while (this.queue.length > 0 && (maxPages === 0 || this.results.length < maxPages)) {
      const url = this.queue.shift();
      if (url === undefined || this.visited.has(url)) {
        continue;
      }
      this.visited.add(url);

      const result = await this.crawlPage(url);
      this.results.push(result);

      // Progress indicator
      const errorCount = result.errors.length;
      const statusIcon = result.status === 'success' ? '✓' : '✗';
      const errorInfo = errorCount > 0 ? ` (${errorCount} error${errorCount > 1 ? 's' : ''})` : '';
      console.log(` ${statusIcon} [${this.results.length}/${maxDisplay}] ${url}${errorInfo}`);
    }

    const endTime = new Date();

    const summary = {
      consoleErrors: 0,
      networkErrors: 0,
      brokenLinks: 0,
      pageErrors: 0,
      redirects: 0,
    };
    let totalErrors = 0;
    for (const result of this.results) {
      for (const error of result.errors) {
        // Count every recorded error so the total agrees with the per-page
        // statuses and details, whatever its category.
        totalErrors++;
        switch (error.type) {
          case 'console':
            summary.consoleErrors++;
            break;
          case 'network':
            summary.networkErrors++;
            break;
          case 'broken-link':
            summary.brokenLinks++;
            break;
          case 'page-error':
            summary.pageErrors++;
            break;
          case 'redirect':
            summary.redirects++;
            break;
        }
      }
    }

    return {
      startTime,
      endTime,
      totalPages: this.results.length,
      totalErrors,
      results: this.results,
      summary,
    };
  }
}
/**
 * Renders a crawl report as a human-readable, multi-line text block.
 *
 * The output contains a summary, the per-category error counts and, for every
 * page that recorded errors, its URL, title and the individual errors. Error
 * details are cut off after 200 characters.
 *
 * @param report Report produced by a crawl.
 * @returns The formatted text, starting with a blank line and ending with a newline.
 */
export function formatReport(report: CrawlReport): string {
  const heavyRule = '═'.repeat(60);
  const lightRule = '─'.repeat(60);
  const elapsedSeconds = (report.endTime.getTime() - report.startTime.getTime()) / 1000;
  const counts = report.summary;

  const chunks: string[] = [
    '\n',
    heavyRule + '\n',
    ' CRAWL REPORT\n',
    heavyRule + '\n\n',
    `📊 Summary\n`,
    ` Pages crawled: ${report.totalPages}\n`,
    ` Total errors: ${report.totalErrors}\n`,
    ` Duration: ${elapsedSeconds.toFixed(1)}s\n\n`,
    `📋 Error Breakdown\n`,
    ` Console errors: ${counts.consoleErrors}\n`,
    ` Network errors: ${counts.networkErrors}\n`,
    ` Broken links: ${counts.brokenLinks}\n`,
    ` Page errors: ${counts.pageErrors}\n`,
    ` Redirects: ${counts.redirects}\n\n`,
  ];

  // Only pages that recorded at least one error get a details section.
  const failingPages = report.results.filter(page => page.errors.length > 0);

  if (failingPages.length === 0) {
    chunks.push('✅ No errors found!\n\n');
  } else {
    chunks.push(lightRule + '\n', ' ERROR DETAILS\n', lightRule + '\n\n');
    for (const page of failingPages) {
      chunks.push(`🔗 ${page.url}\n`);
      chunks.push(` Title: ${page.title || 'N/A'}\n`);
      for (const problem of page.errors) {
        let icon: string;
        switch (problem.type) {
          case 'console':
            icon = '⚠️';
            break;
          case 'network':
            icon = '🌐';
            break;
          case 'broken-link':
            icon = '🔴';
            break;
          case 'redirect':
            icon = '↪️';
            break;
          default:
            icon = '💥';
        }
        chunks.push(` ${icon} [${problem.type.toUpperCase()}] ${problem.message}\n`);
        if (problem.details) {
          chunks.push(` Details: ${problem.details.substring(0, 200)}\n`);
        }
      }
      chunks.push('\n');
    }
  }

  chunks.push(heavyRule + '\n');
  return chunks.join('');
}
// Authentication helpers for different user types
/** Login details for one test account, as consumed by `loginAsUser`. */
export interface UserCredentials {
/** Value typed into the email field of the login form. */
username: string;
/** Value typed into the password field of the login form. */
password: string;
/** Absolute URL of the login page to open for this user. */
loginUrl: string;
/** Human-readable role name; used in log output and to find a matching quick-login button. */
description: string;
}
/**
 * Test accounts for the user roles exercised by the crawler, keyed by role.
 *
 * NOTE(review): these credentials are committed in plain text and the platform
 * superuser entry looks like a personal account; confirm they are throwaway
 * development-only values.
 */
export const TEST_USERS: Record<string, UserCredentials> = (() => {
  // All demo-tenant accounts share the same login page and password.
  const demoTenantUser = (username: string, description: string) => ({
    username,
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description,
  });

  return {
    platformSuperuser: {
      username: 'poduck@gmail.com',
      password: 'starry12',
      loginUrl: 'http://platform.lvh.me:5173/platform/login',
      description: 'Platform Superuser',
    },
    businessOwner: demoTenantUser('owner@demo.com', 'Business Owner'),
    businessManager: demoTenantUser('manager@demo.com', 'Business Manager'),
    businessStaff: demoTenantUser('staff@demo.com', 'Staff Member'),
    customer: demoTenantUser('customer@demo.com', 'Customer'),
  };
})();
/**
 * Signs the given test user in through the browser UI.
 *
 * Opens the user's login page and returns early when a dashboard is already
 * showing. Otherwise it clicks a quick-login button whose name matches the
 * user's description, or, when no such button is visible, fills in the login
 * form and submits it.
 *
 * @param page Playwright page to drive.
 * @param user Credentials and login URL of the account to sign in with.
 * @returns true when the page ends up away from the login/root page (or was
 *   already on a dashboard); false when login appears to have failed or any
 *   step threw.
 */
export async function loginAsUser(page: Page, user: UserCredentials): Promise<boolean> {
  console.log(`\n🔐 Logging in as ${user.description}...`);
  console.log(` Login URL: ${user.loginUrl}`);

  // Visibility probe that treats a rejected check as "not visible".
  const isShown = async (target: { isVisible(): Promise<boolean> }): Promise<boolean> => {
    try {
      return await target.isVisible();
    } catch {
      return false;
    }
  };

  try {
    await page.goto(user.loginUrl);
    await page.waitForLoadState('networkidle');
    await page.waitForTimeout(1000);

    // Already signed in? Either the URL or a visible heading says "dashboard".
    let onDashboard = page.url().includes('/dashboard');
    if (!onDashboard) {
      onDashboard = await isShown(page.getByRole('heading', { name: /dashboard/i }));
    }
    if (onDashboard) {
      console.log(` ✓ Already logged in. Current URL: ${page.url()}`);
      return true;
    }

    // Prefer the dev-mode quick-login button when one matches this user.
    const quickLoginButton = page.getByRole('button', { name: new RegExp(user.description, 'i') });
    if (await isShown(quickLoginButton)) {
      console.log(` Using quick login button for ${user.description}...`);
      await quickLoginButton.click();
    } else {
      // Regular form login: look the fields up by id first, then by placeholder.
      let emailField = page.locator('#email');
      let passwordField = page.locator('#password');
      let foundById = true;
      try {
        await emailField.waitFor({ timeout: 10000 });
      } catch {
        foundById = false;
      }
      if (!foundById) {
        emailField = page.getByPlaceholder(/enter your email/i);
        passwordField = page.getByPlaceholder(/password/i);
        await emailField.waitFor({ timeout: 5000 });
      }
      await emailField.fill(user.username);
      await passwordField.fill(user.password);
      await page.getByRole('button', { name: /^sign in$/i }).click();
    }

    // Give the post-login navigation time to settle.
    await page.waitForLoadState('networkidle');
    await page.waitForTimeout(2000);

    // Still on a login page or on the bare dev-server root means login did not go through.
    const currentUrl = page.url();
    const stillAtEntry =
      currentUrl.includes('/login') ||
      currentUrl.endsWith(':5173/') ||
      currentUrl.endsWith(':5173');
    const isLoggedIn = !stillAtEntry;

    if (isLoggedIn) {
      console.log(` ✓ Logged in successfully. Current URL: ${currentUrl}`);
    } else {
      console.log(` ✗ Login may have failed. Current URL: ${currentUrl}`);
    }
    return isLoggedIn;
  } catch (error) {
    console.error(` ✗ Login failed:`, error);
    return false;
  }
}