- Add HelpSiteBuilder.tsx with comprehensive documentation for the drag-and-drop page editor (components, publishing, settings)
- Fix FloatingHelpButton to use /dashboard/help/* paths on tenant sites
- Update HelpComprehensive and HelpAutomations to rename plugins to automations
- Add site-crawler utility with cross-subdomain redirect detection

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
614 lines
18 KiB
TypeScript
614 lines
18 KiB
TypeScript
/**
 * Site Crawler Utility
 * Crawls the site discovering links and capturing errors
 */

import { Page, BrowserContext } from '@playwright/test';
|
||
|
||
/**
 * A single problem recorded while a page was being crawled.
 */
export interface CrawlError {
  /** URL of the page that was being crawled when the problem was recorded. */
  url: string;
  /** Category of the problem. */
  type: 'console' | 'network' | 'broken-link' | 'page-error' | 'redirect';
  /** Human-readable description of the problem. */
  message: string;
  /** Optional extra context (e.g. source location, stack trace, final URL). */
  details?: string;
  /** Moment at which the problem was recorded. */
  timestamp: Date;
}

/**
 * Outcome of crawling one page.
 */
export interface CrawlResult {
  /** URL that was requested. */
  url: string;
  /** Overall outcome for the page. */
  status: 'success' | 'error' | 'skipped';
  /** Document title, when the page loaded far enough to read it. */
  title?: string;
  /** Every problem recorded while the page was being crawled. */
  errors: CrawlError[];
  /** Crawlable links discovered on the page. */
  linksFound: string[];
  /** Time spent on the page, in milliseconds. */
  duration: number;
}

/**
 * Aggregated outcome of a whole crawl.
 */
export interface CrawlReport {
  /** When the crawl started. */
  startTime: Date;
  /** When the crawl finished. */
  endTime: Date;
  /** Number of pages that were crawled. */
  totalPages: number;
  /** Number of errors counted for the crawl. */
  totalErrors: number;
  /** Per-page results, in the order the pages were crawled. */
  results: CrawlResult[];
  /** Error counts broken down by `CrawlError.type`. */
  summary: {
    consoleErrors: number;
    networkErrors: number;
    brokenLinks: number;
    pageErrors: number;
    redirects: number;
  };
}

/**
 * Tunables for a crawl. Every field is optional.
 */
export interface CrawlerOptions {
  /** Maximum number of pages to crawl; `0` means unlimited. */
  maxPages?: number;
  /** Navigation timeout handed to `page.goto`. */
  timeout?: number;
  /** URLs matching any of these patterns are never crawled. */
  excludePatterns?: RegExp[];
  /** When true, links that are not considered internal are crawled as well. */
  includeExternalLinks?: boolean;
  /** Wait for `networkidle` after navigation instead of `domcontentloaded`. */
  waitForNetworkIdle?: boolean;
  /** Take a full-page screenshot of pages that produced errors. */
  screenshotOnError?: boolean;
  /** Directory that error screenshots are written to. */
  screenshotDir?: string;
  /** Emit `[Crawler]` diagnostics through `console.log`. */
  verbose?: boolean;
  /** Detect when page redirects to a different subdomain (e.g., tenant to public) */
  detectCrossSubdomainRedirects?: boolean;
}

/**
 * Baseline crawler settings: unlimited pages, 30000 timeout, internal links
 * only, network-idle waits, no screenshots, quiet logging, and no
 * cross-subdomain redirect detection.
 */
const DEFAULT_OPTIONS: CrawlerOptions = {
  maxPages: 0, // 0 = unlimited
  timeout: 30000,
  excludePatterns: [
    // Binary downloads and archives
    /\.(pdf|zip|tar|gz|exe|dmg|pkg)$/i,
    // Non-navigable schemes
    /^mailto:/i,
    /^tel:/i,
    /^javascript:/i,
    // Sign-out links
    /logout/i,
    /sign-out/i,
  ],
  includeExternalLinks: false,
  waitForNetworkIdle: true,
  screenshotOnError: false,
  screenshotDir: 'test-results/crawler-screenshots',
  verbose: false,
  detectCrossSubdomainRedirects: false,
};

/**
 * Breadth-first site crawler driven through a single Playwright page.
 *
 * Starting from one URL it visits each discovered page once, recording
 * console errors/warnings, uncaught page errors, failed requests, HTTP 4xx/5xx
 * responses and (optionally) cross-subdomain redirects, and returns the
 * findings as a `CrawlReport`.
 */
export class SiteCrawler {
  private page: Page;
  /** Stored for callers' convenience; the crawler itself only drives `page`. */
  private context: BrowserContext;
  private options: CrawlerOptions;
  /** Normalized URLs that have already been crawled. */
  private visited: Set<string> = new Set();
  /** FIFO list of normalized URLs still to crawl. */
  private queue: string[] = [];
  /** Every URL ever placed on `queue`; gives O(1) de-duplication. */
  private queued: Set<string> = new Set();
  private results: CrawlResult[] = [];
  /** Start URL; relative links are resolved against it. */
  private baseUrl: string = '';
  /** Hostname of the start URL; always treated as internal. */
  private baseDomain: string = '';

  /**
   * @param page Page used for every navigation.
   * @param context Browser context that owns `page`.
   * @param options Overrides merged on top of `DEFAULT_OPTIONS`.
   */
  constructor(page: Page, context: BrowserContext, options: Partial<CrawlerOptions> = {}) {
    this.page = page;
    this.context = context;
    this.options = { ...DEFAULT_OPTIONS, ...options };
  }

  /** Writes a `[Crawler]`-prefixed diagnostic when `verbose` is enabled. */
  private log(message: string, ...args: unknown[]): void {
    if (this.options.verbose) {
      console.log(`[Crawler] ${message}`, ...args);
    }
  }

  /**
   * Resolves `url` against the start URL and returns a canonical form used for
   * de-duplication: the fragment is dropped and one trailing slash is removed
   * from the path (the bare root `/` is kept). Unparseable input is returned
   * unchanged.
   */
  private normalizeUrl(url: string): string {
    try {
      const parsed = new URL(url, this.baseUrl);
      parsed.hash = '';
      // Trim on the path only, so a query string that happens to end in "/"
      // is left intact.
      if (parsed.pathname.length > 1 && parsed.pathname.endsWith('/')) {
        parsed.pathname = parsed.pathname.slice(0, -1);
      }
      return parsed.href;
    } catch {
      return url;
    }
  }

  /**
   * Reports whether `url` belongs to the site under test: the start URL's own
   * host, `lvh.me` or one of its subdomains, `localhost`, or `127.0.0.1`.
   * Unparseable URLs are treated as external.
   */
  private isInternalUrl(url: string): boolean {
    try {
      const { hostname } = new URL(url, this.baseUrl);
      return (
        (this.baseDomain !== '' && hostname === this.baseDomain) ||
        hostname === 'lvh.me' ||
        // Require the dot so look-alike hosts such as "evillvh.me" stay external.
        hostname.endsWith('.lvh.me') ||
        hostname === 'localhost' ||
        hostname === '127.0.0.1'
      );
    } catch {
      return false;
    }
  }

  /**
   * Returns the part of the hostname in front of `.lvh.me` (for example
   * `demo` for `demo.lvh.me`), or `null` when the URL is not on a subdomain
   * of `lvh.me` or cannot be parsed.
   */
  private getSubdomain(url: string): string | null {
    const suffix = '.lvh.me';
    try {
      const { hostname } = new URL(url);
      if (hostname.endsWith(suffix) && hostname.length > suffix.length) {
        return hostname.slice(0, -suffix.length);
      }
      // No subdomain (e.g., lvh.me itself)
      return null;
    } catch {
      return null;
    }
  }

  /**
   * Decides whether a normalized URL should be queued: it must not have been
   * visited, must not match an exclude pattern, and must be internal unless
   * `includeExternalLinks` is set.
   */
  private shouldCrawl(url: string): boolean {
    if (this.visited.has(url)) {
      return false;
    }

    for (const pattern of this.options.excludePatterns ?? []) {
      // A caller-supplied pattern may carry the g/y flag, which makes test()
      // resume from lastIndex; reset it so every URL is matched from the start.
      pattern.lastIndex = 0;
      if (pattern.test(url)) {
        this.log(`Skipping excluded URL: ${url}`);
        return false;
      }
    }

    if (!this.options.includeExternalLinks && !this.isInternalUrl(url)) {
      this.log(`Skipping external URL: ${url}`);
      return false;
    }

    return true;
  }

  /**
   * Collects the `href` of every anchor on the current page and returns the
   * distinct, normalized URLs that pass `shouldCrawl`.
   */
  private async extractLinks(): Promise<string[]> {
    const hrefs = await this.page.evaluate(() => {
      const found: string[] = [];
      document.querySelectorAll('a[href]').forEach(anchor => {
        const href = anchor.getAttribute('href');
        if (href) {
          found.push(href);
        }
      });
      return found;
    });

    const crawlable = hrefs
      .map(href => this.normalizeUrl(href))
      .filter(candidate => this.shouldCrawl(candidate));

    return [...new Set(crawlable)];
  }

  /** Adds `link` to the queue unless it was already visited or queued. */
  private enqueue(link: string): void {
    if (this.visited.has(link) || this.queued.has(link)) {
      return;
    }
    this.queued.add(link);
    this.queue.push(link);
  }

  /**
   * Best-effort full-page screenshot for a page that produced errors. Does
   * nothing unless `screenshotOnError` is set. A failing screenshot is only
   * logged, so it never turns into an error attributed to the crawled page.
   */
  private async captureScreenshot(url: string): Promise<void> {
    if (!this.options.screenshotOnError) {
      return;
    }
    const filename = url.replace(/[^a-zA-Z0-9]/g, '_').substring(0, 100);
    try {
      await this.page.screenshot({
        path: `${this.options.screenshotDir}/${filename}.png`,
        fullPage: true,
      });
    } catch (error) {
      this.log(`Screenshot failed for ${url}:`, error);
    }
  }

  /**
   * Navigates to `url`, records everything that goes wrong while it loads,
   * reads the title, and queues newly discovered links.
   *
   * Never rejects: navigation failures are reported as a `page-error` entry
   * in the returned result.
   */
  private async crawlPage(url: string): Promise<CrawlResult> {
    const startTime = Date.now();
    const errors: CrawlError[] = [];
    const linksFound: string[] = [];
    let title: string | undefined;

    this.log(`Crawling: ${url}`);

    // Appends one error for this page; `details` is only set when provided.
    const record = (type: CrawlError['type'], message: string, details?: string): void => {
      errors.push({
        url,
        type,
        message,
        ...(details !== undefined ? { details } : {}),
        timestamp: new Date(),
      });
    };

    // Set up error listeners
    const consoleHandler = (msg: { type: () => string; text: () => string; location: () => { url: string; lineNumber: number } }) => {
      const type = msg.type();
      if (type !== 'error' && type !== 'warning') {
        return;
      }
      const text = msg.text();
      // Filter out non-critical warnings
      if (text.includes('width(-1) and height(-1) of chart')) return; // Recharts initial render warning
      if (text.includes('WebSocket')) return; // WebSocket connection issues in dev
      if (text.includes('Cross-Origin-Opener-Policy')) return; // COOP header in dev environment
      if (text.includes('must use HTTPS') && text.includes('Stripe')) return; // Stripe dev mode warning

      // Everything else is reported, including backend errors (403, 404, 500).
      const where = msg.location();
      record('console', text, `${type.toUpperCase()} at ${where.url}:${where.lineNumber}`);
    };

    const pageErrorHandler = (error: Error) => {
      errors.push({
        url,
        type: 'page-error',
        message: error.message,
        details: error.stack,
        timestamp: new Date(),
      });
    };

    const requestFailedHandler = (request: { url: () => string; failure: () => { errorText: string } | null }) => {
      const failedUrl = request.url();
      // Ignore some common non-critical failures
      if (failedUrl.includes('favicon.ico') || failedUrl.includes('hot-update')) {
        return;
      }
      // Ignore Stripe external requests (tracking/monitoring that gets cancelled)
      if (failedUrl.includes('stripe.com') || failedUrl.includes('stripe.network')) {
        return;
      }
      record('network', `Request failed: ${failedUrl}`, request.failure()?.errorText || 'Unknown error');
    };

    const responseHandler = (response: { url: () => string; status: () => number }) => {
      const status = response.status();
      const responseUrl = response.url();
      // Track 4xx and 5xx responses (favicon misses are benign)
      if (status >= 400 && !responseUrl.includes('favicon.ico')) {
        record('network', `HTTP ${status}: ${responseUrl}`, `Response status ${status}`);
      }
    };

    this.page.on('console', consoleHandler);
    this.page.on('pageerror', pageErrorHandler);
    this.page.on('requestfailed', requestFailedHandler);
    this.page.on('response', responseHandler);

    try {
      const response = await this.page.goto(url, {
        timeout: this.options.timeout,
        waitUntil: this.options.waitForNetworkIdle ? 'networkidle' : 'domcontentloaded',
      });

      if (!response) {
        record('network', 'No response received');
      } else {
        const status = response.status();
        if (status >= 400) {
          record('broken-link', `HTTP ${status}`);
        }

        // Check for cross-subdomain redirects
        if (this.options.detectCrossSubdomainRedirects) {
          const finalUrl = this.page.url();
          const requestedSubdomain = this.getSubdomain(url);
          const finalSubdomain = this.getSubdomain(finalUrl);

          if (requestedSubdomain && requestedSubdomain !== finalSubdomain) {
            record(
              'redirect',
              `Redirected from ${requestedSubdomain}.lvh.me to ${finalSubdomain || 'root'}.lvh.me`,
              `Final URL: ${finalUrl}`,
            );
          }
        }

        // Wait a bit for React to render and any async operations
        await this.page.waitForTimeout(500);

        title = await this.page.title();

        const links = await this.extractLinks();
        linksFound.push(...links);
        for (const link of links) {
          this.enqueue(link);
        }
      }
    } catch (error) {
      record('page-error', error instanceof Error ? error.message : String(error));
    } finally {
      // Remove listeners so the next page starts with a clean slate
      this.page.off('console', consoleHandler);
      this.page.off('pageerror', pageErrorHandler);
      this.page.off('requestfailed', requestFailedHandler);
      this.page.off('response', responseHandler);
    }

    if (errors.length > 0) {
      await this.captureScreenshot(url);
    }

    return {
      url,
      status: errors.length > 0 ? 'error' : 'success',
      ...(title !== undefined ? { title } : {}),
      errors,
      linksFound,
      duration: Date.now() - startTime,
    };
  }

  /**
   * Crawls the site breadth-first starting at `startUrl` until the queue is
   * empty or `maxPages` pages have been crawled, printing one progress line
   * per page.
   *
   * @param startUrl Absolute URL to start from.
   * @returns The per-page results plus error counts by type; `totalErrors`
   *   counts every recorded error, redirects included.
   * @throws TypeError when `startUrl` is not a valid absolute URL.
   */
  async crawl(startUrl: string): Promise<CrawlReport> {
    const startTime = new Date();
    this.baseUrl = startUrl;
    this.baseDomain = new URL(startUrl).hostname;
    this.queue = [this.normalizeUrl(startUrl)];
    this.queued = new Set(this.queue);
    this.visited.clear();
    this.results = [];

    console.log(`\n🕷️ Starting crawl from: ${startUrl}`);
    console.log(` Max pages: ${this.options.maxPages || 'unlimited'}`);
    console.log('');

    const maxPages = this.options.maxPages || 0; // 0 = unlimited
    while (this.queue.length > 0 && (maxPages === 0 || this.results.length < maxPages)) {
      const url = this.queue.shift();
      if (url === undefined || this.visited.has(url)) {
        continue;
      }

      this.visited.add(url);
      const result = await this.crawlPage(url);
      this.results.push(result);

      // Progress indicator
      const errorCount = result.errors.length;
      const statusIcon = result.status === 'success' ? '✓' : '✗';
      const errorInfo = errorCount > 0 ? ` (${errorCount} error${errorCount > 1 ? 's' : ''})` : '';
      const maxDisplay = maxPages === 0 ? '∞' : maxPages;
      console.log(` ${statusIcon} [${this.results.length}/${maxDisplay}] ${url}${errorInfo}`);
    }

    const endTime = new Date();

    // Tally errors by type
    const summary = {
      consoleErrors: 0,
      networkErrors: 0,
      brokenLinks: 0,
      pageErrors: 0,
      redirects: 0,
    };

    for (const result of this.results) {
      for (const error of result.errors) {
        switch (error.type) {
          case 'console':
            summary.consoleErrors++;
            break;
          case 'network':
            summary.networkErrors++;
            break;
          case 'broken-link':
            summary.brokenLinks++;
            break;
          case 'page-error':
            summary.pageErrors++;
            break;
          case 'redirect':
            summary.redirects++;
            break;
        }
      }
    }

    return {
      startTime,
      endTime,
      totalPages: this.results.length,
      // Redirects mark a page as failed and are listed in the report, so they
      // count towards the total as well.
      totalErrors:
        summary.consoleErrors +
        summary.networkErrors +
        summary.brokenLinks +
        summary.pageErrors +
        summary.redirects,
      results: this.results,
      summary,
    };
  }
}

/**
 * Renders a crawl report as plain text: a summary, the error counts by type,
 * and then either the errors grouped by page or a "no errors" line.
 *
 * @param report Report produced by a crawl.
 * @returns The formatted, newline-terminated text.
 */
export function formatReport(report: CrawlReport): string {
  const durationSeconds = (report.endTime.getTime() - report.startTime.getTime()) / 1000;
  const heavyRule = '═'.repeat(60);
  const lightRule = '─'.repeat(60);

  // Icon shown in front of each error line; anything unlisted gets the
  // page-error icon.
  const iconFor = (type: string): string => {
    switch (type) {
      case 'console':
        return '⚠️';
      case 'network':
        return '🌐';
      case 'broken-link':
        return '🔴';
      case 'redirect':
        return '↪️';
      default:
        return '💥';
    }
  };

  const lines: string[] = [
    '',
    heavyRule,
    ' CRAWL REPORT',
    heavyRule,
    '',
    '📊 Summary',
    ` Pages crawled: ${report.totalPages}`,
    ` Total errors: ${report.totalErrors}`,
    ` Duration: ${durationSeconds.toFixed(1)}s`,
    '',
    '📋 Error Breakdown',
    ` Console errors: ${report.summary.consoleErrors}`,
    ` Network errors: ${report.summary.networkErrors}`,
    ` Broken links: ${report.summary.brokenLinks}`,
    ` Page errors: ${report.summary.pageErrors}`,
    ` Redirects: ${report.summary.redirects}`,
    '',
  ];

  // List pages with errors
  const pagesWithErrors = report.results.filter(r => r.errors.length > 0);
  if (pagesWithErrors.length > 0) {
    lines.push(lightRule, ' ERROR DETAILS', lightRule, '');

    for (const result of pagesWithErrors) {
      lines.push(`🔗 ${result.url}`, ` Title: ${result.title || 'N/A'}`);

      for (const error of result.errors) {
        lines.push(` ${iconFor(error.type)} [${error.type.toUpperCase()}] ${error.message}`);
        if (error.details) {
          // Long details (e.g. stack traces) are cut to 200 characters.
          lines.push(` Details: ${error.details.substring(0, 200)}`);
        }
      }
      lines.push('');
    }
  } else {
    lines.push('✅ No errors found!', '');
  }

  lines.push(heavyRule);

  return lines.join('\n') + '\n';
}

// Authentication helpers for different user types

/**
 * Login details for one kind of test user.
 */
export interface UserCredentials {
  /** Value typed into the login form's email field. */
  username: string;
  /** Value typed into the login form's password field. */
  password: string;
  /** Absolute URL of the login page for this user. */
  loginUrl: string;
  /** Human-readable role name; also used to find a matching quick-login button. */
  description: string;
}

/**
 * Login details for each role exercised by the crawler, keyed by role.
 *
 * NOTE(review): these credentials are committed in source. They presumably
 * belong to seeded accounts in a local development environment (all login
 * URLs point at lvh.me:5173) — confirm that none of them is a real account.
 */
export const TEST_USERS: Record<string, UserCredentials> = {
  platformSuperuser: {
    username: 'poduck@gmail.com',
    password: 'starry12',
    loginUrl: 'http://platform.lvh.me:5173/platform/login',
    description: 'Platform Superuser',
  },
  businessOwner: {
    username: 'owner@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Business Owner',
  },
  businessManager: {
    username: 'manager@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Business Manager',
  },
  businessStaff: {
    username: 'staff@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Staff Member',
  },
  customer: {
    username: 'customer@demo.com',
    password: 'password123',
    loginUrl: 'http://demo.lvh.me:5173/login',
    description: 'Customer',
  },
};

/**
 * Logs `page` in as `user`.
 *
 * Opens the user's login URL and returns early when a dashboard is already
 * showing. Otherwise it clicks a quick-login button whose name contains the
 * user's description, or falls back to filling in the email/password form.
 *
 * @param page Page to drive.
 * @param user Credentials and login URL to use.
 * @returns `true` when the page ends up away from the login page and the site
 *   root; `false` otherwise, including when any step throws (the error is
 *   logged, never rethrown).
 */
export async function loginAsUser(page: Page, user: UserCredentials): Promise<boolean> {
  console.log(`\n🔐 Logging in as ${user.description}...`);
  console.log(` Login URL: ${user.loginUrl}`);

  try {
    await page.goto(user.loginUrl);
    await page.waitForLoadState('networkidle');
    await page.waitForTimeout(1000);

    // Check if already logged in (dashboard visible)
    const isDashboard =
      page.url().includes('/dashboard') ||
      (await page.getByRole('heading', { name: /dashboard/i }).isVisible().catch(() => false));

    if (isDashboard) {
      console.log(` ✓ Already logged in. Current URL: ${page.url()}`);
      return true;
    }

    // Try quick login buttons first (dev mode). The description is matched
    // literally, so regex metacharacters in it (e.g. parentheses) are escaped.
    const descriptionPattern = new RegExp(
      user.description.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
      'i',
    );
    const quickLoginButton = page.getByRole('button', { name: descriptionPattern });
    const hasQuickLogin = await quickLoginButton.isVisible().catch(() => false);

    if (hasQuickLogin) {
      console.log(` Using quick login button for ${user.description}...`);
      await quickLoginButton.click();
    } else {
      // Fall back to form login
      let emailInput = page.locator('#email');
      let passwordInput = page.locator('#password');

      let formFound = true;
      try {
        await emailInput.waitFor({ timeout: 10000 });
      } catch {
        formFound = false;
      }

      if (!formFound) {
        // No #email field: locate the inputs by placeholder text instead.
        emailInput = page.getByPlaceholder(/enter your email/i);
        passwordInput = page.getByPlaceholder(/password/i);
        await emailInput.waitFor({ timeout: 5000 });
      }

      await emailInput.fill(user.username);
      await passwordInput.fill(user.password);
      await page.getByRole('button', { name: /^sign in$/i }).click();
    }

    // Wait for navigation after login
    await page.waitForLoadState('networkidle');
    await page.waitForTimeout(2000);

    // Verify we're logged in (not on the login page or the bare site root)
    const currentUrl = page.url();
    const isLoggedIn =
      !currentUrl.includes('/login') &&
      !currentUrl.endsWith(':5173/') &&
      !currentUrl.endsWith(':5173');

    if (isLoggedIn) {
      console.log(` ✓ Logged in successfully. Current URL: ${currentUrl}`);
    } else {
      console.log(` ✗ Login may have failed. Current URL: ${currentUrl}`);
    }

    return isLoggedIn;
  } catch (error) {
    console.error(` ✗ Login failed:`, error);
    return false;
  }
}