Add Activepieces integration for workflow automation

- Add Activepieces fork with SmoothSchedule custom piece
- Create integrations app with Activepieces service layer
- Add embed token endpoint for iframe integration
- Create Automations page with embedded workflow builder
- Add sidebar visibility fix for embed mode
- Add list inactive customers endpoint to Public API
- Include SmoothSchedule triggers: event created/updated/cancelled
- Include SmoothSchedule actions: create/update/cancel events, list resources/services/customers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
poduck
2025-12-18 22:59:37 -05:00
parent 9848268d34
commit 3aa7199503
16292 changed files with 1284892 additions and 4708 deletions

View File

@@ -0,0 +1,33 @@
{
"extends": [
"../../../../.eslintrc.base.json"
],
"ignorePatterns": [
"!**/*"
],
"overrides": [
{
"files": [
"*.ts",
"*.tsx",
"*.js",
"*.jsx"
],
"rules": {}
},
{
"files": [
"*.ts",
"*.tsx"
],
"rules": {}
},
{
"files": [
"*.js",
"*.jsx"
],
"rules": {}
}
]
}

View File

@@ -0,0 +1,7 @@
# pieces-webscraping-ai
This library was generated with [Nx](https://nx.dev).
## Building
Run `nx build pieces-webscraping-ai` to build the library.

View File

@@ -0,0 +1,10 @@
{
"name": "@activepieces/piece-webscraping-ai",
"version": "0.0.3",
"type": "commonjs",
"main": "./src/index.js",
"types": "./src/index.d.ts",
"dependencies": {
"tslib": "^2.3.0"
}
}

View File

@@ -0,0 +1,65 @@
{
"name": "pieces-webscraping-ai",
"$schema": "../../../../node_modules/nx/schemas/project-schema.json",
"sourceRoot": "packages/pieces/community/webscraping-ai/src",
"projectType": "library",
"release": {
"version": {
"manifestRootsToUpdate": [
"dist/{projectRoot}"
],
"currentVersionResolver": "git-tag",
"fallbackCurrentVersionResolver": "disk"
}
},
"tags": [],
"targets": {
"build": {
"executor": "@nx/js:tsc",
"outputs": [
"{options.outputPath}"
],
"options": {
"outputPath": "dist/packages/pieces/community/webscraping-ai",
"tsConfig": "packages/pieces/community/webscraping-ai/tsconfig.lib.json",
"packageJson": "packages/pieces/community/webscraping-ai/package.json",
"main": "packages/pieces/community/webscraping-ai/src/index.ts",
"assets": [
"packages/pieces/community/webscraping-ai/*.md",
{
"input": "packages/pieces/community/webscraping-ai/src/i18n",
"output": "./src/i18n",
"glob": "**/!(i18n.json)"
}
],
"buildableProjectDepsInPackageJsonType": "dependencies",
"updateBuildableProjectDepsInPackageJson": true
},
"dependsOn": [
"^build",
"prebuild"
]
},
"nx-release-publish": {
"options": {
"packageRoot": "dist/{projectRoot}"
}
},
"lint": {
"executor": "@nx/eslint:lint",
"outputs": [
"{options.outputFile}"
]
},
"prebuild": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/pieces/community/webscraping-ai",
"command": "bun install --no-save --silent"
},
"dependsOn": [
"^build"
]
}
}
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping KI ist ein leistungsstarkes Tool, mit dem Sie Websites kratzen und Daten extrahieren können.",
"Ask a Question About the Web Page": "Fragen über die Webseite stellen",
"Get Page HTML": "Seite HTML abrufen",
"Scrape Website Text": "Scrape-Webseiten-Text",
"Extract structured data": "Strukturierte Daten extrahieren",
"Get Account Info": "Kontoinformationen abrufen",
"Gets an answer to a question about a given webpage.": "Gibt eine Antwort auf eine Frage zu einer bestimmten Webseite zurück.",
"Retrieves the raw HTML markup of a web page.": "Ruft das rohe HTML-Markup einer Webseite ab.",
"Returns the visible text content of a webpage specified by the URL.": "Gibt den sichtbaren Text einer von der URL angegebenen Webseite zurück.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Gibt strukturierte Datenfelder zurück, die aus der Webseite mit einem LLM-Modell extrahiert wurden.",
"Get account usage information including remaining API credits and concurrent requests.": "Erhalten Sie Kontonutzungsinformationen einschließlich verbleibender API-Credits und gleichzeitiger Anfragen.",
"Question": "Frage",
"URL": "URL",
"Custom Headers": "Eigene Kopfzeilen",
"Timeout": "Timeout",
"Enable JavaScript": "JavaScript aktivieren",
"JavaScript Timeout": "JavaScript-Timeout",
"Wait For": "Warten auf",
"Proxy Type": "Proxy-Typ",
"Proxy Country": "Proxy-Land",
"Custom Proxy": "Eigener Proxy",
"JavaScript Code": "JavaScript-Code",
"Device Type": "Gerätetyp",
"Error on 404": "Fehler auf 404",
"Error on Redirect": "Fehler bei Umleitung",
"Response Format": "Antwortformat",
"Return JavaScript Result": "JavaScript-Ergebnis zurückgeben",
"Text Format": "Textformat",
"Return Links": "Retourenlinks",
"Fields to Extract": "Zu extrahierende Felder",
"Question or instructions to ask the LLM model about the target page.": "Fragen oder Anweisungen, um das LLM-Modell nach der Zielseite zu fragen.",
"URL of the target page.": "URL der Zielseite.",
"Add custom HTTP headers (optional)": "Eigene HTTP-Header hinzufügen (optional)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Maximale Ladezeit der Seite in Millisekunden (Standard: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "JavaScript für dynamische Inhalte ausführen (empfohlen)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Maximale JavaScript-Ausführungszeit in Millisekunden (Standard: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "CSS-Selektor zum Warten auf dynamische Inhalte (z.B. \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Benutze Proxies für Websites, die die IPs für Datacenter blockieren (teurer)",
"Geographic location of the proxy server": "Geographische Position des Proxy-Servers",
"Your proxy URL in format: http://user:password@host:port": "Ihre Proxy-URL im Format: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "Benutzerdefiniertes JavaScript zum Ausführen (z.B. document.querySelector(\"button\").click())",
"Emulate specific device for responsive design testing": "Spezifisches Gerät für responsive Designtests emulieren",
"Fail the action if the page returns a 404 error": "Aktion fehlgeschlagen, wenn die Seite einen 404-Fehler zurückgibt",
"Fail the action if the page redirects to another URL": "Aktion fehlgeschlagen wenn die Seite zu einer anderen URL weitergeleitet wird",
"Response format: Text (simple) or JSON (structured)": "Antwortformat: Text (einfach) oder JSON (strukturiert)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Gibt das Ergebnis des benutzerdefinierten JavaScript-Codes (js_script Parameter) auf der Zielseite zurück (standardmäßig falsch, Seite HTML wird zurückgegeben).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Antwortformat: Einfacher Text, JSON (mit Titel/Beschreibung/Inhalt) oder XML",
"Include links in response (only works with JSON format)": "Links in Antwort einfügen (funktioniert nur mit JSON-Format)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Definieren Sie Felder zum Extrahieren (z.B. {\"title\": \"Produkttitel\", \"Preis\": \"Produktpreis\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Wohnraum (Stealth)",
"United States": "Vereinigte Staaten",
"Canada": "Kanada",
"United Kingdom": "Großbritannien",
"Germany": "Deutschland",
"France": "Frankreich",
"Italy": "Italien",
"Spain": "Spanien",
"Russia": "Russland",
"Japan": "Japan",
"South Korea": "Südkorea",
"India": "Indien",
"Desktop": "Desktop",
"Mobile": "Mobil",
"Tablet": "Tablets",
"Text": "Text",
"JSON": "JSON",
"Plain Text": "Einfacher Text",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping AI es una potente herramienta que te permite rascar sitios web y extraer datos.",
"Ask a Question About the Web Page": "Hacer una pregunta sobre la página web",
"Get Page HTML": "Obtener HTML página",
"Scrape Website Text": "Texto del sitio web de Scrape",
"Extract structured data": "Extraer datos estructurados",
"Get Account Info": "Obtener información de cuenta",
"Gets an answer to a question about a given webpage.": "Obtiene una respuesta a una pregunta sobre una página web determinada.",
"Retrieves the raw HTML markup of a web page.": "Obtiene el marcado HTML crudo de una página web.",
"Returns the visible text content of a webpage specified by the URL.": "Devuelve el contenido de texto visible de una página web especificada por la URL.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Devuelve campos de datos estructurados extraídos de la página web utilizando un modelo LLM.",
"Get account usage information including remaining API credits and concurrent requests.": "Obtener información de uso de la cuenta, incluyendo créditos de la API restantes y solicitudes simultáneas.",
"Question": "Pregunta",
"URL": "URL",
"Custom Headers": "Cabeceras personalizadas",
"Timeout": "Tiempo agotado",
"Enable JavaScript": "Activar JavaScript",
"JavaScript Timeout": "Tiempo de espera de JavaScript",
"Wait For": "Esperar",
"Proxy Type": "Tipo de proxy",
"Proxy Country": "País del proxy",
"Custom Proxy": "Proxy personalizado",
"JavaScript Code": "Código JavaScript",
"Device Type": "Tipo de dispositivo",
"Error on 404": "Error en 404",
"Error on Redirect": "Error en la redirección",
"Response Format": "Formato de respuesta",
"Return JavaScript Result": "Devolver resultado JavaScript",
"Text Format": "Formato de texto",
"Return Links": "Enlaces de retorno",
"Fields to Extract": "Campos a extraer",
"Question or instructions to ask the LLM model about the target page.": "Preguntas o instrucciones para preguntar al modelo LLM sobre la página de destino.",
"URL of the target page.": "URL de la página de destino.",
"Add custom HTTP headers (optional)": "Añadir cabeceras HTTP personalizadas (opcional)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Tiempo máximo de carga de página en milisegundos (por defecto: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "Ejecutar JavaScript para contenido dinámico (recomendado)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Tiempo máximo de ejecución JavaScript en milisegundos (por defecto: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "Selector CSS a esperar a contenido dinámico (por ej., \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Utilizar proxies residenciales para sitios que bloquean el índice de datos (más caro)",
"Geographic location of the proxy server": "Ubicación geográfica del servidor proxy",
"Your proxy URL in format: http://user:password@host:port": "Su URL de proxy en formato: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "JavaScript personalizado a ejecutar (por ejemplo, document.querySelector(\"botón\").click())",
"Emulate specific device for responsive design testing": "Emular dispositivo específico para pruebas de diseño responsivo",
"Fail the action if the page returns a 404 error": "Fallo en la acción si la página devuelve un error 404",
"Fail the action if the page redirects to another URL": "Fallo en la acción si la página redirige a otra URL",
"Response format: Text (simple) or JSON (structured)": "Formato de respuesta: Texto (simple) o JSON (estructurado)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Devolver el resultado de la ejecución de código JavaScript personalizado (parámetro js_script) en la página de destino (falso por defecto, página HTML será devuelta).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Formato de respuesta: Texto sin formato, JSON (con título/descripción/contenido) o XML",
"Include links in response (only works with JSON format)": "Incluye enlaces en respuesta (sólo funciona con formato JSON)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Definir campos a extraer (por ejemplo, {\"título\": \"Título del producto\", \"precio\": \"Precio del producto\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Residencial (Stealth)",
"United States": "Estados Unidos",
"Canada": "Canadá",
"United Kingdom": "Reino Unido",
"Germany": "Alemania",
"France": "Francia",
"Italy": "Italia",
"Spain": "España",
"Russia": "Rusia",
"Japan": "Japón",
"South Korea": "Corea del Sur",
"India": "India",
"Desktop": "Escritorio",
"Mobile": "Móvil",
"Tablet": "Tablet",
"Text": "Texto",
"JSON": "JSON",
"Plain Text": "Texto simple",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping AI est un outil puissant qui vous permet de fouiller les sites Web et d'extraire des données.",
"Ask a Question About the Web Page": "Poser une question à propos de la page Web",
"Get Page HTML": "Obtenir le HTML de la page",
"Scrape Website Text": "Texte du site Web de Scrape",
"Extract structured data": "Extraire les données structurées",
"Get Account Info": "Obtenir les informations du compte",
"Gets an answer to a question about a given webpage.": "Renvoie une réponse à une question à propos d'une page Web donnée.",
"Retrieves the raw HTML markup of a web page.": "Récupère le balisage HTML brut d'une page Web.",
"Returns the visible text content of a webpage specified by the URL.": "Retourne le contenu du texte visible d'une page Web spécifiée par l'URL.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Retourne les champs de données structurés extraits de la page Web en utilisant un modèle LLM.",
"Get account usage information including remaining API credits and concurrent requests.": "Obtenir les informations d'utilisation du compte y compris les crédits API restants et les demandes simultanées.",
"Question": "Question",
"URL": "URL",
"Custom Headers": "En-têtes personnalisés",
"Timeout": "Délai d'expiration",
"Enable JavaScript": "Activer JavaScript",
"JavaScript Timeout": "Délai d'attente du JavaScript",
"Wait For": "Attendre",
"Proxy Type": "Type de proxy",
"Proxy Country": "Pays du proxy",
"Custom Proxy": "Proxy personnalisé",
"JavaScript Code": "Code JavaScript",
"Device Type": "Type de périphérique",
"Error on 404": "Erreur sur 404",
"Error on Redirect": "Erreur lors de la redirection",
"Response Format": "Format de réponse",
"Return JavaScript Result": "Résultat du retour JavaScript",
"Text Format": "Format du texte",
"Return Links": "Liens de retour",
"Fields to Extract": "Champs à extraire",
"Question or instructions to ask the LLM model about the target page.": "Question ou instructions pour demander au modèle LLM à propos de la page cible.",
"URL of the target page.": "URL de la page cible.",
"Add custom HTTP headers (optional)": "Ajouter des en-têtes HTTP personnalisés (facultatif)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Temps maximum de chargement de la page en millisecondes (par défaut: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "Exécuter JavaScript pour le contenu dynamique (recommandé)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Temps maximum d'exécution JavaScript en millisecondes (par défaut: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "Sélecteur CSS pour attendre le contenu dynamique (par exemple, \".content-chargé\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Utiliser des mandataires résidentiels pour les sites qui bloquent les adresses IP du datacenter (plus cher)",
"Geographic location of the proxy server": "Emplacement géographique du serveur proxy",
"Your proxy URL in format: http://user:password@host:port": "Votre URL proxy au format http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "JavaScript personnalisé à exécuter (par exemple, document.querySelector(\"button\").click())",
"Emulate specific device for responsive design testing": "Émuler un dispositif spécifique pour des tests de conception réactifs",
"Fail the action if the page returns a 404 error": "Échec de l'action si la page renvoie une erreur 404",
"Fail the action if the page redirects to another URL": "Échec de l'action si la page redirige vers une autre URL",
"Response format: Text (simple) or JSON (structured)": "Format de réponse : Texte (simple) ou JSON (structuré)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Retourne le résultat de l'exécution du code JavaScript personnalisé (paramètre js_script) sur la page cible (false par défaut, la page HTML sera retournée).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Format de réponse : Texte brut, JSON (avec titre/description/contenu), ou XML",
"Include links in response (only works with JSON format)": "Inclure les liens dans la réponse (ne fonctionne qu'avec le format JSON)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Définir les champs à extraire (par exemple, {\"title\": \"Titre du produit\", \"prix\": \"Prix du produit\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Résidentiel (Stéalth)",
"United States": "États-Unis",
"Canada": "Le Canada",
"United Kingdom": "Royaume-Uni",
"Germany": "L'Allemagne",
"France": "France",
"Italy": "Italie",
"Spain": "L'Espagne",
"Russia": "Russie",
"Japan": "Japon",
"South Korea": "Corée du Sud",
"India": "L'Inde",
"Desktop": "Bureau",
"Mobile": "Téléphone mobile",
"Tablet": "Tablette",
"Text": "Texte du texte",
"JSON": "JSON",
"Plain Text": "Texte brut",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping AIは、ウェブサイトをスクレイプしてデータを抽出することができる強力なツールです。",
"Ask a Question About the Web Page": "ウェブページについて質問する",
"Get Page HTML": "ページHTMLを取得",
"Scrape Website Text": "ウェブサイトのテキストをスクレイプ",
"Extract structured data": "構造化されたデータを抽出",
"Get Account Info": "アカウント情報を取得",
"Gets an answer to a question about a given webpage.": "特定のウェブページに関する質問への回答を取得します。",
"Retrieves the raw HTML markup of a web page.": "Web ページの生の HTML マークアップを取得します。",
"Returns the visible text content of a webpage specified by the URL.": "URL によって指定された Web ページの表示テキストコンテンツを返します。",
"Returns structured data fields extracted from the webpage using an LLM model.": "LLMモデルを使用してWebページから抽出された構造化データフィールドを返します。",
"Get account usage information including remaining API credits and concurrent requests.": "残りのAPIクレジットや同時リクエストなどのアカウント使用情報を取得します。",
"Question": "質問",
"URL": "URL",
"Custom Headers": "カスタムヘッダー",
"Timeout": "タイムアウト",
"Enable JavaScript": "JavaScript を有効にする",
"JavaScript Timeout": "JavaScriptのタイムアウト",
"Wait For": "待つ",
"Proxy Type": "プロキシタイプ",
"Proxy Country": "プロキシの国",
"Custom Proxy": "カスタムプロキシ",
"JavaScript Code": "JavaScript コード",
"Device Type": "デバイスタイプ",
"Error on 404": "404 エラー",
"Error on Redirect": "リダイレクト時のエラー",
"Response Format": "応答形式",
"Return JavaScript Result": "Return JavaScript 結果",
"Text Format": "テキスト形式",
"Return Links": "返品リンク",
"Fields to Extract": "抽出するフィールド",
"Question or instructions to ask the LLM model about the target page.": "対象ページについてLLMモデルに問い合わせる質問または手順。",
"URL of the target page.": "ターゲットページの URL",
"Add custom HTTP headers (optional)": "カスタム HTTP ヘッダーを追加 (オプション)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "最大ページ読み込み時間 (ミリ秒) (デフォルト: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "動的コンテンツの JavaScript を実行します (推奨)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "最大JavaScript実行時間 (ミリ秒) (デフォルト: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "動的なコンテンツを待つための CSS セレクター (例: \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "データセンターの IP をブロックするサイトに住宅用プロキシを使用する (より高価な)",
"Geographic location of the proxy server": "プロキシサーバーの地理的な場所",
"Your proxy URL in format: http://user:password@host:port": "あなたのプロキシ URL の形式: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "実行するカスタム JavaScript (例: document.querySelector(\"button\").click())",
"Emulate specific device for responsive design testing": "レスポンシブ設計テスト用の特定のデバイスをエミュレートする",
"Fail the action if the page returns a 404 error": "ページが404エラーを返した場合、アクションに失敗します",
"Fail the action if the page redirects to another URL": "ページが別の URL にリダイレクトされた場合、アクションを失敗します。",
"Response format: Text (simple) or JSON (structured)": "応答形式: テキスト(簡単)または JSON (構造)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "ターゲットページでのカスタム JavaScript コード (js_script パラメータ) の実行結果を返します (デフォルトでは false、ページは HTML が返されます)。",
"Response format: Plain text, JSON (with title/description/content), or XML": "応答形式: プレーンテキスト、JSON(タイトル/説明/コンテンツ付き)、または XML",
"Include links in response (only works with JSON format)": "レスポンスにリンクを含める (JSON 形式でのみ動作します)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "抽出するフィールドを定義します(例:{\"title\": \"製品タイトル\", \"price\": \"製品価格\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Residential (Stealth)",
"United States": "アメリカ",
"Canada": "カナダ",
"United Kingdom": "イギリス",
"Germany": "ドイツ",
"France": "フランス",
"Italy": "イタリア",
"Spain": "スペイン",
"Russia": "ロシア",
"Japan": "日本",
"South Korea": "韓国",
"India": "インド",
"Desktop": "デスクトップ",
"Mobile": "モバイル",
"Tablet": "タブレット",
"Text": "テキスト",
"JSON": "JSON",
"Plain Text": "プレーンテキスト",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping AI is een krachtig hulpmiddel waarmee je websites kunt scrapen en gegevens kunt extraheren.",
"Ask a Question About the Web Page": "Stel een vraag over de webpagina",
"Get Page HTML": "Verkrijg pagina HTML",
"Scrape Website Text": "Tekst scrape website",
"Extract structured data": "Gestructureerde data uitpakken",
"Get Account Info": "Accountinformatie ophalen",
"Gets an answer to a question about a given webpage.": "Geeft een antwoord op een vraag over een bepaalde webpagina.",
"Retrieves the raw HTML markup of a web page.": "Ophalen van de onbewerkte HTML-opmaak van een webpagina.",
"Returns the visible text content of a webpage specified by the URL.": "Geeft de zichtbare tekstinhoud van een webpagina gespecificeerd door de URL.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Geeft als resultaat gestructureerde gegevensvelden uitgepakt van de webpagina met behulp van een LLM-model.",
"Get account usage information including remaining API credits and concurrent requests.": "Krijg informatie over het gebruik van uw account, inclusief resterende API credits en gelijktijdige aanvragen.",
"Question": "Vraag",
"URL": "URL",
"Custom Headers": "Aangepaste headers",
"Timeout": "Time-out",
"Enable JavaScript": "JavaScript inschakelen",
"JavaScript Timeout": "JavaScript time-out",
"Wait For": "Wacht op",
"Proxy Type": "Proxy Type",
"Proxy Country": "Proxy Land",
"Custom Proxy": "Aangepaste Proxy",
"JavaScript Code": "JavaScript code",
"Device Type": "Type apparaat",
"Error on 404": "Fout bij 404",
"Error on Redirect": "Fout bij doorverwijzen",
"Response Format": "Antwoord formaat",
"Return JavaScript Result": "Retourneer JavaScript resultaat",
"Text Format": "Tekst formaat",
"Return Links": "Retourneer links",
"Fields to Extract": "Velden om uit te pakken",
"Question or instructions to ask the LLM model about the target page.": "Vraag of instructies om het LLIM-model te vragen over de doelpagina.",
"URL of the target page.": "URL van de doelpagina.",
"Add custom HTTP headers (optional)": "Aangepaste HTTP-headers toevoegen (optioneel)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Maximale laadtijd in milliseconden (standaard: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "JavaScript uitvoeren voor dynamische inhoud (aanbevolen)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Maximale uitvoeringstijd van JavaScript in milliseconden (standaard: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "CSS-selector die wacht op dynamische inhoud (bijv. \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Gebruik residentiële proxies voor sites die datacenter IP's blokkeren (duurder)",
"Geographic location of the proxy server": "Geografische locatie van de proxyserver",
"Your proxy URL in format: http://user:password@host:port": "Uw proxy URL in formaat: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "Aangepaste JavaScript om uit te voeren (vb. document.querySelector(\"button\").click())",
"Emulate specific device for responsive design testing": "Emuleren van specifiek apparaat voor responsief ontwerp testen",
"Fail the action if the page returns a 404 error": "Kon de actie niet uitvoeren als de pagina een 404-fout geeft",
"Fail the action if the page redirects to another URL": "Niet uitvoeren als de pagina naar een andere URL doorverwijst",
"Response format: Text (simple) or JSON (structured)": "Antwoord-formaat: Tekst (eenvoudig) of JSON (structuur)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Retourresultaat van de aangepaste JavaScript-code (js_scriptparameter) uitvoering op de doelpagina (standaard onwaar, pagina HTML wordt teruggegeven).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Antwoord-formaat: platte tekst, JSON (met titel/beschrijving/inhoud) of XML",
"Include links in response (only works with JSON format)": "Links opnemen in reactie (werkt alleen met JSON formaat)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Definieer de uit te pakken velden (bijv. {\"title\": \"Producttitel\", \"prijs\": \"Productprijs\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 woonhuisaal (tealth)",
"United States": "Verenigde Staten",
"Canada": "Canada",
"United Kingdom": "Verenigd Koninkrijk",
"Germany": "Duitsland",
"France": "Frankrijk",
"Italy": "Italiaans",
"Spain": "Spanje",
"Russia": "Rusland",
"Japan": "Japans",
"South Korea": "Zuid-Korea",
"India": "Indië",
"Desktop": "Startscherm",
"Mobile": "Mobiel",
"Tablet": "Telefoon",
"Text": "Tekstveld",
"JSON": "JSON",
"Plain Text": "Onopgemaakte tekst",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "A AI WebScraping é uma ferramenta poderosa que permite que você scrape sites e extraia dados.",
"Ask a Question About the Web Page": "Faça uma pergunta sobre a página da Web",
"Get Page HTML": "Obter HTML da página",
"Scrape Website Text": "Scrape Texto do Site",
"Extract structured data": "Extrair dados estruturados",
"Get Account Info": "Obter informações da conta",
"Gets an answer to a question about a given webpage.": "Obtém uma resposta para uma pergunta sobre uma determinada página da web.",
"Retrieves the raw HTML markup of a web page.": "Recupera a marcação HTML bruta de uma página da web.",
"Returns the visible text content of a webpage specified by the URL.": "Retorna o conteúdo visível do texto de uma página web especificada pelo URL.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Retorna campos de dados estruturados extraídos da página web usando um modelo LLM.",
"Get account usage information including remaining API credits and concurrent requests.": "Obtenha informações de uso da conta, incluindo créditos de API restantes e solicitações simultâneas.",
"Question": "Questão",
"URL": "URL:",
"Custom Headers": "Cabeçalhos Personalizados",
"Timeout": "Tempo esgotado",
"Enable JavaScript": "Ativar JavaScript",
"JavaScript Timeout": "Tempo limite do JavaScript",
"Wait For": "Esperar por",
"Proxy Type": "Tipo de proxy",
"Proxy Country": "País do Proxy",
"Custom Proxy": "Proxy personalizado",
"JavaScript Code": "Código JavaScript",
"Device Type": "Tipo de dispositivo",
"Error on 404": "Erro em 404",
"Error on Redirect": "Erro no Redirecionamento",
"Response Format": "Formato de Resposta",
"Return JavaScript Result": "Retornar resultado JavaScript",
"Text Format": "Formato do texto",
"Return Links": "Links de Devolução",
"Fields to Extract": "Campos para Extrair",
"Question or instructions to ask the LLM model about the target page.": "Pergunte ou instruções para perguntar ao modelo LLM sobre a página alvo.",
"URL of the target page.": "URL da página alvo.",
"Add custom HTTP headers (optional)": "Adicionar cabeçalhos HTTP personalizados (opcional)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Tempo máximo de carregamento da página em milissegundos (padrão: 10000, máx: 30000)",
"Execute JavaScript for dynamic content (recommended)": "Executar JavaScript para conteúdo dinâmico (recomendado)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Tempo máximo de execução do JavaScript em milissegundos (padrão: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "Seletor de CSS para esperar por conteúdo dinâmico (por exemplo, \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Use proxies residenciais para sites que bloqueiam IPs inseridos (mais caro)",
"Geographic location of the proxy server": "Localização geográfica do servidor proxy",
"Your proxy URL in format: http://user:password@host:port": "URL do seu proxy no formato: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "JavaScript personalizado para executar (por exemplo, document.querySelector(\"botão\").click())",
"Emulate specific device for responsive design testing": "Emular dispositivo específico para testes de design responsivo",
"Fail the action if the page returns a 404 error": "Falhar a ação se a página retorna um erro 404",
"Fail the action if the page redirects to another URL": "Falhar a ação se a página redirecionar para outra URL",
"Response format: Text (simple) or JSON (structured)": "Formato de resposta: Texto (simples) ou JSON (estruturado)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Resultado do resultado do código JavaScript personalizado (parâmetro js_script) na página de destino (falso por padrão, o HTML da página será retornado).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Formato de resposta: Texto simples, JSON (com título/descrição/conteúdo), ou XML",
"Include links in response (only works with JSON format)": "Incluir links na resposta (só funciona com o formato JSON)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Defina os campos para extrair (por exemplo, {\"title\": \"Título do produto\", \"price\": \"Preço do produto\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Residencial (Padrão)",
"United States": "Estados Unidos",
"Canada": "Canadá",
"United Kingdom": "Reino Unido",
"Germany": "Alemanha",
"France": "França",
"Italy": "Itália",
"Spain": "Espanha",
"Russia": "Rússia",
"Japan": "Japão",
"South Korea": "Coreia do Sul",
"India": "Índia",
"Desktop": "Computadores",
"Mobile": "Celular",
"Tablet": "Tábua",
"Text": "texto",
"JSON": "JSON",
"Plain Text": "Texto sem Formatação",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping AI is a powerful tool that allows you to scrape websites and extract data.",
"Ask a Question About the Web Page": "Ask a Question About the Web Page",
"Get Page HTML": "Get Page HTML",
"Scrape Website Text": "Scrape Website Text",
"Extract structured data": "Extract structured data",
"Get Account Info": "Get Account Info",
"Gets an answer to a question about a given webpage.": "Gets an answer to a question about a given webpage.",
"Retrieves the raw HTML markup of a web page.": "Retrieves the raw HTML markup of a web page.",
"Returns the visible text content of a webpage specified by the URL.": "Returns the visible text content of a webpage specified by the URL.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Returns structured data fields extracted from the webpage using an LLM model.",
"Get account usage information including remaining API credits and concurrent requests.": "Get account usage information including remaining API credits and concurrent requests.",
"Question": "Question",
"URL": "URL",
"Custom Headers": "Custom Headers",
"Timeout": "Timeout",
"Enable JavaScript": "Enable JavaScript",
"JavaScript Timeout": "JavaScript Timeout",
"Wait For": "Wait For",
"Proxy Type": "Proxy Type",
"Proxy Country": "Proxy Country",
"Custom Proxy": "Custom Proxy",
"JavaScript Code": "JavaScript Code",
"Device Type": "Device Type",
"Error on 404": "Error on 404",
"Error on Redirect": "Error on Redirect",
"Response Format": "Response Format",
"Return JavaScript Result": "Return JavaScript Result",
"Text Format": "Text Format",
"Return Links": "Return Links",
"Fields to Extract": "Fields to Extract",
"Question or instructions to ask the LLM model about the target page.": "Question or instructions to ask the LLM model about the target page.",
"URL of the target page.": "URL of the target page.",
"Add custom HTTP headers (optional)": "Add custom HTTP headers (optional)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Maximum page load time in milliseconds (default: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "Execute JavaScript for dynamic content (recommended)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Maximum JavaScript execution time in milliseconds (default: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "CSS selector to wait for dynamic content (e.g., \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Use residential proxies for sites that block datacenter IPs (more expensive)",
"Geographic location of the proxy server": "Geographic location of the proxy server",
"Your proxy URL in format: http://user:password@host:port": "Your proxy URL in format: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())",
"Emulate specific device for responsive design testing": "Emulate specific device for responsive design testing",
"Fail the action if the page returns a 404 error": "Fail the action if the page returns a 404 error",
"Fail the action if the page redirects to another URL": "Fail the action if the page redirects to another URL",
"Response format: Text (simple) or JSON (structured)": "Response format: Text (simple) or JSON (structured)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Response format: Plain text, JSON (with title/description/content), or XML",
"Include links in response (only works with JSON format)": "Include links in response (only works with JSON format)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Residential (Stealth)",
"United States": "United States",
"Canada": "Canada",
"United Kingdom": "United Kingdom",
"Germany": "Germany",
"France": "France",
"Italy": "Italy",
"Spain": "Spain",
"Russia": "Russia",
"Japan": "Japan",
"South Korea": "South Korea",
"India": "India",
"Desktop": "Desktop",
"Mobile": "Mobile",
"Tablet": "Tablet",
"Text": "Text",
"JSON": "JSON",
"Plain Text": "Plain Text",
"XML": "XML"
}

View File

@@ -0,0 +1,71 @@
{
"WebScraping AI is a powerful tool that allows you to scrape websites and extract data.": "WebScraping AI is a powerful tool that allows you to scrape websites and extract data.",
"Ask a Question About the Web Page": "Ask a Question About the Web Page",
"Get Page HTML": "Get Page HTML",
"Scrape Website Text": "Scrape Website Text",
"Extract structured data": "Extract structured data",
"Get Account Info": "Get Account Info",
"Gets an answer to a question about a given webpage.": "Gets an answer to a question about a given webpage.",
"Retrieves the raw HTML markup of a web page.": "Retrieves the raw HTML markup of a web page.",
"Returns the visible text content of a webpage specified by the URL.": "Returns the visible text content of a webpage specified by the URL.",
"Returns structured data fields extracted from the webpage using an LLM model.": "Returns structured data fields extracted from the webpage using an LLM model.",
"Get account usage information including remaining API credits and concurrent requests.": "Get account usage information including remaining API credits and concurrent requests.",
"Question": "Question",
"URL": "URL",
"Custom Headers": "Custom Headers",
"Timeout": "Timeout",
"Enable JavaScript": "Enable JavaScript",
"JavaScript Timeout": "JavaScript Timeout",
"Wait For": "Wait For",
"Proxy Type": "Proxy Type",
"Proxy Country": "Proxy Country",
"Custom Proxy": "Custom Proxy",
"JavaScript Code": "JavaScript Code",
"Device Type": "Device Type",
"Error on 404": "Error on 404",
"Error on Redirect": "Error on Redirect",
"Response Format": "Response Format",
"Return JavaScript Result": "Return JavaScript Result",
"Text Format": "Text Format",
"Return Links": "Return Links",
"Fields to Extract": "Fields to Extract",
"Question or instructions to ask the LLM model about the target page.": "Question or instructions to ask the LLM model about the target page.",
"URL of the target page.": "URL of the target page.",
"Add custom HTTP headers (optional)": "Add custom HTTP headers (optional)",
"Maximum page load time in milliseconds (default: 10000, max: 30000)": "Maximum page load time in milliseconds (default: 10000, max: 30000)",
"Execute JavaScript for dynamic content (recommended)": "Execute JavaScript for dynamic content (recommended)",
"Maximum JavaScript execution time in milliseconds (default: 2000)": "Maximum JavaScript execution time in milliseconds (default: 2000)",
"CSS selector to wait for dynamic content (e.g., \".content-loaded\")": "CSS selector to wait for dynamic content (e.g., \".content-loaded\")",
"Use residential proxies for sites that block datacenter IPs (more expensive)": "Use residential proxies for sites that block datacenter IPs (more expensive)",
"Geographic location of the proxy server": "Geographic location of the proxy server",
"Your proxy URL in format: http://user:password@host:port": "Your proxy URL in format: http://user:password@host:port",
"Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())": "Custom JavaScript to execute (e.g., document.querySelector(\"button\").click())",
"Emulate specific device for responsive design testing": "Emulate specific device for responsive design testing",
"Fail the action if the page returns a 404 error": "Fail the action if the page returns a 404 error",
"Fail the action if the page redirects to another URL": "Fail the action if the page redirects to another URL",
"Response format: Text (simple) or JSON (structured)": "Response format: Text (simple) or JSON (structured)",
"Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).": "Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).",
"Response format: Plain text, JSON (with title/description/content), or XML": "Response format: Plain text, JSON (with title/description/content), or XML",
"Include links in response (only works with JSON format)": "Include links in response (only works with JSON format)",
"Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})": "Define fields to extract (e.g., {\"title\": \"Product title\", \"price\": \"Product price\"})",
"🏢 Datacenter (Fast)": "🏢 Datacenter (Fast)",
"🏠 Residential (Stealth)": "🏠 Residential (Stealth)",
"United States": "United States",
"Canada": "Canada",
"United Kingdom": "United Kingdom",
"Germany": "Germany",
"France": "France",
"Italy": "Italy",
"Spain": "Spain",
"Russia": "Russia",
"Japan": "Japan",
"South Korea": "South Korea",
"India": "India",
"Desktop": "Desktop",
"Mobile": "Mobile",
"Tablet": "Tablet",
"Text": "文本",
"JSON": "JSON",
"Plain Text": "Plain Text",
"XML": "XML"
}

View File

@@ -0,0 +1,26 @@
import { createPiece } from '@activepieces/pieces-framework';
import { askAQuestionAboutTheWebPage } from './lib/actions/ask-a-question-about-the-web-page';
import { extractStructuredData } from './lib/actions/extract-structured-data';
import { getAccountInformation } from './lib/actions/get-account-information';
import { getPageHtml } from './lib/actions/get-page-html';
import { scrapeWebsiteText } from './lib/actions/scrape-website-text';
import { webscrapingAiAuth } from './lib/common';
import { PieceCategory } from '@activepieces/shared';
// Piece definition for WebScraping AI. It registers the piece metadata, the
// API-key based auth, and the five actions implemented under ./lib/actions.
// The piece exposes no triggers.
export const webscrapingAi = createPiece({
displayName: 'WebScraping AI',
auth: webscrapingAiAuth,
minimumSupportedRelease: '0.36.1',
description: 'WebScraping AI is a powerful tool that allows you to scrape websites and extract data.',
categories: [PieceCategory.DEVELOPER_TOOLS, PieceCategory.ARTIFICIAL_INTELLIGENCE],
logoUrl: 'https://cdn.activepieces.com/pieces/webscraping-ai.png',
authors: ['LuizDMM', 'onyedikachi-david'],
// Actions registered by this piece, in declaration order.
actions: [
askAQuestionAboutTheWebPage,
getPageHtml,
scrapeWebsiteText,
extractStructuredData,
getAccountInformation,
],
triggers: [],
});

View File

@@ -0,0 +1,34 @@
import { createAction } from '@activepieces/pieces-framework';
import { webscrapingAiAuth, webscrapingAiCommon } from '../common';
/**
 * Action that asks the WebScraping AI LLM a question about a web page.
 *
 * The raw property values are normalised before being handed to
 * `webscrapingAiCommon.askQuestion`: dropdown values outside the supported
 * sets are replaced by `undefined`, and custom headers are forwarded as a
 * list of `{ name, value }` pairs.
 */
export const askAQuestionAboutTheWebPage = createAction({
  auth: webscrapingAiAuth,
  name: 'askAQuestionAboutTheWebPage',
  displayName: 'Ask a Question About the Web Page',
  description: 'Gets an answer to a question about a given webpage.',
  props: webscrapingAiCommon.askQuestionProperties,
  async run({ auth: apiKey, propsValue }) {
    const { device, format, question, headers, proxy, country, ...rest } = propsValue;
    const allowedCountries = [
      'us', 'gb', 'de', 'it', 'fr', 'ca', 'es', 'ru', 'jp', 'kr', 'in',
    ] as const;
    // Typed from the callee so a shape mismatch is a compile error instead of
    // being hidden behind `any`.
    const params: Parameters<typeof webscrapingAiCommon.askQuestion>[0] = {
      apiKey: apiKey.secret_text,
      question,
      ...rest,
      format: format === 'json' || format === 'text' ? format : undefined,
      proxy: proxy === 'datacenter' || proxy === 'residential' ? proxy : undefined,
      // `find` yields the matching literal, or undefined for unsupported codes.
      country: allowedCountries.find((code) => code === country),
      // Keep the headers as an array: `askQuestion` only serialises them when
      // it receives `{ name, value }` entries. Converting them to a plain
      // object here caused every custom header to be silently dropped.
      headers: Array.isArray(headers)
        ? headers.map((entry) => {
            const { name, value } = entry as { name: string; value: string };
            return { name, value };
          })
        : undefined,
      device: device as 'desktop' | 'mobile' | 'tablet' | undefined,
    };
    return await webscrapingAiCommon.askQuestion(params);
  },
});

View File

@@ -0,0 +1,36 @@
import { createAction } from '@activepieces/pieces-framework';
import { webscrapingAiAuth, webscrapingAiCommon } from '../common';
/**
 * Action that extracts structured fields from a web page via the
 * WebScraping AI LLM endpoint.
 *
 * The raw property values are normalised before being handed to
 * `webscrapingAiCommon.getPageStructuredData`: dropdown values outside the
 * supported sets are replaced by `undefined`, field descriptions are coerced
 * to strings, and custom headers are forwarded as `{ name, value }` pairs.
 */
export const extractStructuredData = createAction({
  auth: webscrapingAiAuth,
  name: 'extractStructuredData',
  displayName: 'Extract structured data',
  description:
    'Returns structured data fields extracted from the webpage using an LLM model.',
  props: webscrapingAiCommon.getPageStructuredDataProperties,
  async run({ auth: apiKey, propsValue }) {
    const { fields, headers, proxy, country, device, ...rest } = propsValue;
    const allowedCountries = [
      'us', 'gb', 'de', 'it', 'fr', 'ca', 'es', 'ru', 'jp', 'kr', 'in',
    ] as const;
    // Typed from the callee so a shape mismatch is a compile error instead of
    // being hidden behind `any`.
    const params: Parameters<typeof webscrapingAiCommon.getPageStructuredData>[0] = {
      apiKey: apiKey.secret_text,
      ...rest,
      proxy: proxy === 'datacenter' || proxy === 'residential' ? proxy : undefined,
      // `find` yields the matching literal, or undefined for unsupported codes.
      country: allowedCountries.find((code) => code === country),
      // Keep the headers as an array: the client only serialises them when it
      // receives `{ name, value }` entries. Converting them to a plain object
      // here caused every custom header to be silently dropped.
      headers: Array.isArray(headers)
        ? headers.map((entry) => {
            const { name, value } = entry as { name: string; value: string };
            return { name, value };
          })
        : undefined,
      // Each field description is sent as a query-string value, hence String().
      fields: fields && typeof fields === 'object'
        ? Object.fromEntries(
            Object.entries(fields).map(([key, value]) => [key, String(value)])
          )
        : {},
      device: device as 'desktop' | 'mobile' | 'tablet' | undefined,
    };
    return await webscrapingAiCommon.getPageStructuredData(params);
  },
});

View File

@@ -0,0 +1,13 @@
import { createAction } from '@activepieces/pieces-framework';
import { webscrapingAiAuth, webscrapingAiCommon } from '../common';
/**
 * Action that fetches account usage details (remaining API credits and
 * concurrent requests) for the connected API key. It takes no properties.
 */
export const getAccountInformation = createAction({
  auth: webscrapingAiAuth,
  name: 'getAccountInformation',
  displayName: 'Get Account Info',
  description: 'Get account usage information including remaining API credits and concurrent requests.',
  props: {},
  async run({ auth: connection }) {
    const credentials = { apiKey: connection.secret_text };
    return await webscrapingAiCommon.getAccountInformation(credentials);
  },
});

View File

@@ -0,0 +1,36 @@
import { createAction } from '@activepieces/pieces-framework';
import { webscrapingAiAuth, webscrapingAiCommon } from '../common';
/**
 * Action that retrieves the raw HTML of a web page (or, optionally, the
 * result of a custom script run on it).
 *
 * The raw property values are normalised before being handed to
 * `webscrapingAiCommon.getPageHtml`: dropdown values outside the supported
 * sets are replaced by `undefined`, and custom headers are forwarded as a
 * list of `{ name, value }` pairs.
 */
export const getPageHtml = createAction({
  auth: webscrapingAiAuth,
  name: 'getPageHtml',
  displayName: 'Get Page HTML',
  description: 'Retrieves the raw HTML markup of a web page.',
  props: webscrapingAiCommon.getPageHtmlProperties,
  async run({ auth: apiKey, propsValue }) {
    const {
      format,
      headers,
      proxy,
      country,
      device,
      errorOn404,
      errorOnRedirect,
      returnScriptResult,
      ...rest
    } = propsValue;
    const allowedCountries = [
      'us', 'gb', 'de', 'it', 'fr', 'ca', 'es', 'ru', 'jp', 'kr', 'in',
    ] as const;
    // Typed from the callee so a shape mismatch is a compile error instead of
    // being hidden behind `any`.
    const params: Parameters<typeof webscrapingAiCommon.getPageHtml>[0] = {
      apiKey: apiKey.secret_text,
      ...rest,
      format: format === 'json' || format === 'text' ? format : undefined,
      proxy: proxy === 'datacenter' || proxy === 'residential' ? proxy : undefined,
      // `find` yields the matching literal, or undefined for unsupported codes.
      country: allowedCountries.find((code) => code === country),
      // Keep the headers as an array: the client only serialises them when it
      // receives `{ name, value }` entries. Converting them to a plain object
      // here caused every custom header to be silently dropped.
      headers: Array.isArray(headers)
        ? headers.map((entry) => {
            const { name, value } = entry as { name: string; value: string };
            return { name, value };
          })
        : undefined,
      device: device as 'desktop' | 'mobile' | 'tablet' | undefined,
      errorOn404,
      errorOnRedirect,
      returnScriptResult,
    };
    return await webscrapingAiCommon.getPageHtml(params);
  },
});

View File

@@ -0,0 +1,37 @@
import { createAction } from '@activepieces/pieces-framework';
import { webscrapingAiAuth, webscrapingAiCommon } from '../common';
/**
 * Action that returns the visible text content of a web page.
 *
 * The raw property values are normalised before being handed to
 * `webscrapingAiCommon.getPageText`: dropdown values outside the supported
 * sets are replaced by `undefined`, `returnLinks` is only forwarded for the
 * JSON text format, and custom headers are forwarded as `{ name, value }`
 * pairs.
 */
export const scrapeWebsiteText = createAction({
  auth: webscrapingAiAuth,
  name: 'scrapeWebsiteText',
  displayName: 'Scrape Website Text',
  description:
    'Returns the visible text content of a webpage specified by the URL.',
  props: webscrapingAiCommon.getPageTextProperties,
  async run({ auth: apiKey, propsValue }) {
    const { textFormat, headers, returnLinks, proxy, country, device, ...rest } = propsValue;
    const allowedCountries = [
      'us', 'gb', 'de', 'it', 'fr', 'ca', 'es', 'ru', 'jp', 'kr', 'in',
    ] as const;
    // Typed from the callee so a shape mismatch is a compile error instead of
    // being hidden behind `any`.
    const params: Parameters<typeof webscrapingAiCommon.getPageText>[0] = {
      apiKey: apiKey.secret_text,
      ...rest,
      textFormat:
        textFormat === 'json' || textFormat === 'plain' || textFormat === 'xml'
          ? textFormat
          : undefined,
      // Links are only returned by the API in JSON mode, so omit the flag otherwise.
      returnLinks: textFormat === 'json' ? returnLinks : undefined,
      proxy: proxy === 'datacenter' || proxy === 'residential' ? proxy : undefined,
      // `find` yields the matching literal, or undefined for unsupported codes.
      country: allowedCountries.find((code) => code === country),
      // Keep the headers as an array: the client only serialises them when it
      // receives `{ name, value }` entries. Converting them to a plain object
      // here caused every custom header to be silently dropped.
      headers: Array.isArray(headers)
        ? headers.map((entry) => {
            const { name, value } = entry as { name: string; value: string };
            return { name, value };
          })
        : undefined,
      device: device as 'desktop' | 'mobile' | 'tablet' | undefined,
    };
    return await webscrapingAiCommon.getPageText(params);
  },
});

View File

@@ -0,0 +1,488 @@
import {
httpClient,
HttpMethod,
QueryParams,
} from '@activepieces/pieces-common';
import { PieceAuth, Property } from '@activepieces/pieces-framework';
import { pickBy } from '@activepieces/shared';
// Input properties shared by every page-fetching action (spread into the
// per-action property sets of `webscrapingAiCommon`). Only `url` is required.
const baseRequestProperties = {
url: Property.ShortText({
displayName: 'URL',
description: 'URL of the target page.',
required: true,
}),
// Each array item is a { name, value } pair describing one HTTP header.
headers: Property.Array({
displayName: 'Custom Headers',
description: 'Add custom HTTP headers (optional)',
required: false,
properties: {
name: Property.ShortText({
displayName: 'Header Name',
description: 'Header name (e.g., User-Agent, Authorization)',
required: true,
}),
value: Property.ShortText({
displayName: 'Header Value',
description: 'Header value',
required: true,
}),
},
}),
// Page-load timeout in milliseconds.
timeout: Property.Number({
displayName: 'Timeout',
description: 'Maximum page load time in milliseconds (default: 10000, max: 30000)',
required: false,
defaultValue: 10000,
}),
js: Property.Checkbox({
displayName: 'Enable JavaScript',
description: 'Execute JavaScript for dynamic content (recommended)',
defaultValue: true,
required: false,
}),
// JavaScript execution timeout in milliseconds.
jsTimeout: Property.Number({
displayName: 'JavaScript Timeout',
description: 'Maximum JavaScript execution time in milliseconds (default: 2000)',
required: false,
defaultValue: 2000,
}),
waitFor: Property.ShortText({
displayName: 'Wait For',
description: 'CSS selector to wait for dynamic content (e.g., ".content-loaded")',
required: false,
}),
proxy: Property.StaticDropdown({
displayName: 'Proxy Type',
description: 'Use residential proxies for sites that block datacenter IPs (more expensive)',
required: false,
defaultValue: 'datacenter',
options: {
options: [
{ label: '🏢 Datacenter (Fast)', value: 'datacenter' },
{ label: '🏠 Residential (Stealth)', value: 'residential' },
],
},
}),
// The option values below are the same codes as the `country` union on
// `baseRequestParams` and the `allowedCountries` lists in the actions.
country: Property.StaticDropdown({
displayName: 'Proxy Country',
description: 'Geographic location of the proxy server',
required: false,
defaultValue: 'us',
options: {
options: [
{ label: 'United States', value: 'us' },
{ label: 'Canada', value: 'ca' },
{ label: 'United Kingdom', value: 'gb' },
{ label: 'Germany', value: 'de' },
{ label: 'France', value: 'fr' },
{ label: 'Italy', value: 'it' },
{ label: 'Spain', value: 'es' },
{ label: 'Russia', value: 'ru' },
{ label: 'Japan', value: 'jp' },
{ label: 'South Korea', value: 'kr' },
{ label: 'India', value: 'in' },
],
},
}),
customProxy: Property.ShortText({
displayName: 'Custom Proxy',
description: 'Your proxy URL in format: http://user:password@host:port',
required: false,
}),
jsScript: Property.LongText({
displayName: 'JavaScript Code',
description: 'Custom JavaScript to execute (e.g., document.querySelector("button").click())',
required: false,
}),
};
// Connection definition: a single required secret holding the WebScraping AI
// API key. The client functions below send it as the `api_key` query parameter.
export const webscrapingAiAuth = PieceAuth.SecretText({
displayName: 'API Key',
required: true,
});
export const webscrapingAiCommon = {
// Root of the WebScraping AI HTTP API; the endpoint paths below are appended to it.
baseUrl: 'https://api.webscraping.ai',
// Endpoint path used by each client function of this object.
endpoints: {
askQuestion: '/ai/question',
getPageHtml: '/html',
getPageText: '/text',
getStructuredData: '/ai/fields',
getAccountInfo: '/account',
},
// Input properties of the "Ask a Question About the Web Page" action:
// the required question, the shared request properties, and the
// device / error-handling / response-format options.
askQuestionProperties: {
question: Property.ShortText({
displayName: 'Question',
description:
'Question or instructions to ask the LLM model about the target page.',
required: true,
}),
...baseRequestProperties,
device: Property.StaticDropdown({
displayName: 'Device Type',
description: 'Emulate specific device for responsive design testing',
required: false,
options: {
options: [
{ label: 'Desktop', value: 'desktop' },
{ label: 'Mobile', value: 'mobile' },
{ label: 'Tablet', value: 'tablet' },
],
},
}),
errorOn404: Property.Checkbox({
displayName: 'Error on 404',
description: 'Fail the action if the page returns a 404 error',
required: false,
}),
errorOnRedirect: Property.Checkbox({
displayName: 'Error on Redirect',
description: 'Fail the action if the page redirects to another URL',
required: false,
}),
format: Property.StaticDropdown({
displayName: 'Response Format',
description: 'Response format: Text (simple) or JSON (structured)',
required: false,
defaultValue: 'text',
options: {
options: [
{ label: 'Text', value: 'text' },
{ label: 'JSON', value: 'json' },
],
},
}),
},
// Input properties of the "Get Page HTML" action: the shared request
// properties plus the script-result, device, error-handling and
// response-format options.
getPageHtmlProperties: {
  ...baseRequestProperties,
  returnScriptResult: Property.Checkbox({
    displayName: 'Return JavaScript Result',
    // Kept as a single literal: a backslash line continuation would embed the
    // continuation line's indentation in the text, so it would no longer match
    // the translation key.
    description:
      'Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).',
    required: false,
  }),
  device: Property.StaticDropdown({
    displayName: 'Device Type',
    description: 'Emulate specific device for responsive design testing',
    required: false,
    options: {
      options: [
        { label: 'Desktop', value: 'desktop' },
        { label: 'Mobile', value: 'mobile' },
        { label: 'Tablet', value: 'tablet' },
      ],
    },
  }),
  errorOn404: Property.Checkbox({
    displayName: 'Error on 404',
    description: 'Fail the action if the page returns a 404 error',
    required: false,
  }),
  errorOnRedirect: Property.Checkbox({
    displayName: 'Error on Redirect',
    description: 'Fail the action if the page redirects to another URL',
    required: false,
  }),
  format: Property.StaticDropdown({
    displayName: 'Response Format',
    description: 'Response format: Text (simple) or JSON (structured)',
    required: false,
    defaultValue: 'text',
    options: {
      options: [
        { label: 'Text', value: 'text' },
        { label: 'JSON', value: 'json' },
      ],
    },
  }),
},
// Input properties of the "Scrape Website Text" action: the shared request
// properties plus the text-format, link, device and error-handling options.
getPageTextProperties: {
...baseRequestProperties,
textFormat: Property.StaticDropdown({
displayName: 'Text Format',
description: 'Response format: Plain text, JSON (with title/description/content), or XML',
required: false,
defaultValue: 'plain',
options: {
options: [
{ label: 'Plain Text', value: 'plain' },
{ label: 'JSON', value: 'json' },
{ label: 'XML', value: 'xml' },
],
},
}),
// The scrape action only forwards this flag when the text format is 'json'.
returnLinks: Property.Checkbox({
displayName: 'Return Links',
description: 'Include links in response (only works with JSON format)',
required: false,
}),
device: Property.StaticDropdown({
displayName: 'Device Type',
description: 'Emulate specific device for responsive design testing',
required: false,
options: {
options: [
{ label: 'Desktop', value: 'desktop' },
{ label: 'Mobile', value: 'mobile' },
{ label: 'Tablet', value: 'tablet' },
],
},
}),
errorOn404: Property.Checkbox({
displayName: 'Error on 404',
description: 'Fail the action if the page returns a 404 error',
required: false,
}),
errorOnRedirect: Property.Checkbox({
displayName: 'Error on Redirect',
description: 'Fail the action if the page redirects to another URL',
required: false,
}),
},
// Input properties of the "Extract structured data" action: the required
// field map, the shared request properties, and the device / error-handling
// options.
getPageStructuredDataProperties: {
// Maps a field name to a description of what to extract for it.
fields: Property.Object({
displayName: 'Fields to Extract',
description: 'Define fields to extract (e.g., {"title": "Product title", "price": "Product price"})',
required: true,
}),
...baseRequestProperties,
device: Property.StaticDropdown({
displayName: 'Device Type',
description: 'Emulate specific device for responsive design testing',
required: false,
options: {
options: [
{ label: 'Desktop', value: 'desktop' },
{ label: 'Mobile', value: 'mobile' },
{ label: 'Tablet', value: 'tablet' },
],
},
}),
errorOn404: Property.Checkbox({
displayName: 'Error on 404',
description: 'Fail the action if the page returns a 404 error',
required: false,
}),
errorOnRedirect: Property.Checkbox({
displayName: 'Error on Redirect',
description: 'Fail the action if the page redirects to another URL',
required: false,
}),
},
/**
 * Sends a GET request to the question endpoint.
 *
 * Maps the camelCase params to the API's snake_case query parameters, omits
 * the ones that are undefined and stringifies the rest. Custom headers are
 * sent as one JSON-encoded object.
 *
 * @param params request options, including the API key, page URL and question
 * @returns the response of `httpClient.sendRequest`
 */
askQuestion: async (params: askQuestionParams) => {
  const encodedHeaders =
    params.headers && Array.isArray(params.headers)
      ? JSON.stringify(
          Object.fromEntries(
            params.headers.map((header) => [header.name, header.value])
          )
        )
      : undefined;
  const candidates: Record<string, string | number | boolean | undefined> = {
    api_key: params.apiKey,
    url: params.url,
    question: params.question,
    timeout: params.timeout,
    js: params.js,
    js_timeout: params.jsTimeout,
    wait_for: params.waitFor,
    proxy: params.proxy,
    country: params.country,
    custom_proxy: params.customProxy,
    device: params.device,
    error_on_404: params.errorOn404,
    error_on_redirect: params.errorOnRedirect,
    js_script: params.jsScript,
    format: params.format,
    headers: encodedHeaders,
  };
  const defined = pickBy(
    candidates,
    (candidate) => candidate !== undefined
  ) as Record<string, string | number | boolean>;
  // Query-string values must be strings, so numbers and booleans are stringified.
  const queryParams: QueryParams = {};
  for (const [key, candidate] of Object.entries(defined)) {
    queryParams[key] = String(candidate);
  }
  const endpointUrl = `${webscrapingAiCommon.baseUrl}${webscrapingAiCommon.endpoints.askQuestion}`;
  return await httpClient.sendRequest({
    method: HttpMethod.GET,
    url: endpointUrl,
    queryParams,
  });
},
/**
 * Sends a GET request to the HTML endpoint.
 *
 * Maps the camelCase params to the API's snake_case query parameters, omits
 * the ones that are undefined and stringifies the rest. Custom headers are
 * sent as one JSON-encoded object.
 *
 * @param params request options, including the API key and page URL
 * @returns the response of `httpClient.sendRequest`
 */
getPageHtml: async (params: getPageHtmlParams) => {
  const encodedHeaders =
    params.headers && Array.isArray(params.headers)
      ? JSON.stringify(
          Object.fromEntries(
            params.headers.map((header) => [header.name, header.value])
          )
        )
      : undefined;
  const candidates: Record<string, string | number | boolean | undefined> = {
    api_key: params.apiKey,
    url: params.url,
    headers: encodedHeaders,
    timeout: params.timeout,
    js: params.js,
    js_timeout: params.jsTimeout,
    wait_for: params.waitFor,
    proxy: params.proxy,
    country: params.country,
    custom_proxy: params.customProxy,
    device: params.device,
    error_on_404: params.errorOn404,
    error_on_redirect: params.errorOnRedirect,
    js_script: params.jsScript,
    return_script_result: params.returnScriptResult,
    format: params.format,
  };
  const defined = pickBy(
    candidates,
    (candidate) => candidate !== undefined
  ) as Record<string, string | number | boolean>;
  // Query-string values must be strings, so numbers and booleans are stringified.
  const queryParams: QueryParams = {};
  for (const [key, candidate] of Object.entries(defined)) {
    queryParams[key] = String(candidate);
  }
  const endpointUrl = `${webscrapingAiCommon.baseUrl}${webscrapingAiCommon.endpoints.getPageHtml}`;
  return await httpClient.sendRequest({
    method: HttpMethod.GET,
    url: endpointUrl,
    queryParams,
  });
},
/**
 * Sends a GET request to the text endpoint.
 *
 * Maps the camelCase params to the API's snake_case query parameters, omits
 * the ones that are undefined and stringifies the rest. Custom headers are
 * sent as one JSON-encoded object.
 *
 * @param params request options, including the API key and page URL
 * @returns the response of `httpClient.sendRequest`
 */
getPageText: async (params: getPageTextParams) => {
  const encodedHeaders =
    params.headers && Array.isArray(params.headers)
      ? JSON.stringify(
          Object.fromEntries(
            params.headers.map((header) => [header.name, header.value])
          )
        )
      : undefined;
  const candidates: Record<string, string | number | boolean | undefined> = {
    api_key: params.apiKey,
    text_format: params.textFormat,
    return_links: params.returnLinks,
    url: params.url,
    headers: encodedHeaders,
    timeout: params.timeout,
    js: params.js,
    js_timeout: params.jsTimeout,
    wait_for: params.waitFor,
    proxy: params.proxy,
    country: params.country,
    custom_proxy: params.customProxy,
    device: params.device,
    error_on_404: params.errorOn404,
    error_on_redirect: params.errorOnRedirect,
    js_script: params.jsScript,
  };
  const defined = pickBy(
    candidates,
    (candidate) => candidate !== undefined
  ) as Record<string, string | number | boolean>;
  // Query-string values must be strings, so numbers and booleans are stringified.
  const queryParams: QueryParams = {};
  for (const [key, candidate] of Object.entries(defined)) {
    queryParams[key] = String(candidate);
  }
  const endpointUrl = `${webscrapingAiCommon.baseUrl}${webscrapingAiCommon.endpoints.getPageText}`;
  return await httpClient.sendRequest({
    method: HttpMethod.GET,
    url: endpointUrl,
    queryParams,
  });
},
/**
 * Sends a GET request to the structured-data endpoint.
 *
 * Maps the camelCase params to the API's snake_case query parameters, omits
 * the ones that are undefined and stringifies the rest. Custom headers are
 * sent as one JSON-encoded object, and every entry of `params.fields` becomes
 * its own `fields[<name>]` query parameter.
 *
 * @param params request options, including the API key, page URL and fields
 * @returns the response of `httpClient.sendRequest`
 */
getPageStructuredData: async (params: getPageStructuredDataParams) => {
  const encodedHeaders =
    params.headers && Array.isArray(params.headers)
      ? JSON.stringify(
          Object.fromEntries(
            params.headers.map((header) => [header.name, header.value])
          )
        )
      : undefined;
  const candidates: Record<string, string | number | boolean | undefined> = {
    api_key: params.apiKey,
    url: params.url,
    headers: encodedHeaders,
    timeout: params.timeout,
    js: params.js,
    js_timeout: params.jsTimeout,
    wait_for: params.waitFor,
    proxy: params.proxy,
    country: params.country,
    custom_proxy: params.customProxy,
    device: params.device,
    error_on_404: params.errorOn404,
    error_on_redirect: params.errorOnRedirect,
    js_script: params.jsScript,
  };
  const defined = pickBy(
    candidates,
    (candidate) => candidate !== undefined
  ) as Record<string, string | number | boolean>;
  // Regular parameters first (stringified), then one `fields[<name>]` entry per
  // requested field; a field entry overrides a regular key of the same name.
  const merged: Record<string, string> = {};
  for (const [key, candidate] of Object.entries(defined)) {
    merged[key] = String(candidate);
  }
  for (const [fieldName, fieldDescription] of Object.entries(params.fields || {})) {
    merged[`fields[${fieldName}]`] = fieldDescription;
  }
  const queryParams = merged as QueryParams;
  const endpointUrl = `${webscrapingAiCommon.baseUrl}${webscrapingAiCommon.endpoints.getStructuredData}`;
  return await httpClient.sendRequest({
    method: HttpMethod.GET,
    url: endpointUrl,
    queryParams,
  });
},
/**
 * Sends a GET request to the account endpoint, passing the API key as the
 * `api_key` query parameter.
 *
 * @returns the response of `httpClient.sendRequest`
 */
getAccountInformation: async ({ apiKey }: AuthenticationRequired) => {
  const endpointUrl = `${webscrapingAiCommon.baseUrl}${webscrapingAiCommon.endpoints.getAccountInfo}`;
  const queryParams = { api_key: apiKey };
  return await httpClient.sendRequest({
    method: HttpMethod.GET,
    url: endpointUrl,
    queryParams,
  });
},
};
/** Credentials carried by every request to the API. */
type AuthenticationRequired = {
  apiKey: string;
};

/** Values accepted for the `device` option. */
type DeviceType = 'desktop' | 'mobile' | 'tablet';

/** Values accepted for the `country` option. */
type ProxyCountry =
  | 'us'
  | 'gb'
  | 'de'
  | 'it'
  | 'fr'
  | 'ca'
  | 'es'
  | 'ru'
  | 'jp'
  | 'kr'
  | 'in';

/** Options shared by every page-fetching request. */
interface baseRequestParams extends AuthenticationRequired {
  url: string;
  /** Custom headers as name/value pairs. */
  headers?: Array<{ name: string; value: string }>;
  timeout?: number;
  js?: boolean;
  jsTimeout?: number;
  waitFor?: string;
  proxy?: 'datacenter' | 'residential';
  country?: ProxyCountry;
  customProxy?: string;
  jsScript?: string;
}

/** Device and error-handling options common to all page-fetching requests. */
interface pageHandlingParams {
  device?: DeviceType;
  errorOn404?: boolean;
  errorOnRedirect?: boolean;
}

/** Parameters of `askQuestion`. */
interface askQuestionParams extends baseRequestParams, pageHandlingParams {
  question: string;
  format?: 'json' | 'text';
}

/** Parameters of `getPageHtml`. */
interface getPageHtmlParams extends baseRequestParams, pageHandlingParams {
  returnScriptResult?: boolean;
  format?: 'json' | 'text';
}

/** Parameters of `getPageText`. */
interface getPageTextParams extends baseRequestParams, pageHandlingParams {
  textFormat?: 'plain' | 'xml' | 'json';
  returnLinks?: boolean;
}

/** Parameters of `getPageStructuredData`. */
interface getPageStructuredDataParams extends baseRequestParams, pageHandlingParams {
  fields: Record<string, string>;
}

View File

@@ -0,0 +1,20 @@
{
"extends": "../../../../tsconfig.base.json",
"compilerOptions": {
"module": "commonjs",
"forceConsistentCasingInFileNames": true,
"strict": true,
"importHelpers": true,
"noImplicitOverride": true,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true,
"noPropertyAccessFromIndexSignature": true
},
"files": [],
"include": [],
"references": [
{
"path": "./tsconfig.lib.json"
}
]
}

View File

@@ -0,0 +1,9 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
"outDir": "../../../../dist/out-tsc",
"declaration": true,
"types": ["node"]
},
"include": ["src/**/*.ts"]
}