From 1e75877fb931726fa881ef754cc23386005e2cb3 Mon Sep 17 00:00:00 2001
From: Harikrishnan Dhanasekaran
Date: Sun, 5 Oct 2025 09:33:48 +0530
Subject: [PATCH] Add Firecrawl Docker Compose Template -#422 (#423)

* fix: run api and workers in separate containers to resolve port conflict

Previously, the API and worker services ran together and tried to bind to
the same port, causing repeated EADDRINUSE errors and container crashes.
This update splits the API, queue worker, extract worker, and nuq workers
into individual service containers, each with a unique port and process.
Fixes API not starting, stabilizes the deployment, and enables concurrent
service operation.

* updated the meta.json for the build issue

* updated the meta.json for the logo path

* Update blueprints/firecrawl/docker-compose.yml

---------

Co-authored-by: Mauricio Siu <47042324+Siumauricio@users.noreply.github.com>
---
 blueprints/firecrawl/docker-compose.yml | 135 ++++++++++++++++++++++++
 blueprints/firecrawl/firecrawl.svg      |   3 +
 blueprints/firecrawl/template.toml      |  63 +++++++++++
 meta.json                               |  19 ++++
 4 files changed, 220 insertions(+)
 create mode 100644 blueprints/firecrawl/docker-compose.yml
 create mode 100644 blueprints/firecrawl/firecrawl.svg
 create mode 100644 blueprints/firecrawl/template.toml

diff --git a/blueprints/firecrawl/docker-compose.yml b/blueprints/firecrawl/docker-compose.yml
new file mode 100644
index 00000000..f4181cfc
--- /dev/null
+++ b/blueprints/firecrawl/docker-compose.yml
@@ -0,0 +1,135 @@
+name: firecrawl
+
+# Base settings for every container that runs the main Firecrawl image.
+x-common-service: &common-service
+  image: ghcr.io/firecrawl/firecrawl:latest
+  ulimits:
+    nofile:
+      soft: 65535
+      hard: 65535
+  extra_hosts:
+    - "host.docker.internal:host-gateway"
+
+# Environment merged into api, worker and extract-worker.
+x-common-env: &common-env
+  REDIS_URL: ${REDIS_URL:-redis://redis:6379}
+  REDIS_RATE_LIMIT_URL: ${REDIS_RATE_LIMIT_URL:-redis://redis:6379}
+  PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
+  NUQ_DATABASE_URL: ${NUQ_DATABASE_URL:-postgres://postgres:postgres@nuq-postgres:5432/postgres}
+  USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-}
+  OPENAI_API_KEY: ${OPENAI_API_KEY:-}
+  OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
+  MODEL_NAME: ${MODEL_NAME:-}
+  MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}
+  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
+  SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL:-}
+  BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}
+  TEST_API_KEY: ${TEST_API_KEY:-}
+  POSTHOG_API_KEY: ${POSTHOG_API_KEY:-}
+  POSTHOG_HOST: ${POSTHOG_HOST:-}
+  SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN:-}
+  SUPABASE_URL: ${SUPABASE_URL:-}
+  SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN:-}
+  SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL:-}
+  SERPER_API_KEY: ${SERPER_API_KEY:-}
+  SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY:-}
+  LOGGING_LEVEL: ${LOGGING_LEVEL:-INFO}
+  PROXY_SERVER: ${PROXY_SERVER:-}
+  PROXY_USERNAME: ${PROXY_USERNAME:-}
+  PROXY_PASSWORD: ${PROXY_PASSWORD:-}
+  NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}
+  SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT:-}
+  SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
+  SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}
+
+services:
+  playwright-service:
+    image: ghcr.io/firecrawl/playwright-service:latest
+    shm_size: "1g"
+    restart: unless-stopped
+    environment:
+      PORT: "3000"
+      PROXY_SERVER: ${PROXY_SERVER:-}
+      PROXY_USERNAME: ${PROXY_USERNAME:-}
+      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
+      BLOCK_MEDIA: ${BLOCK_MEDIA:-}
+      NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}
+
+  api:
+    <<: *common-service
+    restart: unless-stopped
+    ports:
+      - "3002"
+    environment:
+      <<: *common-env
+      HOST: "0.0.0.0"
+      PORT: "3002"
+      WORKER_PORT: "3005"
+      ENV: local
+    depends_on:
+      redis:
+        condition: service_started
+      playwright-service:
+        condition: service_started
+      nuq-postgres:
+        condition: service_healthy
+    command: node --import ./dist/src/otel.js dist/src/index.js
+
+  worker:
+    <<: *common-service
+    restart: unless-stopped
+    environment:
+      <<: *common-env
+      HOST: "0.0.0.0"
+      PORT: "3005"
+      ENV: local
+    depends_on:
+      redis:
+        condition: service_started
+      nuq-postgres:
+        condition: service_healthy
+    command: node --import ./dist/src/otel.js dist/src/services/queue-worker.js
+
+  extract-worker:
+    <<: *common-service
+    restart: unless-stopped
+    environment:
+      <<: *common-env
+      HOST: "0.0.0.0"
+      PORT: "3004"
+      ENV: local
+    depends_on:
+      redis:
+        condition: service_started
+      nuq-postgres:
+        condition: service_healthy
+    command: node --import ./dist/src/otel.js dist/src/services/extract-worker.js
+
+  redis:
+    image: redis:alpine
+    restart: unless-stopped
+    command: redis-server --bind 0.0.0.0
+
+  # Deliberately no `networks:` entry: api, worker and extract-worker reach this
+  # database by the hostname nuq-postgres over the project's default network,
+  # which a service leaves as soon as it lists networks of its own.
+  nuq-postgres:
+    build:
+      context: "https://github.com/firecrawl/firecrawl.git#main:apps/nuq-postgres"
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: postgres
+    volumes:
+      - nuq_pg_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
+      start_period: 30s
+      interval: 10s
+      timeout: 5s
+      retries: 10
+
+volumes:
+  nuq_pg_data:
diff --git a/blueprints/firecrawl/firecrawl.svg b/blueprints/firecrawl/firecrawl.svg
new file mode 100644
index 00000000..34dba9b7
--- /dev/null
+++ b/blueprints/firecrawl/firecrawl.svg
@@ -0,0 +1,3 @@
+
+
+
diff --git a/blueprints/firecrawl/template.toml b/blueprints/firecrawl/template.toml
new file mode 100644
index 00000000..999ec4ca
--- /dev/null
+++ b/blueprints/firecrawl/template.toml
@@ -0,0 +1,63 @@
+[variables]
+main_domain = "${domain}"
+openai_api_key = "${OPENAI_API_KEY}"
+openai_base_url = "${OPENAI_BASE_URL}"
+ollama_base_url = "${OLLAMA_BASE_URL}"
+model_name = "${MODEL_NAME}"
+model_embedding_name = "${MODEL_EMBEDDING_NAME}"
+proxy_server = "${PROXY_SERVER}"
+proxy_username = "${PROXY_USERNAME}"
+proxy_password = "${PROXY_PASSWORD}"
+searxng_endpoint = "${SEARXNG_ENDPOINT}"
+searxng_engines = "${SEARXNG_ENGINES}"
+searxng_categories = "${SEARXNG_CATEGORIES}"
+supabase_anon_token = "${SUPABASE_ANON_TOKEN}"
+supabase_url = "${SUPABASE_URL}"
+supabase_service_token = "${SUPABASE_SERVICE_TOKEN}"
+test_api_key = "${TEST_API_KEY}"
+bull_auth_key = "${password:32}"
+llamaparse_api_key = "${LLAMAPARSE_API_KEY}"
+slack_webhook_url = "${SLACK_WEBHOOK_URL}"
+posthog_api_key = "${POSTHOG_API_KEY}"
+posthog_host = "${POSTHOG_HOST}"
+max_cpu = "${MAX_CPU}"
+max_ram = "${MAX_RAM}"
+
+[config]
+env = [
+  "PORT=3002",
+  "HOST=0.0.0.0",
+  "USE_DB_AUTHENTICATION=false",
+  "BULL_AUTH_KEY=${bull_auth_key}",
+  "PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape",
+  "REDIS_URL=redis://redis:6379",
+  "REDIS_RATE_LIMIT_URL=redis://redis:6379",
+  "OPENAI_API_KEY=${openai_api_key}",
+  "OPENAI_BASE_URL=${openai_base_url}",
+  "OLLAMA_BASE_URL=${ollama_base_url}",
+  "MODEL_NAME=${model_name}",
+  "MODEL_EMBEDDING_NAME=${model_embedding_name}",
+  "PROXY_SERVER=${proxy_server}",
+  "PROXY_USERNAME=${proxy_username}",
+  "PROXY_PASSWORD=${proxy_password}",
+  "SEARXNG_ENDPOINT=${searxng_endpoint}",
+  "SEARXNG_ENGINES=${searxng_engines}",
+  "SEARXNG_CATEGORIES=${searxng_categories}",
+  "SUPABASE_ANON_TOKEN=${supabase_anon_token}",
+  "SUPABASE_URL=${supabase_url}",
+  "SUPABASE_SERVICE_TOKEN=${supabase_service_token}",
+  "TEST_API_KEY=${test_api_key}",
+  "LLAMAPARSE_API_KEY=${llamaparse_api_key}",
+  "SLACK_WEBHOOK_URL=${slack_webhook_url}",
+  "POSTHOG_API_KEY=${posthog_api_key}",
+  "POSTHOG_HOST=${posthog_host}",
+  "MAX_CPU=0.8",
+  "MAX_RAM=0.8"
+]
+mounts = []
+
+[[config.domains]]
+serviceName = "api"
+port = 3002
+host = "${main_domain}"
+path = "/"
diff --git a/meta.json b/meta.json
index 34146662..161a8ae0 100644
--- a/meta.json
+++ b/meta.json
@@ -1985,6 +1985,25 @@
       "self-hosted"
     ]
   },
+  {
+    "id": "firecrawl",
+    "name": "Firecrawl",
+    "version": "latest",
+    "description": "Firecrawl is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data. It can crawl all accessible subpages and provide clean data for each.",
+    "logo": "firecrawl.svg",
+    "links": {
+      "github": "https://github.com/firecrawl/firecrawl",
+      "website": "https://firecrawl.dev",
+      "docs": "https://github.com/firecrawl/firecrawl"
+    },
+    "tags": [
+      "api",
+      "crawler",
+      "scraping",
+      "data-extraction",
+      "llm"
+    ]
+  },
   {
     "id": "fivem",
     "name": "FiveM Server",