diff --git a/blueprints/firecrawl/docker-compose.yml b/blueprints/firecrawl/docker-compose.yml
new file mode 100644
index 00000000..f4181cfc
--- /dev/null
+++ b/blueprints/firecrawl/docker-compose.yml
@@ -0,0 +1,145 @@
+# Firecrawl stack: API, queue worker, extract worker, Playwright service,
+# Redis and the NUQ Postgres database.
+name: firecrawl
+
+# Settings shared by every service that runs the Firecrawl image.
+x-common-service: &common-service
+  image: ghcr.io/firecrawl/firecrawl:latest
+  ulimits:
+    nofile:
+      soft: 65535
+      hard: 65535
+  extra_hosts:
+    - "host.docker.internal:host-gateway"
+
+# Environment shared by api, worker and extract-worker. Each value can be
+# overridden from the deployment environment; "${VAR:-}" falls back to an
+# empty string.
+x-common-env: &common-env
+  REDIS_URL: ${REDIS_URL:-redis://redis:6379}
+  REDIS_RATE_LIMIT_URL: ${REDIS_RATE_LIMIT_URL:-redis://redis:6379}
+  PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
+  NUQ_DATABASE_URL: ${NUQ_DATABASE_URL:-postgres://postgres:postgres@nuq-postgres:5432/postgres}
+  USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-}
+  OPENAI_API_KEY: ${OPENAI_API_KEY:-}
+  OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
+  MODEL_NAME: ${MODEL_NAME:-}
+  MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}
+  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
+  SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL:-}
+  BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}
+  TEST_API_KEY: ${TEST_API_KEY:-}
+  POSTHOG_API_KEY: ${POSTHOG_API_KEY:-}
+  POSTHOG_HOST: ${POSTHOG_HOST:-}
+  SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN:-}
+  SUPABASE_URL: ${SUPABASE_URL:-}
+  SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN:-}
+  SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL:-}
+  SERPER_API_KEY: ${SERPER_API_KEY:-}
+  SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY:-}
+  LOGGING_LEVEL: ${LOGGING_LEVEL:-INFO}
+  PROXY_SERVER: ${PROXY_SERVER:-}
+  PROXY_USERNAME: ${PROXY_USERNAME:-}
+  PROXY_PASSWORD: ${PROXY_PASSWORD:-}
+  NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}
+  SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT:-}
+  SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
+  SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}
+  # template.toml sets these three; forward them so they reach the containers.
+  LLAMAPARSE_API_KEY: ${LLAMAPARSE_API_KEY:-}
+  MAX_CPU: ${MAX_CPU:-0.8}
+  MAX_RAM: ${MAX_RAM:-0.8}
+
+services:
+  playwright-service:
+    image: ghcr.io/firecrawl/playwright-service:latest
+    shm_size: "1g"
+    restart: unless-stopped
+    environment:
+      PORT: 3000
+      PROXY_SERVER: ${PROXY_SERVER:-}
+      PROXY_USERNAME: ${PROXY_USERNAME:-}
+      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
+      BLOCK_MEDIA: ${BLOCK_MEDIA:-}
+      NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}
+
+  api:
+    <<: *common-service
+    restart: unless-stopped
+    ports:
+      # Container port only; no fixed host port is requested.
+      - "3002"
+    environment:
+      <<: *common-env
+      HOST: "0.0.0.0"
+      PORT: 3002
+      WORKER_PORT: 3005
+      ENV: local
+    depends_on:
+      redis:
+        condition: service_started
+      playwright-service:
+        condition: service_started
+      nuq-postgres:
+        condition: service_healthy
+    command: node --import ./dist/src/otel.js dist/src/index.js
+
+  worker:
+    <<: *common-service
+    restart: unless-stopped
+    environment:
+      <<: *common-env
+      HOST: "0.0.0.0"
+      PORT: 3005
+      ENV: local
+    depends_on:
+      redis:
+        condition: service_started
+      nuq-postgres:
+        condition: service_healthy
+    command: node --import ./dist/src/otel.js dist/src/services/queue-worker.js
+
+  extract-worker:
+    <<: *common-service
+    restart: unless-stopped
+    environment:
+      <<: *common-env
+      HOST: "0.0.0.0"
+      PORT: 3004
+      ENV: local
+    depends_on:
+      redis:
+        condition: service_started
+      nuq-postgres:
+        condition: service_healthy
+    command: node --import ./dist/src/otel.js dist/src/services/extract-worker.js
+
+  redis:
+    image: redis:alpine
+    restart: unless-stopped
+    command: redis-server --bind 0.0.0.0
+
+  # Deliberately no "networks:" key here. A service that lists networks leaves
+  # the project's default network, where api and both workers resolve the
+  # "nuq-postgres" hostname used in NUQ_DATABASE_URL.
+  nuq-postgres:
+    build:
+      context: "https://github.com/firecrawl/firecrawl.git#main:apps/nuq-postgres"
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: postgres
+    volumes:
+      - nuq_pg_data:/var/lib/postgresql/data
+    healthcheck:
+      # "$$" keeps Compose from interpolating; the container shell expands it.
+      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
+      start_period: 30s
+      interval: 10s
+      timeout: 5s
+      retries: 10
+
+volumes:
+  nuq_pg_data: {}
diff --git a/blueprints/firecrawl/firecrawl.svg b/blueprints/firecrawl/firecrawl.svg
new file mode 100644
index 00000000..34dba9b7
--- /dev/null
+++ b/blueprints/firecrawl/firecrawl.svg
@@ -0,0 +1,3 @@
+
+
+
diff --git a/blueprints/firecrawl/template.toml b/blueprints/firecrawl/template.toml
new file mode 100644
index 00000000..999ec4ca
--- /dev/null
+++ b/blueprints/firecrawl/template.toml
@@ -0,0 +1,66 @@
+# Deployment template for the Firecrawl blueprint.
+[variables]
+main_domain = "${domain}"
+openai_api_key = "${OPENAI_API_KEY}"
+openai_base_url = "${OPENAI_BASE_URL}"
+ollama_base_url = "${OLLAMA_BASE_URL}"
+model_name = "${MODEL_NAME}"
+model_embedding_name = "${MODEL_EMBEDDING_NAME}"
+proxy_server = "${PROXY_SERVER}"
+proxy_username = "${PROXY_USERNAME}"
+proxy_password = "${PROXY_PASSWORD}"
+searxng_endpoint = "${SEARXNG_ENDPOINT}"
+searxng_engines = "${SEARXNG_ENGINES}"
+searxng_categories = "${SEARXNG_CATEGORIES}"
+supabase_anon_token = "${SUPABASE_ANON_TOKEN}"
+supabase_url = "${SUPABASE_URL}"
+supabase_service_token = "${SUPABASE_SERVICE_TOKEN}"
+test_api_key = "${TEST_API_KEY}"
+bull_auth_key = "${password:32}"
+llamaparse_api_key = "${LLAMAPARSE_API_KEY}"
+slack_webhook_url = "${SLACK_WEBHOOK_URL}"
+posthog_api_key = "${POSTHOG_API_KEY}"
+posthog_host = "${POSTHOG_HOST}"
+# Literal defaults, referenced by the MAX_CPU / MAX_RAM env entries below.
+max_cpu = "0.8"
+max_ram = "0.8"
+
+[config]
+env = [
+  "PORT=3002",
+  "HOST=0.0.0.0",
+  "USE_DB_AUTHENTICATION=false",
+  "BULL_AUTH_KEY=${bull_auth_key}",
+  "PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape",
+  "REDIS_URL=redis://redis:6379",
+  "REDIS_RATE_LIMIT_URL=redis://redis:6379",
+  "OPENAI_API_KEY=${openai_api_key}",
+  "OPENAI_BASE_URL=${openai_base_url}",
+  "OLLAMA_BASE_URL=${ollama_base_url}",
+  "MODEL_NAME=${model_name}",
+  "MODEL_EMBEDDING_NAME=${model_embedding_name}",
+  "PROXY_SERVER=${proxy_server}",
+  "PROXY_USERNAME=${proxy_username}",
+  "PROXY_PASSWORD=${proxy_password}",
+  "SEARXNG_ENDPOINT=${searxng_endpoint}",
+  "SEARXNG_ENGINES=${searxng_engines}",
+  "SEARXNG_CATEGORIES=${searxng_categories}",
+  "SUPABASE_ANON_TOKEN=${supabase_anon_token}",
+  "SUPABASE_URL=${supabase_url}",
+  "SUPABASE_SERVICE_TOKEN=${supabase_service_token}",
+  "TEST_API_KEY=${test_api_key}",
+  "LLAMAPARSE_API_KEY=${llamaparse_api_key}",
+  "SLACK_WEBHOOK_URL=${slack_webhook_url}",
+  "POSTHOG_API_KEY=${posthog_api_key}",
+  "POSTHOG_HOST=${posthog_host}",
+  "MAX_CPU=${max_cpu}",
+  "MAX_RAM=${max_ram}"
+]
+mounts = []
+
+# Route the configured domain to port 3002 of the api service.
+[[config.domains]]
+serviceName = "api"
+port = 3002
+host = "${main_domain}"
+path = "/"
diff --git a/meta.json b/meta.json
index 34146662..161a8ae0 100644
--- a/meta.json
+++ b/meta.json
@@ -1985,6 +1985,25 @@
       "self-hosted"
     ]
   },
+  {
+    "id": "firecrawl",
+    "name": "Firecrawl",
+    "version": "latest",
+    "description": "Firecrawl is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data. It can crawl all accessible subpages and provide clean data for each.",
+    "logo": "firecrawl.svg",
+    "links": {
+      "github": "https://github.com/firecrawl/firecrawl",
+      "website": "https://firecrawl.dev",
+      "docs": "https://github.com/firecrawl/firecrawl"
+    },
+    "tags": [
+      "api",
+      "crawler",
+      "scraping",
+      "data-extraction",
+      "llm"
+    ]
+  },
   {
     "id": "fivem",
     "name": "FiveM Server",