From bac77633584fe9838b3be23b3b4ef365de312be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jainil=20Prajapati=20=F0=9F=AA=90?= <86187588+jaainil@users.noreply.github.com> Date: Sat, 23 Aug 2025 10:30:00 +0530 Subject: [PATCH] Updated karakeep fixed crawling error (#278) * Updated karakeep fixed crawling error * fix * fix * Updated Meta.json --- blueprints/karakeep/docker-compose.yml | 59 ++++------- blueprints/karakeep/template.toml | 129 +++++++++++++++++++++---- 2 files changed, 130 insertions(+), 58 deletions(-) diff --git a/blueprints/karakeep/docker-compose.yml b/blueprints/karakeep/docker-compose.yml index dd79e704..287fddef 100644 --- a/blueprints/karakeep/docker-compose.yml +++ b/blueprints/karakeep/docker-compose.yml @@ -1,54 +1,33 @@ +version: "3.8" services: web: image: ghcr.io/karakeep-app/karakeep:${KARAKEEP_VERSION:-release} restart: unless-stopped volumes: - # By default, the data is stored in a docker volume called "karakeep_data". - # If you want to mount a custom directory, change the volume mapping to: - # - /path/to/your/directory:/data - - karakeep_data:/data - ports: - - "3000" + - data:/data environment: - DATA_DIR: ${DATA_DIR:-/data} - DISABLE_SIGNUPS: ${DISABLE_SIGNUPS:-false} - NEXTAUTH_SECRET: ${NEXTAUTH_SECRET} - NEXTAUTH_URL: ${NEXTAUTH_URL} - # Meilisearch config - MEILI_ADDR: ${MEILI_ADDR:-http://meilisearch:7700} - MEILI_MASTER_KEY: ${MEILI_MASTER_KEY} - # Chrome config - BROWSER_WEB_URL: ${BROWSER_WEB_URL:-ws://chrome:3000} - BROWSER_CONNECT_ONDEMAND: ${BROWSER_CONNECT_ONDEMAND:-true} - depends_on: - - chrome - - meilisearch - + MEILI_ADDR: http://meilisearch:7700 + BROWSER_WEB_URL: http://chrome:9222 + DATA_DIR: /data + NEXTAUTH_SECRET: your_secure_key chrome: - image: browserless/chrome:1-chrome-stable + image: gcr.io/zenika-hub/alpine-chrome:124 restart: unless-stopped - ports: - - "3000" - environment: - - DEBUG=browserless* - - MAX_CONCURRENT_SESSIONS=10 - - CONNECTION_TIMEOUT=60000 - - MAX_QUEUE_LENGTH=10 - - PORT=3000 - - CHROME_REFRESH_TIME=2147483647 - + command: + - --no-sandbox + - --disable-gpu + - --disable-dev-shm-usage + - --remote-debugging-address=0.0.0.0 + - --remote-debugging-port=9222 + - --hide-scrollbars meilisearch: - image: getmeili/meilisearch:v1.10 + image: getmeili/meilisearch:v1.13.3 restart: unless-stopped - ports: - - "7700" environment: - MEILI_MASTER_KEY: ${MEILI_MASTER_KEY} - MEILI_NO_ANALYTICS: true - MEILI_ENV: production + MEILI_NO_ANALYTICS: "true" volumes: - - meilisearch_data:/meili_data + - meilisearch:/meili_data volumes: - karakeep_data: - meilisearch_data: + meilisearch: + data: diff --git a/blueprints/karakeep/template.toml b/blueprints/karakeep/template.toml index 751cf8ec..285f6a1b 100644 --- a/blueprints/karakeep/template.toml +++ b/blueprints/karakeep/template.toml @@ -1,25 +1,118 @@ [variables] main_domain = "${domain}" -nextauth_secret = "${password:64}" -meili_master_key = "${password:32}" - -[config.env] -KARAKEEP_VERSION = "release" -NEXTAUTH_SECRET = "${nextauth_secret}" -MEILI_MASTER_KEY = "${meili_master_key}" -NEXTAUTH_URL = "http://${main_domain}" -DATA_DIR = "/data" -DISABLE_SIGNUPS = "false" -# Meilisearch config -MEILI_ADDR = "http://meilisearch:7700" -# Chrome config -BROWSER_WEB_URL = "ws://chrome:9222" -BROWSER_CONNECT_ONDEMAND = "true" +karakeep_version = "release" [config] -mounts = [] - [[config.domains]] serviceName = "web" port = 3000 -host = "${main_domain}" \ No newline at end of file +host = "${main_domain}" + +[config.env] +# Core required configuration +"NEXTAUTH_URL" = "http://${main_domain}" +# "NEXTAUTH_SECRET" = "${password:36}" + +# DATA_DIR is already set to /data by default in docker-compose +# "DATA_DIR" = "/data" + +# Optional: common configs +# "ASSETS_DIR" = "" +"MEILI_MASTER_KEY" = "${password:36}" +# "MAX_ASSET_SIZE_MB" = "50" +# "DISABLE_NEW_RELEASE_CHECK" = "false" +# "PROMETHEUS_AUTH_TOKEN" = "" +# "RATE_LIMITING_ENABLED" = "false" +# "DB_WAL_MODE" = "false" +# "SEARCH_NUM_WORKERS" = "1" +# "WEBHOOK_NUM_WORKERS" = "1" +# "ASSET_PREPROCESSING_NUM_WORKERS" = "1" +# "RULE_ENGINE_NUM_WORKERS" = "1" + +# --- Asset Storage (S3) --- +# "ASSET_STORE_S3_ENDPOINT" = "" +# "ASSET_STORE_S3_REGION" = "" +# "ASSET_STORE_S3_BUCKET" = "" +# "ASSET_STORE_S3_ACCESS_KEY_ID" = "" +# "ASSET_STORE_S3_SECRET_ACCESS_KEY" = "" +# "ASSET_STORE_S3_FORCE_PATH_STYLE" = "false" + +# --- Authentication / Signup --- +# "DISABLE_SIGNUPS" = "false" +# "DISABLE_PASSWORD_AUTH" = "false" +# "EMAIL_VERIFICATION_REQUIRED" = "false" +# "OAUTH_WELLKNOWN_URL" = "" +# "OAUTH_CLIENT_SECRET" = "" +# "OAUTH_CLIENT_ID" = "" +# "OAUTH_SCOPE" = "openid email profile" +# "OAUTH_PROVIDER_NAME" = "Custom Provider" +# "OAUTH_ALLOW_DANGEROUS_EMAIL_ACCOUNT_LINKING" = "false" +# "OAUTH_TIMEOUT" = "3500" + +# --- Inference Configs --- +# "OPENAI_API_KEY" = "" +# "OPENAI_BASE_URL" = "" +# "OLLAMA_BASE_URL" = "" +# "OLLAMA_KEEP_ALIVE" = "" +# "INFERENCE_TEXT_MODEL" = "gpt-4.1-mini" +# "INFERENCE_IMAGE_MODEL" = "gpt-4o-mini" +# "EMBEDDING_TEXT_MODEL" = "text-embedding-3-small" +# "INFERENCE_CONTEXT_LENGTH" = "2048" +# "INFERENCE_LANG" = "english" +# "INFERENCE_NUM_WORKERS" = "1" +# "INFERENCE_ENABLE_AUTO_TAGGING" = "true" +# "INFERENCE_ENABLE_AUTO_SUMMARIZATION" = "false" +# "INFERENCE_JOB_TIMEOUT_SEC" = "30" +# "INFERENCE_FETCH_TIMEOUT_SEC" = "300" +# "INFERENCE_OUTPUT_SCHEMA" = "structured" + +# --- Crawler Configs --- +# "CRAWLER_NUM_WORKERS" = "1" +# "BROWSER_WEB_URL" = "http://chrome:9222" +# "BROWSER_WEBSOCKET_URL" = "" +# "BROWSER_CONNECT_ONDEMAND" = "false" +# "CRAWLER_DOWNLOAD_BANNER_IMAGE" = "true" +# "CRAWLER_STORE_SCREENSHOT" = "true" +# "CRAWLER_FULL_PAGE_SCREENSHOT" = "false" +# "CRAWLER_SCREENSHOT_TIMEOUT_SEC" = "5" +# "CRAWLER_FULL_PAGE_ARCHIVE" = "false" +# "CRAWLER_JOB_TIMEOUT_SEC" = "60" +# "CRAWLER_NAVIGATE_TIMEOUT_SEC" = "30" +# "CRAWLER_VIDEO_DOWNLOAD" = "false" +# "CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE" = "50" +# "CRAWLER_VIDEO_DOWNLOAD_TIMEOUT_SEC" = "600" +# "CRAWLER_ENABLE_ADBLOCKER" = "true" +# "CRAWLER_YTDLP_ARGS" = "[]" + +# --- OCR Configs --- +# "OCR_CACHE_DIR" = "" +# "OCR_LANGS" = "eng" +# "OCR_CONFIDENCE_THRESHOLD" = "50" + +# --- Webhook Configs --- +# "WEBHOOK_TIMEOUT_SEC" = "5" +# "WEBHOOK_RETRY_TIMES" = "3" +# "WEBHOOK_TOKEN" = "" + +# --- SMTP Configuration --- +# "SMTP_HOST" = "" +# "SMTP_PORT" = "587" +# "SMTP_SECURE" = "false" +# "SMTP_USER" = "" +# "SMTP_PASSWORD" = "" +# "SMTP_FROM" = "" + +# --- Proxy Configuration --- +# "CRAWLER_HTTP_PROXY" = "" +# "CRAWLER_HTTPS_PROXY" = "" +# "CRAWLER_NO_PROXY" = "" + +[[config.mounts]] +# Persistent data directory for Karakeep +volumeName = "data" +mountPath = "/data" + +[[config.mounts]] +# Meilisearch data directory +volumeName = "meilisearch" +mountPath = "/meili_data"