mirror of
https://github.com/Dokploy/dokploy.git
synced 2026-06-15 20:25:23 +02:00
Merge pull request #4033 from Dokploy/feat/improve-update-process-to-validate-dokploy-services
feat: enhance web server update process with health checks
This commit is contained in:
@@ -1,4 +1,11 @@
|
|||||||
import { HardDriveDownload, Loader2 } from "lucide-react";
|
import {
|
||||||
|
AlertTriangle,
|
||||||
|
CheckCircle2,
|
||||||
|
HardDriveDownload,
|
||||||
|
Loader2,
|
||||||
|
RefreshCw,
|
||||||
|
XCircle,
|
||||||
|
} from "lucide-react";
|
||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
import { toast } from "sonner";
|
import { toast } from "sonner";
|
||||||
import {
|
import {
|
||||||
@@ -15,11 +22,70 @@ import {
|
|||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { api } from "@/utils/api";
|
import { api } from "@/utils/api";
|
||||||
|
|
||||||
|
type ServiceStatus = {
|
||||||
|
status: "healthy" | "unhealthy";
|
||||||
|
message?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
type HealthResult = {
|
||||||
|
postgres: ServiceStatus;
|
||||||
|
redis: ServiceStatus;
|
||||||
|
traefik: ServiceStatus;
|
||||||
|
};
|
||||||
|
|
||||||
|
type ModalState = "idle" | "checking" | "results" | "updating";
|
||||||
|
|
||||||
|
/**
 * Single row of the health summary: a status icon, the service label and,
 * for an unhealthy service that carries a message, that message.
 */
const ServiceStatusItem = (props: { name: string; service: ServiceStatus }) => {
	const { name, service } = props;
	const isHealthy = service.status === "healthy";
	// Green check for healthy, red cross for anything else.
	const StatusIcon = isHealthy ? CheckCircle2 : XCircle;
	const iconClassName = isHealthy
		? "h-4 w-4 text-green-500"
		: "h-4 w-4 text-red-500";
	// The message is only surfaced when the service is explicitly unhealthy.
	const detail = service.status === "unhealthy" ? service.message : undefined;

	return (
		<div className="flex items-center gap-2">
			<StatusIcon className={iconClassName} />
			<span className="text-sm font-medium">{name}</span>
			{detail && (
				<span className="text-xs text-muted-foreground">— {detail}</span>
			)}
		</div>
	);
};
|
||||||
|
|
||||||
export const UpdateWebServer = () => {
|
export const UpdateWebServer = () => {
|
||||||
const [updating, setUpdating] = useState(false);
|
const [modalState, setModalState] = useState<ModalState>("idle");
|
||||||
const [open, setOpen] = useState(false);
|
const [open, setOpen] = useState(false);
|
||||||
|
const [healthResult, setHealthResult] = useState<HealthResult | null>(null);
|
||||||
|
|
||||||
const { mutateAsync: updateServer } = api.settings.updateServer.useMutation();
|
const { mutateAsync: updateServer } = api.settings.updateServer.useMutation();
|
||||||
|
const { refetch: checkHealth } =
|
||||||
|
api.settings.checkInfrastructureHealth.useQuery(undefined, {
|
||||||
|
enabled: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Runs the infrastructure health check and moves the dialog to the
 * "results" state. `healthResult` stays `null` when the check could not be
 * completed, which the dialog renders as "Could not verify services".
 */
const handleVerify = async () => {
	setModalState("checking");
	setHealthResult(null);

	try {
		const result = await checkHealth();
		// refetch() resolves (instead of throwing) when the request fails and
		// may still expose data from an earlier successful run. Only accept
		// the data when this run did not end in an error, so a failed
		// re-check is never presented as a fresh result.
		if (result.isError) {
			console.error("Error checking infrastructure health:", result.error);
		} else if (result.data) {
			setHealthResult(result.data);
		}
	} catch (error) {
		// checkHealth failed entirely; keep healthResult null so the dialog
		// reports that the services could not be verified.
		console.error("Error checking infrastructure health:", error);
	}

	setModalState("results");
};
|
||||||
|
|
||||||
|
const allHealthy =
|
||||||
|
healthResult &&
|
||||||
|
healthResult.postgres.status === "healthy" &&
|
||||||
|
healthResult.redis.status === "healthy" &&
|
||||||
|
healthResult.traefik.status === "healthy";
|
||||||
|
|
||||||
const checkIsUpdateFinished = async () => {
|
const checkIsUpdateFinished = async () => {
|
||||||
try {
|
try {
|
||||||
@@ -33,28 +99,24 @@ export const UpdateWebServer = () => {
|
|||||||
);
|
);
|
||||||
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
// Allow seeing the toast before reloading
|
|
||||||
window.location.reload();
|
window.location.reload();
|
||||||
}, 2000);
|
}, 2000);
|
||||||
} catch {
|
} catch {
|
||||||
// Delay each request
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||||
// Keep running until it returns 200
|
|
||||||
void checkIsUpdateFinished();
|
void checkIsUpdateFinished();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleConfirm = async () => {
|
const handleConfirm = async () => {
|
||||||
try {
|
try {
|
||||||
setUpdating(true);
|
setModalState("updating");
|
||||||
await updateServer();
|
await updateServer();
|
||||||
|
|
||||||
// Give some time for docker service restart before starting to check status
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 8000));
|
await new Promise((resolve) => setTimeout(resolve, 8000));
|
||||||
|
|
||||||
await checkIsUpdateFinished();
|
await checkIsUpdateFinished();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
setUpdating(false);
|
setModalState("results");
|
||||||
console.error("Error updating server:", error);
|
console.error("Error updating server:", error);
|
||||||
toast.error(
|
toast.error(
|
||||||
"An error occurred while updating the server, please try again.",
|
"An error occurred while updating the server, please try again.",
|
||||||
@@ -62,6 +124,14 @@ export const UpdateWebServer = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const handleClose = () => {
|
||||||
|
if (modalState !== "updating") {
|
||||||
|
setOpen(false);
|
||||||
|
setModalState("idle");
|
||||||
|
setHealthResult(null);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<AlertDialog open={open}>
|
<AlertDialog open={open}>
|
||||||
<AlertDialogTrigger asChild>
|
<AlertDialogTrigger asChild>
|
||||||
@@ -81,36 +151,111 @@ export const UpdateWebServer = () => {
|
|||||||
<AlertDialogContent>
|
<AlertDialogContent>
|
||||||
<AlertDialogHeader>
|
<AlertDialogHeader>
|
||||||
<AlertDialogTitle>
|
<AlertDialogTitle>
|
||||||
{updating
|
{modalState === "idle" && "Are you absolutely sure?"}
|
||||||
? "Server update in progress"
|
{modalState === "checking" && "Verifying Services..."}
|
||||||
: "Are you absolutely sure?"}
|
{modalState === "results" &&
|
||||||
|
(allHealthy ? "Ready to Update" : "Service Issues Detected")}
|
||||||
|
{modalState === "updating" && "Server update in progress"}
|
||||||
</AlertDialogTitle>
|
</AlertDialogTitle>
|
||||||
<AlertDialogDescription>
|
<AlertDialogDescription asChild>
|
||||||
{updating ? (
|
<div>
|
||||||
<span className="flex items-center gap-1">
|
{modalState === "idle" && (
|
||||||
<Loader2 className="animate-spin" />
|
<span>
|
||||||
The server is being updated, please wait...
|
This will update the web server to the new version. You will
|
||||||
</span>
|
not be able to use the panel during the update process. The
|
||||||
) : (
|
page will be reloaded once the update is finished.
|
||||||
<>
|
<br />
|
||||||
This action cannot be undone. This will update the web server to
|
<br />
|
||||||
the new version. You will not be able to use the panel during
|
We recommend verifying that all services are running before
|
||||||
the update process. The page will be reloaded once the update is
|
updating.
|
||||||
finished.
|
</span>
|
||||||
</>
|
)}
|
||||||
)}
|
|
||||||
|
{modalState === "checking" && (
|
||||||
|
<span className="flex items-center gap-2">
|
||||||
|
<Loader2 className="animate-spin h-4 w-4" />
|
||||||
|
Checking PostgreSQL, Redis and Traefik...
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{modalState === "results" && healthResult && (
|
||||||
|
<div className="flex flex-col gap-3">
|
||||||
|
<div className="flex flex-col gap-2">
|
||||||
|
<ServiceStatusItem
|
||||||
|
name="PostgreSQL"
|
||||||
|
service={healthResult.postgres}
|
||||||
|
/>
|
||||||
|
<ServiceStatusItem
|
||||||
|
name="Redis"
|
||||||
|
service={healthResult.redis}
|
||||||
|
/>
|
||||||
|
<ServiceStatusItem
|
||||||
|
name="Traefik"
|
||||||
|
service={healthResult.traefik}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{!allHealthy && (
|
||||||
|
<div className="flex items-start gap-2 rounded-md border border-yellow-500/30 bg-yellow-500/10 p-3">
|
||||||
|
<AlertTriangle className="h-4 w-4 text-yellow-500 mt-0.5 shrink-0" />
|
||||||
|
<span className="text-sm text-yellow-600 dark:text-yellow-400">
|
||||||
|
Some services are not healthy. You can still proceed
|
||||||
|
with the update.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{allHealthy && (
|
||||||
|
<span className="text-sm text-muted-foreground">
|
||||||
|
All services are running. You can proceed with the update.
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{modalState === "results" && !healthResult && (
|
||||||
|
<div className="flex items-start gap-2 rounded-md border border-yellow-500/30 bg-yellow-500/10 p-3">
|
||||||
|
<AlertTriangle className="h-4 w-4 text-yellow-500 mt-0.5 shrink-0" />
|
||||||
|
<span className="text-sm text-yellow-600 dark:text-yellow-400">
|
||||||
|
Could not verify services. You can still proceed with the
|
||||||
|
update.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{modalState === "updating" && (
|
||||||
|
<span className="flex items-center gap-2">
|
||||||
|
<Loader2 className="animate-spin h-4 w-4" />
|
||||||
|
The server is being updated, please wait...
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
</AlertDialogDescription>
|
</AlertDialogDescription>
|
||||||
</AlertDialogHeader>
|
</AlertDialogHeader>
|
||||||
{!updating && (
|
{modalState === "idle" && (
|
||||||
<AlertDialogFooter>
|
<AlertDialogFooter>
|
||||||
<AlertDialogCancel onClick={() => setOpen(false)}>
|
<AlertDialogCancel onClick={handleClose}>Cancel</AlertDialogCancel>
|
||||||
Cancel
|
<Button variant="secondary" onClick={handleVerify}>
|
||||||
</AlertDialogCancel>
|
<RefreshCw className="h-4 w-4" />
|
||||||
|
Verify Status
|
||||||
|
</Button>
|
||||||
<AlertDialogAction onClick={handleConfirm}>
|
<AlertDialogAction onClick={handleConfirm}>
|
||||||
Confirm
|
Confirm
|
||||||
</AlertDialogAction>
|
</AlertDialogAction>
|
||||||
</AlertDialogFooter>
|
</AlertDialogFooter>
|
||||||
)}
|
)}
|
||||||
|
{modalState === "results" && (
|
||||||
|
<AlertDialogFooter>
|
||||||
|
<AlertDialogCancel onClick={handleClose}>Cancel</AlertDialogCancel>
|
||||||
|
<Button variant="secondary" onClick={handleVerify}>
|
||||||
|
<RefreshCw className="h-4 w-4" />
|
||||||
|
Re-check
|
||||||
|
</Button>
|
||||||
|
<AlertDialogAction onClick={handleConfirm}>
|
||||||
|
{allHealthy ? "Confirm" : "Confirm Anyway"}
|
||||||
|
</AlertDialogAction>
|
||||||
|
</AlertDialogFooter>
|
||||||
|
)}
|
||||||
</AlertDialogContent>
|
</AlertDialogContent>
|
||||||
</AlertDialog>
|
</AlertDialog>
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -2,6 +2,9 @@ import {
|
|||||||
CLEANUP_CRON_JOB,
|
CLEANUP_CRON_JOB,
|
||||||
checkGPUStatus,
|
checkGPUStatus,
|
||||||
checkPortInUse,
|
checkPortInUse,
|
||||||
|
checkPostgresHealth,
|
||||||
|
checkRedisHealth,
|
||||||
|
checkTraefikHealth,
|
||||||
cleanupAll,
|
cleanupAll,
|
||||||
cleanupAllBackground,
|
cleanupAllBackground,
|
||||||
cleanupBuilders,
|
cleanupBuilders,
|
||||||
@@ -44,8 +47,8 @@ import {
|
|||||||
writeTraefikConfigInPath,
|
writeTraefikConfigInPath,
|
||||||
writeTraefikSetup,
|
writeTraefikSetup,
|
||||||
} from "@dokploy/server";
|
} from "@dokploy/server";
|
||||||
import { checkPermission } from "@dokploy/server/services/permission";
|
|
||||||
import { db } from "@dokploy/server/db";
|
import { db } from "@dokploy/server/db";
|
||||||
|
import { checkPermission } from "@dokploy/server/services/permission";
|
||||||
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
|
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
|
||||||
import { TRPCError } from "@trpc/server";
|
import { TRPCError } from "@trpc/server";
|
||||||
import { eq, sql } from "drizzle-orm";
|
import { eq, sql } from "drizzle-orm";
|
||||||
@@ -864,6 +867,23 @@ export const settingsRouter = createTRPCRouter({
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
// Admin-only query returning the health of PostgreSQL, Redis and Traefik.
checkInfrastructureHealth: adminProcedure.query(async () => {
	if (!IS_CLOUD) {
		// Run the three probes concurrently and report each result.
		const [postgres, redis, traefik] = await Promise.all([
			checkPostgresHealth(),
			checkRedisHealth(),
			checkTraefikHealth(),
		]);

		return { postgres, redis, traefik };
	}

	// When IS_CLOUD is set no probe is run; every service is reported healthy.
	return {
		postgres: { status: "healthy" as const },
		redis: { status: "healthy" as const },
		traefik: { status: "healthy" as const },
	};
}),
|
||||||
setupGPU: adminProcedure
|
setupGPU: adminProcedure
|
||||||
.input(
|
.input(
|
||||||
z.object({
|
z.object({
|
||||||
|
|||||||
@@ -741,3 +741,177 @@ export const getComposeContainer = async (
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
type ServiceHealthStatus = {
|
||||||
|
status: "healthy" | "unhealthy";
|
||||||
|
message?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Checks whether a Docker Swarm service has at least one task whose current
 * state is "running".
 *
 * @param serviceName - Name given to `docker.getService` and used as the
 *   `service` task filter.
 * @returns `{ status: "healthy" }` when a running task is found, otherwise
 *   `{ status: "unhealthy", message }`. Errors raised while inspecting the
 *   service or listing its tasks are converted into an unhealthy result
 *   instead of being thrown.
 */
const checkSwarmServiceRunning = async (
	serviceName: string,
): Promise<ServiceHealthStatus> => {
	try {
		const service = docker.getService(serviceName);
		const info = await service.inspect();
		const mode = info.Spec?.Mode;

		// Only replicated services carry a replica count. A global-mode
		// service has no `Replicated` section, so it must not be reported as
		// "0 replicas"; its health is decided by the task check below.
		if (!mode?.Global) {
			const replicas = mode?.Replicated?.Replicas ?? 0;
			if (replicas === 0) {
				return {
					status: "unhealthy",
					message: "Service has 0 replicas configured",
				};
			}
		}

		// Check that at least one task is actually running
		const tasks = await docker.listTasks({
			filters: JSON.stringify({
				service: [serviceName],
				"desired-state": ["running"],
			}),
		});

		const runningTask = tasks.find((t) => t.Status?.State === "running");

		if (!runningTask) {
			// NOTE(review): the first listed task is used for the diagnostic
			// message; confirm whether listTasks returns tasks in a useful order.
			const latestTask = tasks[0];
			const taskState = latestTask?.Status?.State ?? "unknown";
			return {
				status: "unhealthy",
				message: `No running tasks (current state: ${taskState})`,
			};
		}

		return { status: "healthy" };
	} catch (error) {
		return {
			status: "unhealthy",
			message: error instanceof Error ? error.message : "Service not found",
		};
	}
};
|
||||||
|
|
||||||
|
/**
 * Looks up the container ID behind the first task of a swarm service whose
 * current state is "running".
 *
 * @param serviceName - Service name used as the task filter.
 * @returns The container ID, or `null` when no running task exists, the task
 *   has no container ID, or listing the tasks fails.
 */
const getSwarmServiceContainerId = async (
	serviceName: string,
): Promise<string | null> => {
	try {
		const taskFilters = {
			service: [serviceName],
			"desired-state": ["running"],
		};
		const tasks = await docker.listTasks({
			filters: JSON.stringify(taskFilters),
		});

		for (const task of tasks) {
			if (task.Status?.State === "running") {
				return task.Status?.ContainerStatus?.ContainerID ?? null;
			}
		}

		return null;
	} catch {
		// Any failure while listing tasks is reported as "no container".
		return null;
	}
};
|
||||||
|
|
||||||
|
/**
 * Health check for the "dokploy-postgres" swarm service.
 *
 * First confirms the service has a running task, then runs
 * `pg_isready -U dokploy` inside the task's container and treats a non-zero
 * exit code as unhealthy.
 *
 * @returns `{ status: "healthy" }` or `{ status: "unhealthy", message }`.
 *   Failures while executing the probe are reported as unhealthy rather than
 *   thrown.
 */
export const checkPostgresHealth = async (): Promise<ServiceHealthStatus> => {
	const serviceCheck = await checkSwarmServiceRunning("dokploy-postgres");
	if (serviceCheck.status === "unhealthy") {
		return serviceCheck;
	}

	// Verify PostgreSQL actually accepts connections
	const containerId = await getSwarmServiceContainerId("dokploy-postgres");
	if (!containerId) {
		return { status: "unhealthy", message: "Could not find running container" };
	}

	try {
		const exec = await docker.getContainer(containerId).exec({
			Cmd: ["pg_isready", "-U", "dokploy"],
			AttachStdout: true,
			AttachStderr: true,
		});
		const stream = await exec.start({});

		// NOTE(review): without a TTY the exec stream is presumably multiplexed
		// (frame headers mixed into the payload), so `output` may contain
		// header bytes - confirm, and demultiplex if the message must be clean.
		const output = await new Promise<string>((resolve, reject) => {
			const chunks: Buffer[] = [];
			const finish = () => resolve(Buffer.concat(chunks).toString());
			stream.on("data", (chunk: Buffer) => {
				chunks.push(chunk);
			});
			// Settle on every terminal event: waiting for "end" alone would
			// leave the health check pending forever if the stream errors or
			// is closed early.
			stream.on("error", reject);
			stream.on("end", finish);
			stream.on("close", finish);
		});

		const inspectResult = await exec.inspect();
		if (inspectResult.ExitCode !== 0) {
			return {
				status: "unhealthy",
				message: `PostgreSQL not ready: ${output.trim()}`,
			};
		}

		return { status: "healthy" };
	} catch (error) {
		return {
			status: "unhealthy",
			message:
				error instanceof Error ? error.message : "Failed to check PostgreSQL",
		};
	}
};
|
||||||
|
|
||||||
|
/**
 * Health check for the "dokploy-redis" swarm service.
 *
 * First confirms the service has a running task, then runs `redis-cli ping`
 * inside the task's container and requires "PONG" in the output.
 *
 * @returns `{ status: "healthy" }` or `{ status: "unhealthy", message }`.
 *   Failures while executing the probe are reported as unhealthy rather than
 *   thrown.
 */
export const checkRedisHealth = async (): Promise<ServiceHealthStatus> => {
	const serviceCheck = await checkSwarmServiceRunning("dokploy-redis");
	if (serviceCheck.status === "unhealthy") {
		return serviceCheck;
	}

	// Verify Redis actually responds to PING
	const containerId = await getSwarmServiceContainerId("dokploy-redis");
	if (!containerId) {
		return { status: "unhealthy", message: "Could not find running container" };
	}

	try {
		const exec = await docker.getContainer(containerId).exec({
			Cmd: ["redis-cli", "ping"],
			AttachStdout: true,
			AttachStderr: true,
		});
		const stream = await exec.start({});

		const output = await new Promise<string>((resolve, reject) => {
			const chunks: Buffer[] = [];
			const finish = () => resolve(Buffer.concat(chunks).toString());
			stream.on("data", (chunk: Buffer) => {
				chunks.push(chunk);
			});
			// Settle on every terminal event: waiting for "end" alone would
			// leave the health check pending forever if the stream errors or
			// is closed early.
			stream.on("error", reject);
			stream.on("end", finish);
			stream.on("close", finish);
		});

		if (!output.includes("PONG")) {
			return {
				status: "unhealthy",
				message: `Redis did not respond with PONG: ${output.trim()}`,
			};
		}

		return { status: "healthy" };
	} catch (error) {
		return {
			status: "unhealthy",
			message: error instanceof Error ? error.message : "Failed to check Redis",
		};
	}
};
|
||||||
|
|
||||||
|
/**
 * Health check for Traefik, which may run either as a standalone container
 * or as a swarm service, both named "dokploy-traefik".
 *
 * @returns Healthy when the standalone container is running; unhealthy when
 *   it exists but is not running; otherwise the result of the swarm-service
 *   check.
 */
export const checkTraefikHealth = async (): Promise<ServiceHealthStatus> => {
	try {
		const { State } = await docker.getContainer("dokploy-traefik").inspect();
		if (State.Running) {
			return { status: "healthy" };
		}
		return {
			status: "unhealthy",
			message: "Container is not running",
		};
	} catch {
		// Inspecting the standalone container failed: fall back to the swarm
		// service with the same name.
		return checkSwarmServiceRunning("dokploy-traefik");
	}
};
|
||||||
|
|||||||
Reference in New Issue
Block a user