mirror of
https://github.com/Dokploy/dokploy.git
synced 2026-06-15 20:25:23 +02:00
Merge pull request #4033 from Dokploy/feat/improve-update-process-to-validate-dokploy-services
feat: enhance web server update process with health checks
This commit is contained in:
@@ -1,4 +1,11 @@
|
||||
import { HardDriveDownload, Loader2 } from "lucide-react";
|
||||
import {
|
||||
AlertTriangle,
|
||||
CheckCircle2,
|
||||
HardDriveDownload,
|
||||
Loader2,
|
||||
RefreshCw,
|
||||
XCircle,
|
||||
} from "lucide-react";
|
||||
import { useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import {
|
||||
@@ -15,11 +22,70 @@ import {
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { api } from "@/utils/api";
|
||||
|
||||
/**
 * Health of a single infrastructure service as returned by the health check
 * and rendered by the update dialog.
 */
type ServiceStatus = {
|
||||
// Whether the service passed its check.
status: "healthy" | "unhealthy";
|
||||
// Optional detail; the dialog shows it next to the service name when the status is "unhealthy".
message?: string;
|
||||
};
|
||||
|
||||
/**
 * Result of one infrastructure health check: one status per service
 * (PostgreSQL, Redis and Traefik).
 */
type HealthResult = {
|
||||
postgres: ServiceStatus;
|
||||
redis: ServiceStatus;
|
||||
traefik: ServiceStatus;
|
||||
};
|
||||
|
||||
/**
 * Step the update dialog is currently showing:
 * - "idle": initial confirmation text
 * - "checking": a health check is in flight
 * - "results": the health check finished (with or without a result)
 * - "updating": the server update has been started
 */
type ModalState = "idle" | "checking" | "results" | "updating";
|
||||
|
||||
/**
 * One row of the health check results: a status icon, the service name and,
 * for an unhealthy service that reported one, its message.
 */
const ServiceStatusItem = ({
	name,
	service,
}: {
	name: string;
	service: ServiceStatus;
}) => {
	const { status, message } = service;

	// Green check for a healthy service, red cross for anything else.
	const statusIcon =
		status === "healthy" ? (
			<CheckCircle2 className="h-4 w-4 text-green-500" />
		) : (
			<XCircle className="h-4 w-4 text-red-500" />
		);

	// The message is only shown for unhealthy services that provided one.
	const failureDetail = status === "unhealthy" && message && (
		<span className="text-xs text-muted-foreground">— {message}</span>
	);

	return (
		<div className="flex items-center gap-2">
			{statusIcon}
			<span className="text-sm font-medium">{name}</span>
			{failureDetail}
		</div>
	);
};
|
||||
|
||||
export const UpdateWebServer = () => {
|
||||
const [updating, setUpdating] = useState(false);
|
||||
const [modalState, setModalState] = useState<ModalState>("idle");
|
||||
const [open, setOpen] = useState(false);
|
||||
const [healthResult, setHealthResult] = useState<HealthResult | null>(null);
|
||||
|
||||
const { mutateAsync: updateServer } = api.settings.updateServer.useMutation();
|
||||
const { refetch: checkHealth } =
|
||||
api.settings.checkInfrastructureHealth.useQuery(undefined, {
|
||||
enabled: false,
|
||||
});
|
||||
|
||||
/**
 * Runs the infrastructure health check on demand and moves the dialog to the
 * "results" step.
 *
 * The previous result is cleared first. If the check fails, `healthResult`
 * stays `null`, which the dialog renders as "Could not verify services".
 */
const handleVerify = async () => {
	setModalState("checking");
	setHealthResult(null);

	try {
		const result = await checkHealth();
		// refetch() resolves instead of throwing when the request fails, and the
		// result may still carry the data of an earlier successful run. Only
		// accept data from a fetch that did not end in an error, so a failed
		// re-check is never presented as a fresh result.
		if (!result.isError && result.data) {
			setHealthResult(result.data);
		}
	} catch {
		// checkHealth failed entirely; healthResult intentionally stays null.
	}

	setModalState("results");
};
|
||||
|
||||
const allHealthy =
|
||||
healthResult &&
|
||||
healthResult.postgres.status === "healthy" &&
|
||||
healthResult.redis.status === "healthy" &&
|
||||
healthResult.traefik.status === "healthy";
|
||||
|
||||
const checkIsUpdateFinished = async () => {
|
||||
try {
|
||||
@@ -33,28 +99,24 @@ export const UpdateWebServer = () => {
|
||||
);
|
||||
|
||||
setTimeout(() => {
|
||||
// Allow seeing the toast before reloading
|
||||
window.location.reload();
|
||||
}, 2000);
|
||||
} catch {
|
||||
// Delay each request
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||
// Keep running until it returns 200
|
||||
void checkIsUpdateFinished();
|
||||
}
|
||||
};
|
||||
|
||||
const handleConfirm = async () => {
|
||||
try {
|
||||
setUpdating(true);
|
||||
setModalState("updating");
|
||||
await updateServer();
|
||||
|
||||
// Give some time for docker service restart before starting to check status
|
||||
await new Promise((resolve) => setTimeout(resolve, 8000));
|
||||
|
||||
await checkIsUpdateFinished();
|
||||
} catch (error) {
|
||||
setUpdating(false);
|
||||
setModalState("results");
|
||||
console.error("Error updating server:", error);
|
||||
toast.error(
|
||||
"An error occurred while updating the server, please try again.",
|
||||
@@ -62,6 +124,14 @@ export const UpdateWebServer = () => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Closes the dialog and resets it to its initial step, discarding any health
 * check result. Does nothing while an update is in progress.
 */
const handleClose = () => {
	// The dialog must stay open for the whole duration of an update.
	if (modalState === "updating") {
		return;
	}

	setOpen(false);
	setModalState("idle");
	setHealthResult(null);
};
|
||||
|
||||
return (
|
||||
<AlertDialog open={open}>
|
||||
<AlertDialogTrigger asChild>
|
||||
@@ -81,36 +151,111 @@ export const UpdateWebServer = () => {
|
||||
<AlertDialogContent>
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>
|
||||
{updating
|
||||
? "Server update in progress"
|
||||
: "Are you absolutely sure?"}
|
||||
{modalState === "idle" && "Are you absolutely sure?"}
|
||||
{modalState === "checking" && "Verifying Services..."}
|
||||
{modalState === "results" &&
|
||||
(allHealthy ? "Ready to Update" : "Service Issues Detected")}
|
||||
{modalState === "updating" && "Server update in progress"}
|
||||
</AlertDialogTitle>
|
||||
<AlertDialogDescription>
|
||||
{updating ? (
|
||||
<span className="flex items-center gap-1">
|
||||
<Loader2 className="animate-spin" />
|
||||
The server is being updated, please wait...
|
||||
</span>
|
||||
) : (
|
||||
<>
|
||||
This action cannot be undone. This will update the web server to
|
||||
the new version. You will not be able to use the panel during
|
||||
the update process. The page will be reloaded once the update is
|
||||
finished.
|
||||
</>
|
||||
)}
|
||||
<AlertDialogDescription asChild>
|
||||
<div>
|
||||
{modalState === "idle" && (
|
||||
<span>
|
||||
This will update the web server to the new version. You will
|
||||
not be able to use the panel during the update process. The
|
||||
page will be reloaded once the update is finished.
|
||||
<br />
|
||||
<br />
|
||||
We recommend verifying that all services are running before
|
||||
updating.
|
||||
</span>
|
||||
)}
|
||||
|
||||
{modalState === "checking" && (
|
||||
<span className="flex items-center gap-2">
|
||||
<Loader2 className="animate-spin h-4 w-4" />
|
||||
Checking PostgreSQL, Redis and Traefik...
|
||||
</span>
|
||||
)}
|
||||
|
||||
{modalState === "results" && healthResult && (
|
||||
<div className="flex flex-col gap-3">
|
||||
<div className="flex flex-col gap-2">
|
||||
<ServiceStatusItem
|
||||
name="PostgreSQL"
|
||||
service={healthResult.postgres}
|
||||
/>
|
||||
<ServiceStatusItem
|
||||
name="Redis"
|
||||
service={healthResult.redis}
|
||||
/>
|
||||
<ServiceStatusItem
|
||||
name="Traefik"
|
||||
service={healthResult.traefik}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{!allHealthy && (
|
||||
<div className="flex items-start gap-2 rounded-md border border-yellow-500/30 bg-yellow-500/10 p-3">
|
||||
<AlertTriangle className="h-4 w-4 text-yellow-500 mt-0.5 shrink-0" />
|
||||
<span className="text-sm text-yellow-600 dark:text-yellow-400">
|
||||
Some services are not healthy. You can still proceed
|
||||
with the update.
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{allHealthy && (
|
||||
<span className="text-sm text-muted-foreground">
|
||||
All services are running. You can proceed with the update.
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{modalState === "results" && !healthResult && (
|
||||
<div className="flex items-start gap-2 rounded-md border border-yellow-500/30 bg-yellow-500/10 p-3">
|
||||
<AlertTriangle className="h-4 w-4 text-yellow-500 mt-0.5 shrink-0" />
|
||||
<span className="text-sm text-yellow-600 dark:text-yellow-400">
|
||||
Could not verify services. You can still proceed with the
|
||||
update.
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{modalState === "updating" && (
|
||||
<span className="flex items-center gap-2">
|
||||
<Loader2 className="animate-spin h-4 w-4" />
|
||||
The server is being updated, please wait...
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</AlertDialogDescription>
|
||||
</AlertDialogHeader>
|
||||
{!updating && (
|
||||
{modalState === "idle" && (
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel onClick={() => setOpen(false)}>
|
||||
Cancel
|
||||
</AlertDialogCancel>
|
||||
<AlertDialogCancel onClick={handleClose}>Cancel</AlertDialogCancel>
|
||||
<Button variant="secondary" onClick={handleVerify}>
|
||||
<RefreshCw className="h-4 w-4" />
|
||||
Verify Status
|
||||
</Button>
|
||||
<AlertDialogAction onClick={handleConfirm}>
|
||||
Confirm
|
||||
</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
)}
|
||||
{modalState === "results" && (
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel onClick={handleClose}>Cancel</AlertDialogCancel>
|
||||
<Button variant="secondary" onClick={handleVerify}>
|
||||
<RefreshCw className="h-4 w-4" />
|
||||
Re-check
|
||||
</Button>
|
||||
<AlertDialogAction onClick={handleConfirm}>
|
||||
{allHealthy ? "Confirm" : "Confirm Anyway"}
|
||||
</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
)}
|
||||
</AlertDialogContent>
|
||||
</AlertDialog>
|
||||
);
|
||||
|
||||
@@ -2,6 +2,9 @@ import {
|
||||
CLEANUP_CRON_JOB,
|
||||
checkGPUStatus,
|
||||
checkPortInUse,
|
||||
checkPostgresHealth,
|
||||
checkRedisHealth,
|
||||
checkTraefikHealth,
|
||||
cleanupAll,
|
||||
cleanupAllBackground,
|
||||
cleanupBuilders,
|
||||
@@ -44,8 +47,8 @@ import {
|
||||
writeTraefikConfigInPath,
|
||||
writeTraefikSetup,
|
||||
} from "@dokploy/server";
|
||||
import { checkPermission } from "@dokploy/server/services/permission";
|
||||
import { db } from "@dokploy/server/db";
|
||||
import { checkPermission } from "@dokploy/server/services/permission";
|
||||
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
|
||||
import { TRPCError } from "@trpc/server";
|
||||
import { eq, sql } from "drizzle-orm";
|
||||
@@ -864,6 +867,23 @@ export const settingsRouter = createTRPCRouter({
|
||||
throw error;
|
||||
}
|
||||
}),
|
||||
// Reports the health of PostgreSQL, Redis and Traefik. When IS_CLOUD is set,
// no checks are run and all three services are reported as healthy.
checkInfrastructureHealth: adminProcedure.query(async () => {
	if (!IS_CLOUD) {
		// The three checks do not depend on each other, so run them concurrently.
		const [postgres, redis, traefik] = await Promise.all([
			checkPostgresHealth(),
			checkRedisHealth(),
			checkTraefikHealth(),
		]);

		return { postgres, redis, traefik };
	}

	return {
		postgres: { status: "healthy" as const },
		redis: { status: "healthy" as const },
		traefik: { status: "healthy" as const },
	};
}),
|
||||
setupGPU: adminProcedure
|
||||
.input(
|
||||
z.object({
|
||||
|
||||
@@ -741,3 +741,177 @@ export const getComposeContainer = async (
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Outcome of a single service health check.
 */
type ServiceHealthStatus = {
|
||||
// Whether the service passed the check.
status: "healthy" | "unhealthy";
|
||||
// Optional detail; the checks in this file set it when reporting "unhealthy".
message?: string;
|
||||
};
|
||||
|
||||
/**
 * Checks that a swarm service exists and has at least one task in the
 * "running" state.
 *
 * Never throws: inspection or listing errors (including a missing service) are
 * reported as an "unhealthy" status carrying the error message.
 *
 * @param serviceName Name of the swarm service to inspect.
 * @returns "healthy" when a running task was found, otherwise "unhealthy"
 *   with a message describing why.
 */
const checkSwarmServiceRunning = async (
	serviceName: string,
): Promise<ServiceHealthStatus> => {
	try {
		const service = docker.getService(serviceName);
		const info = await service.inspect();

		// Only replicated services carry a replica count. A global service has
		// no `Replicated` entry, so defaulting its count to 0 would wrongly
		// report it as scaled down; for those, rely on the task check below.
		const mode = info.Spec?.Mode;
		const isGlobal = mode?.Global != null;
		const replicas = mode?.Replicated?.Replicas ?? 0;
		if (!isGlobal && replicas === 0) {
			return {
				status: "unhealthy",
				message: "Service has 0 replicas configured",
			};
		}

		// Check that at least one task is actually running
		const tasks = await docker.listTasks({
			filters: JSON.stringify({
				service: [serviceName],
				"desired-state": ["running"],
			}),
		});

		const runningTask = tasks.find((t) => t.Status?.State === "running");

		if (!runningTask) {
			// Report the state of the first listed task to hint at what is wrong
			// (e.g. still starting or failing); "unknown" when there are no tasks.
			const firstTask = tasks[0];
			const taskState = firstTask?.Status?.State ?? "unknown";
			return {
				status: "unhealthy",
				message: `No running tasks (current state: ${taskState})`,
			};
		}

		return { status: "healthy" };
	} catch (error) {
		return {
			status: "unhealthy",
			message: error instanceof Error ? error.message : "Service not found",
		};
	}
};
|
||||
|
||||
/**
 * Looks up the container ID of the first task of a swarm service that is in
 * the "running" state.
 *
 * @param serviceName Name of the swarm service.
 * @returns The container ID, or `null` when no running task (or no container
 *   ID) is found or the task listing fails.
 */
const getSwarmServiceContainerId = async (
	serviceName: string,
): Promise<string | null> => {
	try {
		const taskFilters = JSON.stringify({
			service: [serviceName],
			"desired-state": ["running"],
		});
		const candidates = await docker.listTasks({ filters: taskFilters });

		for (const task of candidates) {
			if (task.Status?.State === "running") {
				return task.Status?.ContainerStatus?.ContainerID ?? null;
			}
		}

		return null;
	} catch {
		// Best effort: any failure is treated as "no container found".
		return null;
	}
};
|
||||
|
||||
/**
 * Checks whether the `dokploy-postgres` swarm service is running and accepting
 * connections.
 *
 * First confirms the service has a running task, then runs
 * `pg_isready -U dokploy` inside that task's container and treats a non-zero
 * exit code as unhealthy. Failures while running the probe are reported as an
 * "unhealthy" status rather than thrown.
 *
 * @returns The health status, with a message describing the problem when
 *   unhealthy.
 */
export const checkPostgresHealth = async (): Promise<ServiceHealthStatus> => {
	const serviceCheck = await checkSwarmServiceRunning("dokploy-postgres");
	if (serviceCheck.status === "unhealthy") {
		return serviceCheck;
	}

	// Verify PostgreSQL actually accepts connections
	const containerId = await getSwarmServiceContainerId("dokploy-postgres");
	if (!containerId) {
		return { status: "unhealthy", message: "Could not find running container" };
	}

	try {
		const exec = await docker.getContainer(containerId).exec({
			Cmd: ["pg_isready", "-U", "dokploy"],
			AttachStdout: true,
			AttachStderr: true,
		});
		const stream = await exec.start({});

		// NOTE(review): without a TTY the exec stream is presumably multiplexed,
		// so `output` may contain frame header bytes — confirm before relying on
		// its exact text.
		const output = await new Promise<string>((resolve, reject) => {
			let data = "";
			stream.on("data", (chunk: Buffer) => {
				data += chunk.toString();
			});
			stream.on("end", () => resolve(data));
			// A stream that errors or is closed before "end" would otherwise leave
			// this promise (and the whole health check) pending forever.
			stream.on("close", () => resolve(data));
			stream.on("error", reject);
		});

		const inspectResult = await exec.inspect();
		if (inspectResult.ExitCode !== 0) {
			return {
				status: "unhealthy",
				message: `PostgreSQL not ready: ${output.trim()}`,
			};
		}

		return { status: "healthy" };
	} catch (error) {
		return {
			status: "unhealthy",
			message:
				error instanceof Error ? error.message : "Failed to check PostgreSQL",
		};
	}
};
|
||||
|
||||
/**
 * Checks whether the `dokploy-redis` swarm service is running and answering
 * commands.
 *
 * First confirms the service has a running task, then runs `redis-cli ping`
 * inside that task's container and expects the output to contain "PONG".
 * Failures while running the probe are reported as an "unhealthy" status
 * rather than thrown.
 *
 * @returns The health status, with a message describing the problem when
 *   unhealthy.
 */
export const checkRedisHealth = async (): Promise<ServiceHealthStatus> => {
	const serviceCheck = await checkSwarmServiceRunning("dokploy-redis");
	if (serviceCheck.status === "unhealthy") {
		return serviceCheck;
	}

	// Verify Redis actually responds to PING
	const containerId = await getSwarmServiceContainerId("dokploy-redis");
	if (!containerId) {
		return { status: "unhealthy", message: "Could not find running container" };
	}

	try {
		const exec = await docker.getContainer(containerId).exec({
			Cmd: ["redis-cli", "ping"],
			AttachStdout: true,
			AttachStderr: true,
		});
		const stream = await exec.start({});

		const output = await new Promise<string>((resolve, reject) => {
			let data = "";
			stream.on("data", (chunk: Buffer) => {
				data += chunk.toString();
			});
			stream.on("end", () => resolve(data));
			// A stream that errors or is closed before "end" would otherwise leave
			// this promise (and the whole health check) pending forever.
			stream.on("close", () => resolve(data));
			stream.on("error", reject);
		});

		// Substring match: the reply only needs to contain "PONG" somewhere.
		if (!output.includes("PONG")) {
			return {
				status: "unhealthy",
				message: `Redis did not respond with PONG: ${output.trim()}`,
			};
		}

		return { status: "healthy" };
	} catch (error) {
		return {
			status: "unhealthy",
			message: error instanceof Error ? error.message : "Failed to check Redis",
		};
	}
};
|
||||
|
||||
/**
 * Checks whether Traefik is running, either as a standalone container or as a
 * swarm service, both named `dokploy-traefik`.
 *
 * The container is inspected first; if that inspection fails for any reason,
 * the swarm service check is used instead.
 *
 * @returns The health status, with a message when unhealthy.
 */
export const checkTraefikHealth = async (): Promise<ServiceHealthStatus> => {
	let containerRunning: boolean;

	try {
		const details = await docker.getContainer("dokploy-traefik").inspect();
		containerRunning = details.State.Running;
	} catch {
		// Not a standalone container, check as swarm service
		return checkSwarmServiceRunning("dokploy-traefik");
	}

	return containerRunning
		? { status: "healthy" }
		: { status: "unhealthy", message: "Container is not running" };
};
|
||||
|
||||
Reference in New Issue
Block a user