diff --git a/docs/appendices/0.38.0-migration-guide.md b/docs/appendices/0.38.0-migration-guide.md index 4c809cf6e..5debbd56f 100644 --- a/docs/appendices/0.38.0-migration-guide.md +++ b/docs/appendices/0.38.0-migration-guide.md @@ -19,6 +19,7 @@ - During a fresh apt install, the upstream nginx default vhost files (`/etc/nginx/sites-enabled/default`, `/etc/nginx/sites-available/default`, and `/etc/nginx/conf.d/default.conf`) are renamed to `${path}.dokku-disabled` (not deleted) to avoid a `duplicate default server for 0.0.0.0:80` error. Operators with local customizations can recover them by inspecting the `.dokku-disabled` siblings. Upgrade-in-place installs do not touch any existing nginx files. - Fresh apt installs now ship a catch-all default site at `/etc/nginx/conf.d/00-default-vhost.conf` that rejects requests with unknown Host headers using `ssl_reject_handshake on` (HTTPS) and `return 444` (HTTP). This replaces the manual workaround previously documented in the nginx docs. The behavior can be opted out at install time via the `dokku/install_default_site` debconf prompt. See the [Default site documentation](/docs/networking/proxies/nginx.md#default-site). - The `docker-local` scheduler now sends `SIGTERM` to old containers immediately after a successful deploy, rather than waiting `wait-to-retire` seconds before signaling. This matches Heroku's graceful-shutdown contract and lets applications begin draining in-flight work as soon as proxy traffic switches. The `wait-to-retire` grace period and `stop-timeout-seconds` hard-stop continue to apply as before. See the [zero downtime deploys documentation](/docs/deployment/zero-downtime-deploys.md#wait-to-retire) for more details. +- The `docker-local` scheduler no longer queues an image for retirement when another running container of the same app still uses it. 
#######################################
# Reports whether an image is still used by a container of the given app.
#
# Containers are discovered through the com.dokku.app-name label. A container
# is ignored when it is the explicitly excluded one, or when its 12-character
# short id appears in the dead-containers file (i.e. it is already queued for
# retirement).
#
# Globals:
#   DOCKER_BIN      (read) docker executable
#   DOKKU_LIB_ROOT  (read) root of the dokku data directory
# Arguments:
#   $1 - APP: app name used for the label filter
#   $2 - IMAGE_ID: image id without the "sha256:" prefix
#   $3 - EXCLUDE_CONTAINER_ID: optional container to ignore (may be empty)
# Returns:
#   0 if a remaining container of the app uses IMAGE_ID, 1 otherwise
#   (including when IMAGE_ID is empty).
#######################################
fn-scheduler-docker-local-image-in-use-by-app() {
  declare desc="returns 0 if IMAGE_ID is used by an app container that is not already pending retirement"
  declare APP="$1" IMAGE_ID="$2" EXCLUDE_CONTAINER_ID="$3"
  local DEAD_CONTAINER_FILE="${DOKKU_LIB_ROOT}/data/scheduler-docker-local/dead-containers"
  # Initialise every local so the checks below stay well-defined when no
  # exclusion is requested (and under `set -u`).
  local cid="" exclude_full_id="" container_image="" short_cid=""

  # Nothing to compare against.
  if [[ -z "$IMAGE_ID" ]]; then
    return 1
  fi

  # Resolve the excluded container to its full id so it can be compared with
  # the untruncated ids listed below. Best-effort: a failed lookup simply
  # means nothing is excluded.
  if [[ -n "$EXCLUDE_CONTAINER_ID" ]]; then
    exclude_full_id="$("$DOCKER_BIN" container inspect "$EXCLUDE_CONTAINER_ID" --format '{{.Id}}' 2>/dev/null || true)"
  fi

  # Read one id per line on a dedicated descriptor: no word-splitting or glob
  # expansion of the listing, and commands inside the loop cannot consume it.
  while IFS= read -r cid <&3; do
    [[ -z "$cid" ]] && continue
    if [[ -n "$exclude_full_id" ]] && [[ "$cid" == "$exclude_full_id" ]]; then
      continue
    fi

    # Skip containers already queued for retirement; match the short id as a
    # fixed string rather than as a regular expression.
    short_cid="${cid:0:12}"
    if [[ -f "$DEAD_CONTAINER_FILE" ]] && grep -qF -- "$short_cid" "$DEAD_CONTAINER_FILE"; then
      continue
    fi

    container_image="$("$DOCKER_BIN" container inspect "$cid" --format '{{.Image}}' 2>/dev/null || true)"
    # Drop the "sha256:" style prefix so the value is comparable to IMAGE_ID.
    container_image="${container_image#*:}"
    if [[ -n "$container_image" ]] && [[ "$container_image" == "$IMAGE_ID" ]]; then
      return 0
    fi
  done 3< <("$DOCKER_BIN" container ls -q --no-trunc -f label=com.dokku.app-name="$APP" 2>/dev/null)

  return 1
}
# Integration test: deploys an app from an image, rebuilds it, then runs
# ps:retire and checks that
#   - the "has running containers, skipping rm" warning is not printed,
#   - the image id of the running web.1 container is not listed in dead-images,
#   - the web.1 container is still running.
@test "(scheduler-docker-local) ps:rebuild with image-based deploy keeps running image" {
  run create_app
  echo "output: $output"
  echo "status: $status"
  assert_success

  # Set wait-to-retire to 1 for this app.
  run /bin/bash -c "dokku checks:set $TEST_APP wait-to-retire 1"
  echo "output: $output"
  echo "status: $status"
  assert_success

  # Image-based deploy.
  run /bin/bash -c "dokku git:from-image $TEST_APP cockpithq/cockpit:core-latest"
  echo "output: $output"
  echo "status: $status"
  assert_success

  # Record the image id (second ":"-separated field of .Image) of the web.1
  # container before rebuilding; the bare [[ ]] fails the test if it is empty.
  local running_image
  running_image="$(docker container inspect "$TEST_APP.web.1" --format '{{.Image}}' | cut -d: -f2)"
  [[ -n "$running_image" ]]

  run /bin/bash -c "dokku ps:rebuild $TEST_APP"
  echo "output: $output"
  echo "status: $status"
  assert_success

  # NOTE(review): presumably gives the 1-second wait-to-retire window time to
  # elapse before ps:retire runs — confirm.
  sleep 3

  run /bin/bash -c "dokku ps:retire"
  echo "output: $output"
  echo "status: $status"
  assert_success
  # NOTE(review): the trailing 0 is assumed to be the expected number of
  # occurrences — confirm against the test helper.
  assert_output_contains "has running containers, skipping rm" 0

  # The recorded image id must not appear in the dead-images list. `|| true`
  # and 2>/dev/null keep a missing file or a no-match from failing the
  # command; the assertion is on the (empty) output.
  run /bin/bash -c "grep -F \"$running_image\" /var/lib/dokku/data/scheduler-docker-local/dead-images 2>/dev/null || true"
  echo "output: $output"
  echo "status: $status"
  assert_output ""

  # The web.1 container must still report the "running" state.
  run /bin/bash -c "docker container inspect $TEST_APP.web.1 --format '{{.State.Status}}'"
  echo "output: $output"
  echo "status: $status"
  assert_output "running"
}