Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: resolve incorrect kubelet version on e2e compat versions test using 2 step upgrade process #34304

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 49 additions & 11 deletions experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ trap signal_handler INT TERM

# build kubernetes / node image, e2e binaries
build() {
# build the node image w/ kubernetes
kind build node-image -v 1
# build the node image w/ kubernetes (NOT USED FOR INITIAL CLUSTER, but for upgrade)
aaron-prindle marked this conversation as resolved.
Show resolved Hide resolved

# Ginkgo v1 is used by Kubernetes 1.24 and earlier, fallback if v2 is not available.
aaron-prindle marked this conversation as resolved.
Show resolved Hide resolved
GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/v2/ginkgo"
if [ ! -d "$GINKGO_SRC_DIR" ]; then
Expand All @@ -95,9 +95,6 @@ check_structured_log_support() {

# up a cluster with kind
create_cluster() {
# Grab the version of the cluster we're about to start
KUBE_VERSION="$(docker run --rm --entrypoint=cat "kindest/node:latest" /kind/version)"

# Default Log level for all components in test clusters
KIND_CLUSTER_LOG_LEVEL=${KIND_CLUSTER_LOG_LEVEL:-4}

Expand All @@ -109,7 +106,7 @@ create_cluster() {
controllerManager_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
apiServer_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
kubelet_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""

if [ -n "$CLUSTER_LOG_FORMAT" ]; then
check_structured_log_support "CLUSTER_LOG_FORMAT"
scheduler_extra_args="${scheduler_extra_args}
Expand Down Expand Up @@ -146,7 +143,7 @@ create_cluster() {
exit 1
fi

echo "Limiting to GA APIs and features for ${KUBE_VERSION}"
echo "Limiting to GA APIs and features for ${PREV_VERSION}"
feature_gates='{"AllAlpha":false,"AllBeta":false}'
runtime_config='{"api/alpha":"false", "api/beta":"false"}'
;;
Expand Down Expand Up @@ -207,7 +204,7 @@ EOF
# TODO(BenTheElder): settle on verbosity for this script
KIND_CREATE_ATTEMPTED=true
kind create cluster \
--image=kindest/node:latest \
--image="kindest/node:v${PREV_VERSION}.0" \
--retain \
--wait=1m \
-v=3 \
Expand Down Expand Up @@ -285,6 +282,40 @@ run_tests() {
wait "$GINKGO_PID"
}

# Upgrade the cluster components in place (excluding kube-proxy and kubelet)
# and wait until the API server reports CURRENT_VERSION.
#
# Globals:
#   UPGRADE_SCRIPT   (read)    upgrade script to execute
#   ARTIFACTS        (read)    directory receiving upgrade-output-{1,2}.txt
#   CURRENT_VERSION  (read)    string expected on kubectl's "Server Version:" line
#   RETRY_ATTEMPTS   (written) number of version checks; defaults to 5
# Outputs:
#   progress messages and the upgrade script's output on stdout
# Exits:
#   terminates the shell with status 1 when the version never matches
upgrade_cluster_components() {
  # Get the retry attempts, defaulting to 5 if not set
  RETRY_ATTEMPTS="${RETRY_ATTEMPTS:-5}"

  local attempt=1
  local success=false
  local pass

  echo "Attempt $attempt of $RETRY_ATTEMPTS to upgrade cluster..."
  # Run the script twice, is necessary for fully updating the binaries
  for pass in 1 2; do
    bash -x "${UPGRADE_SCRIPT}" --no-kproxy --no-kubelet \
      | tee "${ARTIFACTS}/upgrade-output-${pass}.txt"
  done

  while [ "$attempt" -le "$RETRY_ATTEMPTS" ]; do
    # Check if kubectl version reports the current version
    kind export kubeconfig --name kind
    # -F: the version contains dots, which must not act as regex wildcards
    if kubectl version | grep "Server Version:" | grep -qF -- "$CURRENT_VERSION"; then
      echo "Upgrade successful! kubectl version reports $CURRENT_VERSION"
      success=true
      break # Exit the loop on success
    fi

    # Report the check that just failed, before the counter moves on
    echo "Upgrade check $attempt of $RETRY_ATTEMPTS failed."
    if [ "$attempt" -lt "$RETRY_ATTEMPTS" ]; then
      # Only wait when another check is going to follow
      echo "Retrying in 60s..."
      sleep 60
    fi
    attempt=$((attempt + 1))
  done

  if [ "$success" != "true" ]; then
    echo "Upgrade failed after $RETRY_ATTEMPTS attempts."
    exit 1
  fi
}

main() {
# create temp dir and setup cleanup
TMP_DIR=$(mktemp -d)
Expand All @@ -309,18 +340,24 @@ main() {
# debug kind version
kind version

# build kubernetes
# build kubernetes (for upgrade)
build
# in CI attempt to release some memory after building
if [ -n "${KUBETEST_IN_DOCKER:-}" ]; then
sync || true
echo 1 > /proc/sys/vm/drop_caches || true
fi

# create the cluster and run tests
res=0
create_cluster || res=$?


# Perform the upgrade. Unless UPGRADE_SCRIPT is already set, kind-upgrade.sh is expected in a test-infra checkout located next to the current working directory.
UPGRADE_SCRIPT="${UPGRADE_SCRIPT:-${PWD}/../test-infra/experiment/compatibility-versions/kind-upgrade.sh}"
echo "Upgrading cluster with ${UPGRADE_SCRIPT}"

upgrade_cluster_components

# Clone the previous version's Kubernetes release branch
# TODO(aaron-prindle) extend the branches to test from n-1 -> n-1..3 as more k8s releases are done that support compatibility versions
export PREV_RELEASE_BRANCH="release-${EMULATED_VERSION}"
Expand All @@ -331,8 +368,9 @@ main() {
run_tests || res=$?
popd


cleanup || res=$?
exit $res
}

main
main
173 changes: 173 additions & 0 deletions experiment/compatibility-versions/kind-upgrade.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/usr/bin/env bash
# Copyright 2025 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
set -o pipefail

# Build kubectl and kubelet through the repository's build/run.sh wrapper and
# then the release images. Standard output of both steps is discarded;
# standard error stays visible.
build_docker() {
  {
    build/run.sh make all WHAT="cmd/kubectl cmd/kubelet"
    make quick-release-images
  } >/dev/null
}

# Build kubectl, kubelet and the docker artifacts with a single bazel
# invocation.
build_bazel() {
  local -a bazel_targets=(
    //cmd/kubectl:kubectl
    //cmd/kubelet:kubelet
    //build:docker-artifacts
  )
  bazel build "${bazel_targets[@]}"
}

# Replace the kubelet binary on every kind node and restart the kubelet.
#
# Globals:
#   NODES           (read) whitespace-separated node container names
#   KUBELET_BINARY  (read) host path of the kubelet binary to install
# Outputs:
#   one "Updated kubelet on node ..." line per node on stdout
# Returns:
#   1 (before touching any node) if KUBELET_BINARY is not an existing
#   regular file
update_kubelet() {
  local n

  # The path is produced by a `find` elsewhere in this script, which can
  # yield nothing or several paths; fail with a clear message instead of
  # handing docker a bogus argument list.
  if [[ ! -f "${KUBELET_BINARY:-}" ]]; then
    echo "Error: kubelet binary '${KUBELET_BINARY:-}' is not a regular file" >&2
    return 1
  fi

  # NODES is a whitespace-separated list, so word splitting is intended here.
  # shellcheck disable=SC2086
  for n in ${NODES}; do
    # Backup previous kubelet
    # TODO: use something like `which kubelet` to avoid depending on the
    # exact path. See https://kind.sigs.k8s.io/docs/design/node-image/
    docker exec "$n" cp /usr/bin/kubelet /usr/bin/kubelet.bak
    # Install new kubelet binary
    docker cp "${KUBELET_BINARY}" "$n:/usr/bin/kubelet"
    docker exec "$n" systemctl restart kubelet
    echo "Updated kubelet on node $n"
  done
}

# Load the kube-proxy image archive into each node and roll the kube-proxy
# DaemonSet over to the new image.
#
# Globals (all read):
#   NODES            whitespace-separated node container names
#   IMAGES_PATH      directory containing kube-proxy.tar
#   CLUSTER_NAME     kind cluster name
#   DOCKER_REGISTRY  registry prefix of the new image
#   DOCKER_TAG       tag of the new image
update_kube_proxy() {
  local n

  # NODES is a whitespace-separated list, so word splitting is intended here.
  # shellcheck disable=SC2086
  for n in ${NODES}; do
    # Target only node $n; without --nodes every iteration would load the
    # archive into all nodes again.
    kind load image-archive "${IMAGES_PATH}/kube-proxy.tar" \
      --name "${CLUSTER_NAME}" --nodes "$n"
  done
  # RollingUpdate
  kubectl set image ds/kube-proxy \
    "kube-proxy=${DOCKER_REGISTRY}/kube-proxy-amd64:${DOCKER_TAG}" -n kube-system
  kubectl rollout status ds/kube-proxy -n kube-system -w
  echo "Updated kube-proxy"
}

# Load the CNI image archive into each node and roll the kindnet DaemonSet
# over to it.
#
# Globals (all read):
#   NODES         whitespace-separated node container names
#   CNI_IMAGE     passed to `kind load image-archive` AND used as the image
#                 reference of the kindnet-cni container
#   CLUSTER_NAME  kind cluster name
#
# NOTE(review): CNI_IMAGE serves both as an archive path and as an image
# reference; confirm that a single value is valid for both uses.
update_cni() {
  local n

  # NODES is a whitespace-separated list, so word splitting is intended here.
  # shellcheck disable=SC2086
  for n in ${NODES}; do
    # Target only node $n; without --nodes every iteration would load the
    # archive into all nodes again.
    kind load image-archive "${CNI_IMAGE}" --name "${CLUSTER_NAME}" --nodes "$n"
  done
  # RollingUpdate
  kubectl set image ds/kindnet "kindnet-cni=${CNI_IMAGE}" -n kube-system
  kubectl rollout status ds/kindnet -n kube-system -w
  echo "Updated kindnet"
}

# Load the image of every control-plane component into every control-plane
# node and point the component's manifest at the "-amd64" image tagged
# DOCKER_TAG.
#
# Globals (all read):
#   CONTROL_PLANE_NODES       whitespace-separated node container names
#   CONTROL_PLANE_COMPONENTS  whitespace-separated component names
#   IMAGES_PATH               directory containing <component>.tar
#   CLUSTER_NAME              kind cluster name
#   DOCKER_TAG                tag written into the manifests
update_control_plane() {
  local n i

  # TODO allow to configure node and control plane components
  # Both lists are whitespace-separated, so word splitting is intended here.
  # shellcheck disable=SC2086
  for n in ${CONTROL_PLANE_NODES}; do
    for i in ${CONTROL_PLANE_COMPONENTS}; do
      kind load image-archive "${IMAGES_PATH}/${i}.tar" \
        --name "${CLUSTER_NAME}" --nodes "$n"
      # The manifest path is at least a kubeadm default.
      # 1st expression: replace the tag and append "-amd64" to the image name.
      # 2nd expression: collapse a repeated "-amd64" suffix, so running the
      # script again on an already rewritten manifest does not produce
      # "<name>-amd64-amd64".
      docker exec "$n" sed -i.bak -r \
        -e "s|^(.*image\:.*)\:.*$|\1-amd64\:${DOCKER_TAG}|" \
        -e '/image:/ s|(-amd64)+:|-amd64:|' \
        "/etc/kubernetes/manifests/$i.yaml"
      echo "Updated component $i on node $n"
      sleep 1
    done
  done
}

# Print the command-line synopsis on stdout.
# The option names listed here mirror the ones parse_args accepts.
usage() {
  echo "usage: kind-upgrade.sh [-n|--name <cluster_name>] [--cni-image <cni_image>] [-b|--build-mode docker|bazel]"
  echo "                       [--no-kproxy] [--no-control-plane] [--no-kubelet] [-h|--help]"
  echo ""
}

# Parse the command-line options into global variables.
#
# Arguments:
#   the script's command-line arguments
# Globals written (only for options that are present):
#   CLUSTER_NAME, CNI_IMAGE, BUILD_MODE,
#   UPDATE_KUBE_PROXY, UPDATE_KUBELET, UPDATE_CONTROL_PLANE
# Exits:
#   0 after printing usage for -h/--help;
#   1 after printing usage for an unknown option, an invalid build mode or
#   an option that is missing its value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -n | --name)
        if [[ $# -lt 2 ]]; then
          echo "Missing value for option: $1"
          usage
          exit 1
        fi
        shift
        CLUSTER_NAME=$1
        ;;
      --cni-image)
        if [[ $# -lt 2 ]]; then
          echo "Missing value for option: $1"
          usage
          exit 1
        fi
        shift
        CNI_IMAGE=$1
        ;;
      -b | --build-mode)
        if [[ $# -lt 2 ]]; then
          echo "Missing value for option: $1"
          usage
          exit 1
        fi
        shift
        if [[ "$1" != "docker" && "$1" != "bazel" ]]; then
          echo "Invalid build mode: $1"
          usage
          exit 1
        fi
        BUILD_MODE=$1
        ;;
      --no-kproxy)
        UPDATE_KUBE_PROXY=false
        ;;
      --no-kubelet)
        UPDATE_KUBELET=false
        ;;
      --no-control-plane)
        UPDATE_CONTROL_PLANE=false
        ;;
      -h | --help)
        usage
        exit
        ;;
      *)
        usage
        exit 1
        ;;
    esac
    shift
  done
}

# Hand the command line to the parser unchanged; "$@" keeps every argument
# as one word (an unquoted $* would re-split and glob-expand them).
parse_args "$@"

# Set default values
# Every setting keeps the value assigned during argument parsing, if any.
CLUSTER_NAME=${CLUSTER_NAME:-kind}
BUILD_MODE=${BUILD_MODE:-docker}
UPDATE_KUBE_PROXY=${UPDATE_KUBE_PROXY:-true}
# Must be derived from UPDATE_KUBELET itself: taking UPDATE_KUBE_PROXY here
# would discard --no-kubelet and make --no-kproxy disable the kubelet update.
UPDATE_KUBELET=${UPDATE_KUBELET:-true}
# TODO: we can have more granularity here
UPDATE_CONTROL_PLANE=${UPDATE_CONTROL_PLANE:-true}
CONTROL_PLANE_COMPONENTS="kube-apiserver kube-controller-manager kube-scheduler"

# Initialize variables
# Assume go installed
# KUBE_ROOT is the current directory, so hack/lib/version.sh is looked up
# relative to wherever the script is started from.
KUBE_ROOT="."
# KUBE_ROOT="$(go env GOPATH)/src/k8s.io/kubernetes"
# shellcheck source=/dev/null
source "${KUBE_ROOT}/hack/lib/version.sh"
kube::version::get_version_vars
# Image tag: KUBE_GIT_VERSION with its first '+' replaced by '_'
DOCKER_TAG=${KUBE_GIT_VERSION/+/_}
# NOTE(review): the fallback registry is k8s.gcr.io; confirm it matches the
# registry name the locally built images are tagged with.
DOCKER_REGISTRY=${KUBE_DOCKER_REGISTRY:-k8s.gcr.io}
export GOFLAGS="-tags=providerless"
export KUBE_BUILD_CONFORMANCE=n

# KIND nodes
# Ask kind once and derive the subsets from that single listing.
NODES=$(kind get nodes --name "${CLUSTER_NAME}")
# No control-plane match leaves grep's non-zero status on the assignment,
# which stops the script under `set -e`.
CONTROL_PLANE_NODES=$(grep control <<< "${NODES}")
# A cluster without worker nodes is valid: `|| true` keeps the empty grep
# result from aborting the script under `set -e`.
WORKER_NODES=$(grep worker <<< "${NODES}" || true)

# Main
# Build with the selected mode, then record where that build leaves the
# image archives (IMAGES_PATH) and the kubelet binary (KUBELET_BINARY).
# Any BUILD_MODE other than "docker" takes the bazel branch.
if [[ "${BUILD_MODE}" == "docker" ]]; then
  build_docker
  IMAGES_PATH="${KUBE_ROOT}/_output/release-images/amd64"
  KUBELET_BINARY=$(find "${KUBE_ROOT}/_output/" -type f -name kubelet)
else
  build_bazel
  IMAGES_PATH="${KUBE_ROOT}/bazel-kubernetes/bazel-out/k8-fastbuild/bin/build"
  KUBELET_BINARY=$(find "${KUBE_ROOT}/bazel-kubernetes/" -type f -name kubelet)
fi

# Run every update stage whose switch holds exactly "true", in this fixed
# order: control plane, kubelet, kube-proxy.
for stage in \
  "UPDATE_CONTROL_PLANE update_control_plane" \
  "UPDATE_KUBELET update_kubelet" \
  "UPDATE_KUBE_PROXY update_kube_proxy"; do
  switch_name=${stage%% *}
  stage_fn=${stage#* }
  if [[ "${!switch_name}" == "true" ]]; then
    "${stage_fn}"
  fi
done

# If CNI_IMAGE set update the CNI
if [[ -n "${CNI_IMAGE}" ]]; then
  update_cni
fi

# Final verdict: fail when any node is listed as NotReady.
# The listing is captured first so that a failing kubectl stops the script
# under `set -e` instead of being reported as a successful update.
node_listing=$(kubectl get nodes)
if grep NotReady <<< "${node_listing}"; then
  echo "Error: KIND cluster $CLUSTER_NAME NOT READY"
  exit 1
else
  echo "KIND cluster $CLUSTER_NAME updated successfully with version $KUBE_GIT_VERSION"
  exit 0
fi