Skip to content

Commit 8ee48b0

Browse files
author
Brad Klein
committed
Roll in feedback/address issues from mug upgrade (also add copyright)
1 parent 1983b04 commit 8ee48b0

26 files changed

+117
-34
lines changed

runLint.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ grep -n -R \” *.md && echo >&2 'Malformed quotes detected (bad: ” vs. good:
77
[ $error = 1 ] && echo '^FAILED'
88

99

10-
printf "+++++++++++++++ ... OK\n" && exit 0
10+
printf "+++++++++++++++ ... OK\n" && exit 0

upgrade/1.0/resource_material/common/prerequisite-steps.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ These steps should be taken regardless of the type of NCN you will be upgrading.
2727

2828
```text
2929
ncn# export CSM_RELEASE=csm-x.y.z
30-
ncn# export UPGRADE_NCN=ncn-m001
30+
ncn# export UPGRADE_NCN=<ncn> # <-- SET TO NODE BEING UPGRADED (like ncn-s001)
3131
3232
ncn# export STABLE_NCN=$(hostname)
3333
ncn# export UPGRADE_XNAME=$(curl -s -k -H "Authorization: Bearer ${TOKEN}" "https://api-gw-service-nmn.local/apis/sls/v1/search/hardware?extra_properties.Role=Management" | \

upgrade/1.0/resource_material/prereqs/get-csm.md

+12-11
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,9 @@ Fetch the base installation CSM tarball and extract it, installing the contained
99
```
1010
2. Download the CSM software release to ncn-m001 -- choose either stable or prerelease depending on your intent:
1111

12-
a. Preferred method (stable):
12+
a. **OPTION 1:** Internal/Test Systems:
1313

14-
```bash
15-
ncn-m001# cd ~
16-
ncn-m001# export ENDPOINT=https://arti.dev.cray.com/artifactory/shasta-distribution-stable-local/csm/
17-
ncn-m001# export CSM_RELEASE=csm-x.y.z
18-
ncn-m001# wget ${ENDPOINT}/${CSM_RELEASE}.tar.gz
19-
```
20-
21-
b. Prerelease/internal use (only):
22-
23-
> **`INTERNAL USE`** The `ENDPOINT` URL below are for internal use. Customers do not need to download any additional
14+
> NOTE: The `ENDPOINT` URL below is for internal use. Customers do not need to download any additional
2415
> artifacts; the CSM tarball is included along with the Shasta release.
2516
2617
```bash
@@ -30,6 +21,16 @@ Fetch the base installation CSM tarball and extract it, installing the contained
3021
ncn-m001# wget ${ENDPOINT}/${CSM_RELEASE}.tar.gz
3122
```
3223

24+
b. **OPTION 2:** Customer/Production Systems:
25+
26+
```bash
27+
ncn-m001# cd ~
28+
ncn-m001# export ENDPOINT=https://arti.dev.cray.com/artifactory/shasta-distribution-stable-local/csm/
29+
ncn-m001# export CSM_RELEASE=csm-x.y.z
30+
ncn-m001# wget ${ENDPOINT}/${CSM_RELEASE}.tar.gz
31+
```
32+
33+
3334
3. Expand the CSM software release:
3435
```bash
3536
ncn-m001# tar -zxvf ${CSM_RELEASE}.tar.gz

upgrade/1.0/scripts/ceph/ceph-partitions-stage1.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
disks=$(lsblk | grep -B2 -F md1 | grep ^s | awk '{print $1}')
47
disk1=$(echo $disks | awk '{print $1}')

upgrade/1.0/scripts/ceph/ceph-services-stage2.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
echo "Reconfiguring apparmor for haproxy"
47
sed -i -e '/inet6/a\' -e ' /etc/ceph/rgw.pem r,' /etc/apparmor.d/usr.sbin.haproxy

upgrade/1.0/scripts/ceph/ceph-upgrade.sh

+7-16
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
14
###
25
# Part 1. Prep work
36
###
47

58
file="/etc/cray/ceph/_upgraded"
69
pre_pull_images_file="/etc/cray/ceph/images_pre_pulled"
7-
scale_down_cephfs_clients_file="/etc/cray/ceph/cephfs_scaled_down"
8-
scale_up_cephfs_clients_file="/etc/cray/ceph/cephfs_scaled_up"
910
convert_rgw_file="/etc/cray/ceph/radosgw_converted"
1011
upgrade_init_file="/etc/cray/ceph/upgrade_initialized"
1112
upgrade_mons_file="/etc/cray/ceph/mons_upgraded"
@@ -59,13 +60,8 @@ else
5960
mark_initialized $pre_pull_images_file
6061
fi
6162

62-
if [ -f "$scale_down_cephfs_clients_file" ]; then
63-
echo "cephfs clients have been scaled down"
64-
else
65-
echo "Scaling down cephfs clients"
66-
scale_down_cephfs_clients
67-
mark_initialized $scale_down_cephfs_clients_file
68-
fi
63+
echo "Scaling down cephfs clients (if needed)"
64+
scale_down_cephfs_clients
6965

7066
if [ -f "$convert_rgw_file" ]; then
7167
echo "Radosgw has already been converted"
@@ -183,10 +179,5 @@ ceph config set mgr mgr/cephadm/warn_on_stray_daemons true
183179

184180
wait_for_health_ok
185181

186-
if [ -f "$scale_up_cephfs_clients_file" ]; then
187-
echo "cephfs clients have been scaled up"
188-
else
189-
echo "Scaling up cephfs clients"
190-
scale_up_cephfs_clients
191-
mark_initialized $scale_up_cephfs_clients_file
192-
fi
182+
echo "Scaling up cephfs clients"
183+
scale_up_cephfs_clients

upgrade/1.0/scripts/ceph/lib/ceph-health.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
function wait_for_health_ok() {
47
cnt=0

upgrade/1.0/scripts/ceph/lib/ceph-image-pull.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# Pre-pull images for upgrade so we can live without nexus during upgrade
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
function pre_pull_ceph_images () {
47
IMAGE="$registry/ceph/ceph:v15.2.8"

upgrade/1.0/scripts/ceph/lib/ceph-install-dashboard.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function install_dashboard () {
26
echo "Enabling the Ceph Dashboard"
37
until $(ceph mgr services|jq .dashboard) =~ "ncn-s00"
@@ -28,4 +32,4 @@ function install_dashboard () {
2832
echo "Disable ssl_verify until we are on signed certs"
2933
ceph dashboard set-rgw-api-ssl-verify False
3034
# Add checks for verifying the dashboard is up and functional
31-
}
35+
}

upgrade/1.0/scripts/ceph/lib/ceph-orch-tasks.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function ceph_orch_tasks () {
26
for host in $(ceph node ls| jq -r '.osd|keys[]')
37
do

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-init.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function ceph_upgrade_init () {
26
echo "Starting upgrade with initial tasks"
37
for host in $(ceph node ls| jq -r '.osd|keys[]')

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-mdss.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# Begin OSD conversion. Run on each node that has OSDS
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
. ./lib/ceph-health.sh
47

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-mgrs.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
### Begin run on each mon/mgr
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
5+
26
function upgrade_ceph_mgrs () {
37
for host in $(ceph node ls| jq -r '.mgr|keys[]')
48
do

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-mons.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
### Begin run on each mon/mgr
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
5+
26
function upgrade_ceph_mons () {
37
for host in $(ceph node ls| jq -r '.mon|keys[]')
48
do

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-osds.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
# Begin OSD conversion. Run on each node that has OSDS
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
5+
26
function upgrade_osds () {
37
for host in $(ceph node ls| jq -r '.osd|keys[]')
48
do

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-rgws.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
# stop the ceph-rgw daemon on all hosts as the command needs the cluster status to be in HEALTH_OK
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
5+
26
function upgrade_rgws () {
37

48
for host in $(ceph node ls| jq -r '.osd|keys|join(" ")'); do

upgrade/1.0/scripts/ceph/lib/ceph-upgrade-step1.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
cephadm prepare-host
26
cephadm ls
37
ceph config assimilate-conf -i /etc/ceph/ceph.conf

upgrade/1.0/scripts/ceph/lib/cephadm-keys.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function create_cephadm_keys () {
26
echo "Creating cephadm key"
37
ceph cephadm generate-key

upgrade/1.0/scripts/ceph/lib/convert-radosgw.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function convert_radosgw () {
26
echo "Converting radsogw to support Ceph 15.x requirements"
37
echo "Verifying that a realm doesn't exist"

upgrade/1.0/scripts/ceph/lib/k8s-scale-utils.sh

+20-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# Scale down cephfs clients to prevent mds corruption issues
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
cephfs_replica_counts_file="/etc/cray/ceph/cephfs_replica_counts"
47

@@ -7,10 +10,11 @@ function scale_down_cephfs_clients () {
710
backup_name="$now-snapshot"
811

912
echo "Taking a snapshot of nexus pvc ($backup_name)"
10-
kubectl -n nexus exec -it $(kubectl get po -n nexus -l 'app=nexus' -o json | jq -r '.items[].metadata.name') -c nexus -- /bin/sh -c "mkdir /nexus-data/.snap/$backup_name"
13+
output=$(kubectl -n nexus exec -it $(kubectl get po -n nexus -l 'app=nexus' -o json | jq -r '.items[].metadata.name') -c nexus -- /bin/sh -c "mkdir /nexus-data/.snap/$backup_name" 2>&1)
14+
if [[ "$?" -ne 0 ]]; then
15+
echo "Didn't find nexus pod to take snapshot from -- continuing..."
16+
fi
1117

12-
echo "Sleeping 10 seconds after taking nexus pvc snapshot"
13-
sleep 10
1418

1519
rm -f $cephfs_replica_counts_file
1620
cnt=0
@@ -26,8 +30,20 @@ function scale_down_cephfs_clients () {
2630
kubectl get deployment -n $ns $deployment -o yaml | grep -q "claimName: $pvc_name"
2731
if [[ "$?" -eq 0 ]]; then
2832
num_replicas=$(kubectl -n $ns get deployment $deployment -o json | jq -r '.spec.replicas')
33+
if [[ "$num_replicas" -eq 0 ]]; then
34+
#
35+
# We may have already scaled this deployment down or are re-running
36+
# the upgrade script. Let's be careful not to write zeros in the
37+
# replica count file.
38+
#
39+
if [ "$deployment" == "cray-ipxe" ]; then
40+
num_replicas=3
41+
else
42+
num_replicas=1
43+
fi
44+
fi
2945
echo "${ns}_${deployment} $num_replicas" >> $cephfs_replica_counts_file
30-
echo "Scaling $deployment deployment in namespace $ns from $num_replicas to zero"
46+
echo "Ensuring $deployment deployment in namespace $ns is scaled from $num_replicas to zero"
3147
kubectl scale deployment -n $ns $deployment --replicas=0
3248
fi
3349
done

upgrade/1.0/scripts/ceph/lib/mark_step_complete.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function mark_initialized() {
26
initialized_file=$1
37
touch $initialized_file

upgrade/1.0/scripts/ceph/lib/update_container_images.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#
2+
# Copyright 2021 Hewlett Packard Enterprise Development LP
3+
#
4+
15
function update_image_values () {
26
IMAGE="$registry/ceph/ceph:v15.2.8"
37
ceph config set global container_image $IMAGE

upgrade/1.0/scripts/k8s/determine-worker-order.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
all_workers=$(kubectl get nodes | grep ncn-w | awk '{print $1}')
47
kea_node=$(kubectl get po -n services -l 'app.kubernetes.io/name=cray-dhcp-kea' -o wide | grep -v NAME | awk '{print $7}')

upgrade/1.0/scripts/k8s/move-pod.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
pod_name=$1
47
target_node=$2

upgrade/1.0/scripts/k8s/promote-initial-master.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
source /srv/cray/scripts/metal/lib.sh
47
export KUBERNETES_VERSION="v$(cat /etc/cray/kubernetes/version)"

upgrade/1.0/scripts/k8s/remove-k8s-node.sh

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash
2+
#
3+
# Copyright 2021 Hewlett Packard Enterprise Development LP
4+
#
25

36
if [ "$1" == "" ]
47
then

0 commit comments

Comments
 (0)