@@ -1,4 +1,7 @@
 # Scale down cephfs clients to prevent mds corruption issues
+#
+# Copyright 2021 Hewlett Packard Enterprise Development LP
+#
 
 cephfs_replica_counts_file="/etc/cray/ceph/cephfs_replica_counts"
 
@@ -7,10 +10,11 @@ function scale_down_cephfs_clients () {
   backup_name="$now-snapshot"
 
   echo "Taking a snapshot of nexus pvc ($backup_name)"
-  kubectl -n nexus exec -it $(kubectl get po -n nexus -l 'app=nexus' -o json | jq -r '.items[].metadata.name') -c nexus -- /bin/sh -c "mkdir /nexus-data/.snap/$backup_name"
+  output=$(kubectl -n nexus exec -it $(kubectl get po -n nexus -l 'app=nexus' -o json | jq -r '.items[].metadata.name') -c nexus -- /bin/sh -c "mkdir /nexus-data/.snap/$backup_name" 2>&1)
+  if [[ "$?" -ne 0 ]]; then
+    echo "Didn't find nexus pod to take snapshot from -- continuing..."
+  fi
 
-  echo "Sleeping 10 seconds after taking nexus pvc snapshot"
-  sleep 10
 
   rm -f $cephfs_replica_counts_file
   cnt=0
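
An aside on the mkdir in the hunk above: on a CephFS-backed volume, creating a directory under the special .snap directory takes a snapshot of the filesystem at that moment, which is why a plain mkdir is enough to back up the nexus PVC. As a minimal, illustrative sketch (not part of this change), the snapshot could later be listed or removed from the same pod; the pod lookup mirrors the command above, and $backup_name is the snapshot name chosen there:

# Illustrative sketch only; cleaning up the snapshot is not part of this commit.
nexus_pod=$(kubectl get po -n nexus -l 'app=nexus' -o json | jq -r '.items[].metadata.name')
# List existing CephFS snapshots of the nexus PVC.
kubectl -n nexus exec -it "$nexus_pod" -c nexus -- /bin/sh -c "ls /nexus-data/.snap"
# Removing a directory entry under .snap deletes that snapshot.
kubectl -n nexus exec -it "$nexus_pod" -c nexus -- /bin/sh -c "rmdir /nexus-data/.snap/$backup_name"
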
@@ -26,8 +30,20 @@ function scale_down_cephfs_clients () {
     kubectl get deployment -n $ns $deployment -o yaml | grep -q "claimName: $pvc_name"
     if [[ "$?" -eq 0 ]]; then
       num_replicas=$(kubectl -n $ns get deployment $deployment -o json | jq -r '.spec.replicas')
+      if [[ "$num_replicas" -eq 0 ]]; then
+        #
+        # We may have already scaled this deployment down or are re-running
+        # the upgrade script. Let's be careful not to write zeros in the
+        # replica count file.
+        #
+        if [ "$deployment" == "cray-ipxe" ]; then
+          num_replicas=3
+        else
+          num_replicas=1
+        fi
+      fi
       echo "${ns}_${deployment} $num_replicas" >> $cephfs_replica_counts_file
-      echo "Scaling $deployment deployment in namespace $ns from $num_replicas to zero"
+      echo "Ensuring $deployment deployment in namespace $ns is scaled from $num_replicas to zero"
       kubectl scale deployment -n $ns $deployment --replicas=0
     fi
   done
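
The comment added in the last hunk explains why zero must never be written to the replica count file: each line records "<namespace>_<deployment> <replicas>", presumably so a later step of the upgrade can scale everything back up, and a recorded zero would leave that deployment scaled down for good. As a minimal sketch of how such a file could be consumed, where the function name, the namespace in the example line, and the error handling are assumptions rather than code from this commit:

# Hypothetical sketch only: the real scale-up logic lives elsewhere in the
# upgrade scripts and may differ from this.
function scale_up_cephfs_clients () {
  cephfs_replica_counts_file="/etc/cray/ceph/cephfs_replica_counts"

  if [[ ! -f "$cephfs_replica_counts_file" ]]; then
    echo "No replica count file found at $cephfs_replica_counts_file -- nothing to scale up"
    return 0
  fi

  # Each line has the form "<namespace>_<deployment> <replicas>", for example
  # "services_cray-ipxe 3", matching what scale_down_cephfs_clients() wrote.
  while read -r name replicas; do
    # Namespaces and deployment names cannot contain underscores, so splitting
    # on the first underscore recovers both halves safely.
    ns="${name%%_*}"
    deployment="${name#*_}"
    echo "Scaling $deployment deployment in namespace $ns back to $replicas replicas"
    kubectl scale deployment -n "$ns" "$deployment" --replicas="$replicas"
  done < "$cephfs_replica_counts_file"
}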