Skip to content

Commit 5920b8e

Browse files
committed
add deployment, document some challenges
Signed-off-by: Thomas Jungblut <[email protected]>
1 parent 9ea3c07 commit 5920b8e

File tree

4 files changed

+156
-48
lines changed

4 files changed

+156
-48
lines changed
Lines changed: 107 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,164 @@
11
apiVersion: apps/v1
2+
# TODO may become a daemonset now that we run this on hostnetwork
3+
# TODO or just a fully functional sidecar mounting memory in pod.gotpl.yaml
24
kind: Deployment
35
metadata:
46
name: dedicated-event-etcd
57
namespace: openshift-etcd
68
labels:
79
app: dedicated-event-etcd
810
k8s-app: dedicated-event-etcd
9-
etcd: "true"
1011
spec:
1112
replicas: 1
1213
selector:
1314
matchLabels:
1415
app: dedicated-event-etcd
1516
k8s-app: dedicated-event-etcd
16-
etcd: "true"
1717
template:
1818
metadata:
1919
name: dedicated-event-etcd
2020
annotations:
21-
kubectl.kubernetes.io/default-container: etcd
21+
kubectl.kubernetes.io/default-container: etcdctl
2222
labels:
2323
app: dedicated-event-etcd
2424
k8s-app: dedicated-event-etcd
25-
etcd: "true"
2625
spec:
2726
hostNetwork: true
28-
priority: 2000001000
29-
priorityClassName: system-node-critical
27+
nodeSelector:
28+
node-role.kubernetes.io/master: ""
3029
tolerations:
31-
- operator: "Exists"
30+
- operator: "Exists"
3231
containers:
32+
- name: etcdctl
33+
# image: {{.Image}}
34+
# hardcoded 4.20.0
35+
image: "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:3654c629d9d8c9a07b481f6d9f8b36f77922f4b196b5e9dd4979957821dbd4b2"
36+
imagePullPolicy: IfNotPresent
37+
terminationMessagePolicy: FallbackToLogsOnError
38+
command:
39+
- "/bin/bash"
40+
- "-c"
41+
- "trap TERM INT; sleep infinity & wait"
42+
volumeMounts:
43+
- mountPath: /var/lib/etcd/
44+
name: data-dir
45+
- mountPath: /etcd-all-bundles
46+
name: etcd-ca-bundle
47+
- mountPath: /etcd-all-certs
48+
name: etcd-all-certs
49+
env:
50+
# export ETCDCTL_ENDPOINTS="https://${MY_POD_IP}:20379"
51+
# export ETCDCTL_CACERT="/etcd-all-bundles/ca-bundle.crt"
52+
# export ETCDCTL_CERT="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.crt"
53+
# export ETCDCTL_KEY="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.key"
54+
55+
- name: MY_POD_IP
56+
valueFrom:
57+
fieldRef:
58+
fieldPath: status.podIP
59+
- name: MY_NODE_NAME
60+
valueFrom:
61+
fieldRef:
62+
fieldPath: spec.nodeName
63+
- name: ETCD_DATA_DIR
64+
value: "/var/lib/etcd"
65+
- name: ETCDCTL_ENDPOINTS
66+
value: "https://$(MY_POD_IP):20379"  # Kubernetes expands only $(VAR) in env values; ${VAR} would be passed through literally
67+
- name: ETCDCTL_CACERT
68+
value: "/etcd-all-bundles/ca-bundle.crt"
69+
- name: ETCDCTL_CERT
70+
value: "/etcd-all-certs/etcd-peer-$(MY_NODE_NAME).crt"
71+
- name: ETCDCTL_KEY
72+
value: "/etcd-all-certs/etcd-peer-$(MY_NODE_NAME).key"
3373
- name: etcd
34-
image: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:placeholder
74+
# image: {{.Image}}
75+
# hardcoded 4.20.0
76+
image: "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:3654c629d9d8c9a07b481f6d9f8b36f77922f4b196b5e9dd4979957821dbd4b2"
3577
imagePullPolicy: IfNotPresent
3678
terminationMessagePolicy: FallbackToLogsOnError
79+
env:
80+
- name: MY_POD_IP
81+
valueFrom:
82+
fieldRef:
83+
fieldPath: status.podIP
84+
- name: MY_NODE_NAME
85+
valueFrom:
86+
fieldRef:
87+
fieldPath: spec.nodeName
3788
command:
3889
- /bin/sh
3990
- -c
4091
- |
4192
#!/bin/sh
4293
set -euo pipefail
94+
set -x
95+
96+
export ETCD_NAME=events-etcd
4397
44-
export ETCD_NAME=${NODE_NODE_ENVVAR_NAME_ETCD_NAME}
98+
echo "----------------"
4599
env | grep ETCD | grep -v NODE
46-
47-
set -x
48-
# See https://etcd.io/docs/v3.4.0/tuning/ for why we use ionice
49-
exec nice -n -19 ionice -c2 -n0 etcd \
100+
echo "----------------"
101+
echo "$MY_NODE_NAME"
102+
echo "$MY_POD_IP"
103+
echo "----------------"
104+
ls -l /etcd-all-certs
105+
echo "----------------"
106+
ls -l /etcd-all-bundles
107+
echo "----------------"
108+
109+
etcd \
110+
--data-dir=/var/lib/etcd \
50111
--logger=zap \
51112
--log-level=WARN \
52113
--snapshot-count=10000 \
53-
--initial-advertise-peer-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2380 \
54-
--cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-NODE_NAME.crt \
55-
--key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-NODE_NAME.key \
56-
--trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
114+
--quota-backend-bytes 8589934592 \
115+
--cert-file="/etcd-all-certs/etcd-serving-${MY_NODE_NAME}.crt" \
116+
--key-file="/etcd-all-certs/etcd-serving-${MY_NODE_NAME}.key" \
117+
--trusted-ca-file="/etcd-all-bundles/ca-bundle.crt" \
57118
--client-cert-auth=true \
58-
--peer-cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt \
59-
--peer-key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.key \
60-
--peer-trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
119+
--initial-cluster="${ETCD_NAME}=https://${MY_POD_IP}:20380" \
120+
--initial-advertise-peer-urls="https://${MY_POD_IP}:20380" \
121+
--listen-peer-urls="https://${MY_POD_IP}:20380" \
122+
--peer-cert-file="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.crt" \
123+
--peer-key-file="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.key" \
124+
--peer-trusted-ca-file="/etcd-all-bundles/ca-bundle.crt" \
61125
--peer-client-cert-auth=true \
62-
--advertise-client-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2379 \
63-
--listen-client-urls=https://0.0.0.0:2379 \
64-
--listen-peer-urls=https://0.0.0.0:2380
65-
126+
--advertise-client-urls=https://${MY_POD_IP}:20379 \
127+
--listen-client-urls=https://0.0.0.0:20379
128+
66129
ports:
67-
- containerPort: 2379
130+
- containerPort: 20379
68131
name: etcd
69132
protocol: TCP
133+
- containerPort: 20380
134+
name: etcd-peer
135+
protocol: TCP
70136
resources:
71-
requests:
72-
memory: 5Gi
73137
limits:
74-
memory: 10Gi
138+
memory: 8Gi
75139
securityContext:
76140
privileged: true
77141
readOnlyRootFilesystem: true
78142
volumeMounts:
79-
- mountPath: /etc/kubernetes/manifests
80-
name: static-pod-dir
81-
- mountPath: /etc/kubernetes/static-pod-resources
82-
name: resource-dir
83-
- mountPath: /etc/kubernetes/static-pod-certs
84-
name: cert-dir
85-
- mountPath: /tmp
86-
name: tmp-dir
143+
# TODO inject etcd-all-certs and etcd-all-bundles
144+
# TODO this is going to be annoying, because the certs are not issued for the pod IP
145+
# and the peer cert is the client cert IIRC, so might make sense to schedule on the
146+
# existing CP anyway and mount the static pods for the respective node name via downward API
87147
- mountPath: /var/lib/etcd/
88148
name: data-dir
149+
- mountPath: /etcd-all-bundles
150+
name: etcd-ca-bundle
151+
- mountPath: /etcd-all-certs
152+
name: etcd-all-certs
89153
volumes:
90-
- hostPath:
91-
path: /etc/kubernetes/manifests
92-
name: static-pod-dir
93-
- hostPath:
94-
path: /etc/kubernetes/static-pod-resources/etcd-pod-REVISION
95-
name: resource-dir
96-
- hostPath:
97-
path: /etc/kubernetes/static-pod-resources/etcd-certs
98-
name: cert-dir
99-
- emptyDir: {}
100-
name: tmp-dir
154+
- configMap:
155+
name: etcd-ca-bundle
156+
name: etcd-ca-bundle
157+
- secret:
158+
secretName: etcd-all-certs
159+
name: etcd-all-certs
101160
- name: data-dir
102161
emptyDir:
103162
medium: Memory
104-
sizeLimit: 5Gi
163+
sizeLimit: 8Gi
105164

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
namespace: openshift-etcd
5+
name: events-etcd
6+
annotations:
7+
service.alpha.openshift.io/serving-cert-secret-name: serving-cert
8+
prometheus.io/scrape: "false"
9+
prometheus.io/scheme: https
10+
labels:
11+
k8s-app: dedicated-event-etcd
12+
spec:
13+
selector:
14+
k8s-app: dedicated-event-etcd
15+
ports:
16+
- name: etcd
17+
port: 2379
18+
protocol: TCP
targetPort: 20379  # etcd listens on 20379 in the pod; without this, targetPort defaults to 2379 and the Service reaches nothing

pkg/operator/dedicatedetcdcontroller/dedicated_etcd_controller.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,35 @@ func (c *DedicatedEtcdController) sync(ctx context.Context, syncCtx factory.Sync
9191
// 2. Configure it to store only events
9292
// 3. Update status accordingly
9393

94+
/*
95+
TODO
96+
seems the hostNetwork pod can't connect to the service network / DNS lookup the service:
97+
grpc: addrConn.createTransport failed to connect to {Addr: "events-etcd.openshift-etcd.svc.cluster.local:2379", ServerName: "events-etcd.openshift-etcd.svc.cluster.local:2379", }. Err: connection error: desc = "transport: Error while dialing: dial tcp: lookup events-etcd.openshift-etcd.svc.cluster.local on 169.254.169.254:53: no such host"
98+
*/
99+
100+
/*
101+
TODO Patch for KAS-O
102+
apiVersion: operator.openshift.io/v1
103+
kind: KubeAPIServer
104+
spec:
105+
unsupportedConfigOverrides:
106+
apiServerArguments:
107+
etcd-servers-overrides:
108+
- "/kubernetes.io/events#https://events-etcd.openshift-etcd.svc.cluster.local:2379"
109+
110+
# TODO also problematic somehow:
111+
# err="--etcd-servers-overrides invalid, must be of format: group/resource#servers, where servers are URLs, semicolon separated"
112+
113+
*/
114+
115+
/*
116+
TODO using pod IPs instead will result in cert errors like:
117+
desc = "transport: authentication handshake failed: tls: failed to verify certificate: x509: certificate is valid for 10.0.0.5, 127.0.0.1, ::1, not 10.130.0.68"
118+
*/
119+
120+
// TODO one can verify whether it works by watching the event prefix on etcdctl
121+
// sh-5.1# etcdctl watch /kubernetes.io/events --prefix
122+
94123
_, _, updateErr := v1helpers.UpdateStatus(ctx, c.operatorClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{
95124
Type: "DedicatedEtcdForEventsAvailable",
96125
Status: operatorv1.ConditionTrue,

pkg/tlshelpers/tlshelpers.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ func getServerHostNames(nodeInternalIPs []string) []string {
6565
"etcd.kube-system.svc",
6666
"etcd.kube-system.svc.cluster.local",
6767
"etcd.openshift-etcd.svc",
68+
"events-etcd.openshift-etcd.svc",
6869
"etcd.openshift-etcd.svc.cluster.local",
70+
"events-etcd.openshift-etcd.svc.cluster.local",
6971
"127.0.0.1",
7072
"::1",
7173
// "0:0:0:0:0:0:0:1" will be automatically collapsed to "::1", so we don't have to add it on top

0 commit comments

Comments
 (0)