Skip to content

Commit 1c8064c

Browse files
authored
Merge pull request #33486 from def-/pr-nightly40
Nightly fixes (2025-09-01)
2 parents 2c7cfb9 + 4cf84ca commit 1c8064c

File tree

5 files changed: 67 additions and 10 deletions.

ci/nightly/pipeline.template.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ steps:
13621362
depends_on: build-aarch64
13631363
timeout_in_minutes: 1200
13641364
concurrency: 1
1365-
concurrency_group: 'cloud-canary'
1365+
concurrency_group: 'mz-e2e'
13661366
agents:
13671367
queue: linux-aarch64-small
13681368
plugins:
@@ -1375,7 +1375,7 @@ steps:
13751375
depends_on: build-aarch64
13761376
timeout_in_minutes: 1200
13771377
concurrency: 1
1378-
concurrency_group: 'mz-e2e'
1378+
concurrency_group: 'cloud-canary'
13791379
agents:
13801380
# Requires real Mz access, CONFLUENT_CLOUD_DEVEX_KAFKA_USERNAME, etc.
13811381
queue: linux-aarch64-small
@@ -1813,7 +1813,8 @@ steps:
18131813
artifact_paths: [parallel-workload-queries.log.zst]
18141814
timeout_in_minutes: 90
18151815
agents:
1816-
queue: hetzner-x86-64-8cpu-16gb
1816+
# Azure blob store uses more memory
1817+
queue: hetzner-x86-64-16cpu-32gb
18171818
plugins:
18181819
- ./ci/plugins/mzcompose:
18191820
composition: parallel-workload

ci/plugins/mzcompose/hooks/command

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ mzcompose() {
1919
stdbuf --output=L --error=L bin/ci-builder run "$builder" bin/mzcompose --find "$BUILDKITE_PLUGIN_MZCOMPOSE_COMPOSITION" "$@"
2020
}
2121

22+
kubectl() {
23+
bin/ci-builder run stable kubectl --context="$K8S_CONTEXT" "$@"
24+
}
25+
2226
faketty() {
2327
script -qfc "$(printf "%q " "$@")" /dev/null
2428
}
@@ -99,7 +103,7 @@ cleanup() {
99103
printf "\n%s" "$BUILDKITE_LABEL: test timed out" >> run.log
100104
fi
101105

102-
echo "--- Post command steps"
106+
ci_unimportant_heading "Post command steps"
103107
# Run before potential "run down" in coverage
104108
docker ps --all --quiet | xargs --no-run-if-empty docker inspect | jq '
105109
.[]
@@ -126,6 +130,47 @@ cleanup() {
126130
ps aux | sed -E "s/\S*mzp_\S*/[REDACTED]/g" > ps-aux.log
127131
docker stats --all --no-stream > docker-stats.log
128132

133+
if [ "$BUILDKITE_PLUGIN_MZCOMPOSE_COMPOSITION" = "orchestratord" ]; then
134+
ci_unimportant_heading "orchestratord test: Uploading logs..."
135+
K8S_CLUSTER_NAME=kind
136+
K8S_CONTEXT="kind-$K8S_CLUSTER_NAME"
137+
for pod in $(kubectl get pods -o name | grep -v -E 'kubernetes|minio|cockroach|redpanda'); do
138+
kubectl logs --prefix=true "$pod" &>> kubectl-get-logs.log || true
139+
kubectl logs --previous --prefix=true "$pod" &>> kubectl-get-logs-previous.log || true
140+
done
141+
kubectl get events > kubectl-get-events.log || true
142+
kubectl get all > kubectl-get-all.log || true
143+
kubectl get events > kubectl-get-events.log || true
144+
kubectl get all > kubectl-get-all.log || true
145+
kubectl describe all | awk '
146+
BEGIN { redact=0 }
147+
/^[[:space:]]*Environment:/ {
148+
indent = match($0, /[^ ]/) - 1
149+
print substr($0, 1, indent) "Environment: [REDACTED]"
150+
redact = 1
151+
next
152+
}
153+
redact {
154+
current_indent = match($0, /[^ ]/) - 1
155+
if (current_indent <= indent || NF == 0) {
156+
redact = 0
157+
} else {
158+
next
159+
}
160+
}
161+
{ print }
162+
' > kubectl-describe-all.log || true
163+
kubectl get pods -o wide > kubectl-pods-with-nodes.log || true
164+
165+
kubectl -n kube-system get events > kubectl-get-events-kube-system.log || true
166+
kubectl -n kube-system get all > kubectl-get-all-kube-system.log || true
167+
kubectl -n kube-system describe all > kubectl-describe-all-kube-system.log || true
168+
169+
mapfile -t artifacts < <(printf "kubectl-get-logs.log\nkubectl-get-logs-previous.log\nkubectl-get-events.log\nkubectl-get-all.log\nkubectl-describe-all.log\nkubectl-pods-with-nodes.log\nkubectl-get-events-kube-system.log\nkubectl-get-all-kube-system.log\nkubectl-describe-all-kube-system.log\nkail-output.log\n")
170+
artifacts_str=$(IFS=";"; echo "${artifacts[*]}")
171+
buildkite-agent artifact upload "$artifacts_str"
172+
fi
173+
129174
mv "$cores" . || true
130175

131176
if find cores -name 'core.*' | grep -q .; then
@@ -174,7 +219,7 @@ cleanup() {
174219
mapfile -t artifacts < <(printf "run.log\nservices.log\njournalctl-merge.log\nnetstat-ant.log\nnetstat-panelot.log\nps-aux.log\ndocker-inspect.log\n"; find . -name 'junit_*.xml' -printf '%P\n'; find . -maxdepth 1 -name 'mz_debug_*.log' -printf '%P\n'; find . -maxdepth 1 -name 'slt*.diff' -printf '%P\n')
175220
artifacts_str=$(IFS=";"; echo "${artifacts[*]}")
176221

177-
echo "--- Running trufflehog to scan artifacts for secrets & uploading artifacts"
222+
ci_unimportant_heading "Running trufflehog to scan artifacts for secrets & uploading artifacts"
178223
{
179224
bin/ci-builder run "$builder" trufflehog --no-update --no-verification --json --exclude-detectors=coda,dockerhub,box,npmtoken,github,snykkey,eightxeight,sumologickey,miro,fmfw,logzio,qase,cannyio,uplead,tatumio filesystem "${artifacts[@]}" | trufflehog_jq_filter_logs > trufflehog.log
180225
} &
@@ -187,7 +232,7 @@ cleanup() {
187232
buildkite-agent artifact upload "$artifacts_str" || true
188233
} &
189234
wait
190-
echo "--- Annotating errors"
235+
ci_unimportant_heading "Annotating errors"
191236
bin/ci-builder run "$builder" bin/ci-annotate-errors --test-cmd="$TEST_CMD" --test-desc="$TEST_DESC" --test-result="$TEST_RESULT" "${artifacts[@]}" trufflehog.log > ci-annotate-errors.log || CI_ANNOTATE_ERRORS_RESULT=$?
192237
buildkite-agent artifact upload "ci-annotate-errors.log" &
193238

misc/python/materialize/parallel_workload/action.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,8 @@ def run(self, exe: Executor) -> bool:
698698
# Was dropped while we were acquiring lock
699699
if table not in exe.db.tables:
700700
return False
701+
if len(exe.db.tables) <= 2:
702+
return False
701703

702704
query = f"DROP TABLE {table}"
703705
exe.execute(query, http=Http.RANDOM)
@@ -894,6 +896,8 @@ def run(self, exe: Executor) -> bool:
894896
# Was dropped while we were acquiring lock
895897
if db not in exe.db.dbs:
896898
return False
899+
if len(exe.db.dbs) <= 1:
900+
return False
897901

898902
query = f"DROP DATABASE {db} RESTRICT"
899903
exe.execute(query, http=Http.RANDOM)
@@ -929,6 +933,8 @@ def run(self, exe: Executor) -> bool:
929933
# Was dropped while we were acquiring lock
930934
if schema not in exe.db.schemas:
931935
return False
936+
if len(exe.db.schemas) <= 1:
937+
return False
932938

933939
query = f"DROP SCHEMA {schema}"
934940
exe.execute(query, http=Http.RANDOM)
@@ -1511,6 +1517,10 @@ def run(self, exe: Executor) -> bool:
15111517
if cluster not in exe.db.clusters:
15121518
return False
15131519

1520+
# Avoid removing all clusters
1521+
if len(exe.db.clusters) <= 1:
1522+
return False
1523+
15141524
query = f"DROP CLUSTER {cluster}"
15151525
try:
15161526
exe.execute(query, http=Http.RANDOM)
@@ -1942,7 +1952,7 @@ def run(self, exe: Executor) -> bool:
19421952
):
19431953
self.composition.up(mz_service, detach=True)
19441954
self.composition.await_mz_deployment_status(
1945-
DeploymentStatus.READY_TO_PROMOTE, mz_service
1955+
DeploymentStatus.READY_TO_PROMOTE, mz_service, timeout=1800
19461956
)
19471957
self.composition.promote_mz(mz_service)
19481958
self.composition.await_mz_deployment_status(

test/aws/mzcompose.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,10 @@ def test_s3tablesrest_connection(c: Composition, ctx: TestContext):
247247
customer_role = f"testdrive-{ctx.seed}-Customer"
248248
customer_role_arn = f"arn:aws:iam::{ctx.account_id}:role/{customer_role}"
249249
c.sql(
250-
f"CREATE CONNECTION aws_assume_role TO AWS (ASSUME ROLE ARN '{customer_role_arn}')"
250+
f"CREATE CONNECTION aws_assume_role_s3tablesrest TO AWS (ASSUME ROLE ARN '{customer_role_arn}')"
251251
)
252252
connection_id = c.sql_query(
253-
"SELECT id FROM mz_connections WHERE name = 'aws_assume_role'"
253+
"SELECT id FROM mz_connections WHERE name = 'aws_assume_role_s3tablesrest'"
254254
)[0][0]
255255

256256
principal = c.sql_query(
@@ -289,7 +289,7 @@ def test_s3tablesrest_connection(c: Composition, ctx: TestContext):
289289
c.sleep(ctx.iam_propagation_seconds)
290290

291291
c.sql(
292-
f"CREATE CONNECTION s3tables TO ICEBERG CATALOG (CATALOG TYPE = 's3tablesrest', URL = 'https://s3tables.us-east-1.amazonaws.com/iceberg', WAREHOUSE = '{bucket['arn']}', AWS CONNECTION = aws_assume_role)"
292+
f"CREATE CONNECTION s3tables TO ICEBERG CATALOG (CATALOG TYPE = 's3tablesrest', URL = 'https://s3tables.us-east-1.amazonaws.com/iceberg', WAREHOUSE = '{bucket['arn']}', AWS CONNECTION = aws_assume_role_s3tablesrest)"
293293
)
294294
finally:
295295
if bucket is not None:

test/upsert/mzcompose.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
additional_system_parameter_defaults={
4747
"unsafe_enable_unorchestrated_cluster_replicas": "true",
4848
"storage_dataflow_delay_sources_past_rehydration": "true",
49+
"memory_limiter_interval": "0",
4950
},
5051
environment_extra=materialized_environment_extra,
5152
default_replication_factor=2,

0 commit comments

Comments
 (0)