Skip to content

Commit b00d888

Browse files
tmessiHugo Vieira
and
Hugo Vieira
authored
ci(schema-diff): Add script to generate diff of database schema (hashicorp#2924)
The script should be given a base branch to compare against; if not set, it defaults to main. The script applies the schema migrations for both the base branch and the current commit and generates dumps that are then compared to create a diff. The dumps are left in a temporary directory so that they can be compared using a user's preferred diff tool. For example, to generate the dumps and diff against main:
./scripts/schema-diff.sh main
To examine the diff of database functions using delta:
delta .schema-diff/funcs_$(git rev-parse main) .schema-diff/funcs_$(git rev-parse HEAD)
This also adds a GitHub Action that runs the script on pull requests that contain schema changes and reports the diffs as a comment on the pull request. Co-authored-by: Hugo Vieira <[email protected]>
1 parent 21e73e4 commit b00d888

File tree

4 files changed

+299
-0
lines changed

4 files changed

+299
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env bash
2+
# Copyright (c) HashiCorp, Inc.
3+
# SPDX-License-Identifier: MPL-2.0
4+
5+
# This script finds all issue comments on a PR that match a specific prefix and
6+
# have been posted by a specific user and minimizes them.
7+
8+
#######################################
# Print a message and stop the script *successfully*.
#
# We don't ever return a non-zero status code because GitHub Actions exits out
# of the workflow if any of the commands exit with a non-zero status code. In
# the case of this script, which is tailored to run in GitHub Actions, the
# next operation (post a new comment with the new db schema diff) should still
# run even when this script fails.
#
# Arguments: all arguments are joined with single spaces and printed.
# Outputs:   the message, on stdout.
# Returns:   never returns; always exits the shell with status 0.
#######################################
die() {
  # printf instead of echo so a message that happens to start with "-n" or
  # "-e" is printed verbatim rather than being parsed as an echo option.
  printf '%s\n' "$*"
  exit 0
}
17+
18+
# `command -v` is a shell builtin, so the check works even on images that do
# not ship the external `which` binary.
command -v jq > /dev/null 2>&1 || die "jq must be installed"
command -v curl > /dev/null 2>&1 || die "curl must be installed"

[[ $# -ge 7 ]] \
  || die "usage: $0 <api-url> <graphql-url> <token> <owner/repo> <pr-number> <comment-prefix> <user-login>"

gh_api_url=$1
gh_gql_url=$2
gh_token=$3
gh_repo=$4
gh_pr_number=$5
gh_comment_prefix=$6
gh_user_login=$7

# Largest page size the GitHub REST API accepts for comment listings.
per_page=100

# List all comments for the Pull Request we're working on. The listing is
# paginated; without walking the pages only the first (oldest) comments would
# be seen and later schema-diff comments would never be hidden.
echo "Listing all issue comments for PR #$gh_pr_number"
: > comment_ids.txt
total_comments=0
page=1
while :; do
  curl -fsS -X GET \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer $gh_token" \
    --output response.json \
    "${gh_api_url}/repos/${gh_repo}/issues/${gh_pr_number}/comments?per_page=${per_page}&page=${page}" \
    || die "Failed to list all issue comments for PR #$gh_pr_number"

  page_count=$(jq 'length' response.json) \
    || die 'Failed to parse issue comments response'
  [[ "$page_count" -gt 0 ]] || break
  total_comments=$((total_comments + page_count))

  # Use jq to find all comments we've posted before (matches against the
  # action runner's user login and a partial string match on comment body).
  # The node ids are deliberately left JSON-quoted (no `jq -r`): they are
  # pasted as string literals into the GraphQL mutation below.
  jq \
    --arg user_login "$gh_user_login" \
    --arg pfx "$gh_comment_prefix" \
    '.[] |
      select(.user.login == $user_login) |
      select(.body | startswith($pfx)) |
      .node_id
    ' response.json >> comment_ids.txt \
    || die 'Failed to parse issue comments response'

  # A short page means this was the last one.
  [[ "$page_count" -ge "$per_page" ]] || break
  page=$((page + 1))
done

[[ "$total_comments" -gt 0 ]] \
  || die "No comments found for PR #$gh_pr_number, nothing to hide"

[[ -s comment_ids.txt ]] \
  || die "No comments matching message prefix and github user id ($gh_user_login) found"

# Build GitHub GraphQL queries for each comment id. Because GitHub doesn't
# return whether a comment is already hidden or not in its comment listing
# endpoint, we have to hide all of them. The scratch file is truncated first
# so a re-run in the same directory does not replay stale mutations.
: > graphql.txt
while IFS= read -r node_id; do
  printf '%s\n' "mutation { minimizeComment(input: {subjectId: $node_id, classifier: OUTDATED}) { minimizedComment { isMinimized } } }" >> graphql.txt
done < comment_ids.txt

# Parse it through jq to build a valid json object (one per line).
: > hide_queries.json
while IFS= read -r graphql; do
  jq --null-input -c --arg q "$graphql" '{"query": $q}' >> hide_queries.json \
    || die "Failed to create http minimizeComment query for graphql query $graphql"
done < graphql.txt

# Hide Comments
echo 'Issuing GraphQL calls to GitHub to hide previous schema-diff comments'
while IFS= read -r hide_query; do
  curl -fsS -X POST \
    -H "Authorization: Bearer $gh_token" \
    -d "$hide_query" \
    "$gh_gql_url" \
    || die "Failed to issue request to minimize comment"
done < hide_queries.json

.github/workflows/schema-diff.yml

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Generates a database schema diff between a pull request and its base branch
# and reports it as a comment on the pull request.
name: schema-diff

# Only run for pull requests that touch the schema migrations or the
# schema-diff tooling itself.
on:
  pull_request:
    paths:
      - 'internal/db/schema/migrations/**/*.sql'
      - 'scripts/schema-diff.sh'
      - '.github/scripts/schema-diff-hide-gh-comments.sh'
      - '.github/workflows/schema-diff.yml'

# Write access is needed to post the new comment and minimize the old ones.
permissions:
  contents: read
  issues: write
  pull-requests: write

jobs:
  schema-diff:
    name: "Schema Diff"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
        with:
          # Fetch full history: schema-diff.sh resolves and checks out the
          # base ref, which a shallow clone would not contain.
          fetch-depth: '0'
      - name: Generate Schema Diff
        env:
          # Password picked up by the PostgreSQL client tools used in
          # schema-diff.sh.
          PGPASSWORD: boundary
        run: |
          # Run schema-diff between the code in the PR and its base.
          ./scripts/schema-diff.sh "origin/${GITHUB_BASE_REF}"

          gh_comment_prefix='Database schema diff between'
          gh_user_login='github-actions[bot]'

          # Hide previous schema-diff comments we've posted.
          echo 'Hiding previous PR comments'
          ./.github/scripts/schema-diff-hide-gh-comments.sh \
            "$GITHUB_API_URL" \
            "$GITHUB_GRAPHQL_URL" \
            "${{ secrets.GITHUB_TOKEN }}" \
            "$GITHUB_REPOSITORY" \
            "${{ github.event.pull_request.number }}" \
            "$gh_comment_prefix" \
            "$gh_user_login"

          # getdiff echoes the diff in GitHub markdown syntax or "Unchanged" if
          # the file is empty (or missing).
          #   $1 - path to a .diff file produced by schema-diff.sh
          getdiff() {
            if [[ -s "$1" ]]; then
              echo \`\`\`diff
              # Quoted path and printf: safe for any file name and content.
              printf '%s\n' "$(cat -- "$1")"
              echo \`\`\`
            else
              echo "Unchanged"
            fi
          }

          # Build heredoc with all the diffs the schema diff tool generated.
          echo 'Building new GitHub schema-diff comment'
          cat << EOF > github-comment.txt
          $gh_comment_prefix \`${GITHUB_BASE_REF}\` and \`${GITHUB_HEAD_REF}\` @ ${{ github.event.pull_request.head.sha }}

          #### Functions
          $(getdiff .schema-diff/funcs.diff)

          #### Tables
          $(getdiff .schema-diff/tables.diff)

          #### Views
          $(getdiff .schema-diff/views.diff)

          #### Triggers
          $(getdiff .schema-diff/triggers.diff)

          #### Indexes
          $(getdiff .schema-diff/indexes.diff)

          #### Post Data
          $(getdiff .schema-diff/post_data.diff)
          EOF

          # Parse it through jq to build a valid json object.
          jq --null-input \
            --arg comment "$(cat github-comment.txt)" \
            '{"body": $comment}' > body.json

          # Post comment on PR.
          echo "Posting new GitHub schema-diff comment under PR #${{ github.event.pull_request.number }}"
          curl -sX POST \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
            -d @body.json \
            "${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}/issues/${{ github.event.pull_request.number }}/comments"

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,5 @@ enos/ci/*/.terraform.lock.hcl
143143
!.release/ci.hcl
144144
dist
145145
out
146+
147+
.schema-diff/

scripts/schema-diff.sh

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env bash
2+
# Copyright (c) HashiCorp, Inc.
3+
# SPDX-License-Identifier: MPL-2.0
4+
5+
6+
#######################################
# Report a fatal error and abort the script.
# Arguments: all arguments are joined with single spaces and printed.
# Outputs:   the message, on stderr (stdout carries the schema diffs, so
#            diagnostics are kept out of it).
# Returns:   never returns; exits the shell with status 255.
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 255
}
10+
11+
# Verify every external tool used below is available before doing any work.
# `command -v` is a shell builtin, so the check itself has no dependency on
# the external `which` binary.
for required_tool in pg_dump pg_restore pg_isready awk git make; do
  command -v "${required_tool}" > /dev/null 2>&1 \
    || die "${required_tool} must be installed"
done

# From here on, abort on the first failing command.
set -e

# Port of the test database; exported so the `make` targets invoked later
# see the same value.
SQL_TEST_DB_PORT=${SQL_TEST_DB_PORT:-5432}
export SQL_TEST_DB_PORT

DB_HOST=${DB_HOST:-127.0.0.1}

# Base ref to compare against: first argument, defaulting to "main".
base_branch=${1:-main}
base_commit=$(git rev-parse "${base_branch}")

# Where we are now. new_branch is the literal string "HEAD" when the
# checkout is detached.
new_branch=$(git rev-parse --abbrev-ref HEAD)
new_commit=$(git rev-parse HEAD)

# Working directory (relative to the current directory) for dumps, extracted
# definitions and the resulting .diff files.
tmp_dir=".schema-diff"
35+
36+
#######################################
# Restore every archive entry of one type into its own lower-cased .sql file.
# Globals:   none
# Arguments: $1 - path to the pg_dump custom-format archive
#            $2 - output directory (must already exist)
#            $3 - entry type as printed in column 4 of `pg_restore -l`
#            $4 - pg_restore selector flag for that type (-P, -t, -T or -I)
# Outputs:   writes <out_dir>/<name>.sql for each matching entry
#######################################
_extract_objects() {
  local archive=$1
  local out_dir=$2
  local entry_type=$3
  local select_flag=$4
  local entry name

  # The awk program prints columns 6..NF-1 of each matching listing line,
  # i.e. everything after the schema column except the trailing owner
  # column. printf gets an explicit "%s" format so a "%" inside an object
  # name is treated as data, not as a format directive.
  while read -r entry; do
    # Drop a trailing "(args...)" signature so the file name is the bare
    # object name. NOTE(review): overloaded functions that share a name end
    # up in the same file, the last one winning.
    name="${entry%(*}"
    pg_restore -s -O "${select_flag}" "${entry}" -f - "${archive}" \
      | tr '[:upper:]' '[:lower:]' > "${out_dir}/${name}.sql"
  done < <(pg_restore -l "${archive}" -f - \
    | awk -v kind="${entry_type}" \
      '$4 == kind {for (i = 6; i < NF; i++) printf "%s ", $i; print ""}')
}

#######################################
# Split a schema dump into per-object, lower-cased SQL files so two dumps
# can be compared with a directory diff.
# Globals:   tmp_dir (read)
# Arguments: $1 - suffix identifying the dump; reads ${tmp_dir}/<suffix>.dump
# Outputs:   progress messages on stdout; files under
#            ${tmp_dir}/{funcs,tables,views,triggers,indexes}_<suffix>/ and
#            ${tmp_dir}/post_data_<suffix>.sql
#######################################
extract() {
  local suffix=$1
  local dump="${tmp_dir}/${suffix}.dump"

  mkdir -p \
    "${tmp_dir}/funcs_${suffix}" \
    "${tmp_dir}/tables_${suffix}" \
    "${tmp_dir}/views_${suffix}" \
    "${tmp_dir}/triggers_${suffix}" \
    "${tmp_dir}/indexes_${suffix}"

  echo "extracting function definitions from ${dump}"
  _extract_objects "${dump}" "${tmp_dir}/funcs_${suffix}" "FUNCTION" -P

  echo "extracting table definitions from ${dump}"
  _extract_objects "${dump}" "${tmp_dir}/tables_${suffix}" "TABLE" -t

  echo "extracting view definitions from ${dump}"
  _extract_objects "${dump}" "${tmp_dir}/views_${suffix}" "VIEW" -t

  echo "extracting trigger definitions from ${dump}"
  _extract_objects "${dump}" "${tmp_dir}/triggers_${suffix}" "TRIGGER" -T

  echo "extracting index definitions from ${dump}"
  _extract_objects "${dump}" "${tmp_dir}/indexes_${suffix}" "INDEX" -I

  # Everything in the archive's post-data section goes into a single file.
  echo "extracting post-data from ${dump}"
  pg_restore --section=post-data -O -f - "${dump}" \
    | tr '[:upper:]' '[:lower:]' > "${tmp_dir}/post_data_${suffix}.sql"
}
80+
81+
#######################################
# Bring up a fresh test database for the currently checked-out code and dump
# it to ${tmp_dir}/<suffix>.dump in pg_dump custom format.
# Globals:   tmp_dir, DB_HOST, SQL_TEST_DB_PORT (read)
# Arguments: $1 - suffix used to name the dump file
# Outputs:   progress on stdout; on timeout, the database container logs
# Returns:   0 on success; exits via die if the database never becomes ready
#######################################
dump() {
  local suffix=$1
  local dump_file="${tmp_dir}/${suffix}.dump"
  # Poll once per second, so this is roughly a two-minute timeout.
  local -r max_attempts=120
  local attempts=0

  mkdir -p "${tmp_dir}"

  # Start from a clean slate, then bring the database up.
  # NOTE(review): presumably `database-up` also applies the migrations of the
  # checked-out tree -- confirm against internal/db/sqltest/Makefile.
  make -C internal/db/sqltest clean
  make -C internal/db/sqltest database-up
  until pg_isready -h "${DB_HOST}" -p "${SQL_TEST_DB_PORT}"; do
    attempts=$((attempts + 1))
    if [[ ${attempts} -ge ${max_attempts} ]]; then
      # Best effort: a failure to fetch logs must not prevent the cleanup
      # and the error message below when running under `set -e`.
      docker logs boundary-sql-tests || true
      make -C internal/db/sqltest clean
      die "timeout waiting for database, likely an error in a migration"
    fi
    sleep 1
  done

  echo "dumping to ${dump_file}"
  # Pass the port explicitly so the dump is taken from the same server that
  # pg_isready just probed, even when SQL_TEST_DB_PORT is not the default.
  pg_dump -Fc -h "${DB_HOST}" -p "${SQL_TEST_DB_PORT}" -U boundary -f "${dump_file}"

  make -C internal/db/sqltest clean
}
106+
107+
# Start from an empty working directory; `:?` aborts instead of running
# `rm -rf ""` should tmp_dir ever be unset or empty.
rm -rf "${tmp_dir:?}"

echo "Comparing schema between ${new_branch}@${new_commit} ${base_branch}@${base_commit}"

# Dump and split the schema of the code currently checked out.
dump "${new_commit}"
extract "${new_commit}"

# Ref to return to afterwards: the branch name, or the commit itself when we
# started from a detached HEAD.
if [[ "${new_branch}" == "HEAD" ]]; then
  restore_ref="${new_commit}"
else
  restore_ref="${new_branch}"
fi

# If anything fails while the base commit is checked out, put the working
# tree back where it was instead of leaving it on the base commit.
trap 'git checkout "${restore_ref}"' EXIT

git checkout "${base_commit}"

dump "${base_commit}"
extract "${base_commit}"

git checkout "${restore_ref}"
trap - EXIT

# `git diff --no-index` compares plain directories/files. It exits non-zero
# when there are differences; piping through tee keeps that from tripping
# `set -e` while both printing the diff and saving it for later use.
for kind in "funcs" "tables" "views" "triggers" "indexes"; do
  git diff --no-index "${tmp_dir}/${kind}_${base_commit}" "${tmp_dir}/${kind}_${new_commit}" | tee "${tmp_dir}/${kind}.diff"
done

git diff --no-index "${tmp_dir}/post_data_${base_commit}.sql" "${tmp_dir}/post_data_${new_commit}.sql" | tee "${tmp_dir}/post_data.diff"

0 commit comments

Comments
 (0)