Skip to content

Commit 01a18f8

Browse files
authored
feat(pubsub-avro): add a pubsub avro example (#292)
1 parent 954b9d5 commit 01a18f8

File tree

8 files changed

+465
-0
lines changed

8 files changed

+465
-0
lines changed

pubsub-avro/CMakeLists.txt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# ~~~
2+
# Copyright 2023 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ~~~
16+
17+
cmake_minimum_required(VERSION 3.20)

# Project identity. PACKAGE_BUGREPORT records where issues for this sample
# should be filed.
set(PACKAGE_BUGREPORT
    "https://github.com/GoogleCloudPlatform/cpp-samples/issues")
project(pubsub-avro LANGUAGES CXX)

# External dependencies: the Cloud Pub/Sub C++ client and the Avro C++ library.
# Both must provide CMake package config files.
find_package(google_cloud_cpp_pubsub CONFIG REQUIRED)
find_package(unofficial-avro-cpp CONFIG REQUIRED)
26+
27+
# Generate the Avro C++ headers using the Avro compiler (`avrogencpp`).
#
# REQUIRED makes the configure step stop with a clear error when the tool is
# not installed, instead of failing later with an `AVROGENCPP-NOTFOUND` command.
find_program(AVROGENCPP NAMES avrogencpp REQUIRED)

# run_avro_compiler(<file> <namespace>)
#
# Adds a build rule that runs `avrogencpp` on
# `${CMAKE_CURRENT_SOURCE_DIR}/<file>.avro` to produce `<file>.h`, with the
# generated types placed in the C++ namespace `<namespace>`. The header is
# regenerated whenever the schema file or the compiler binary changes.
macro (run_avro_compiler file namespace)
    add_custom_command(
        OUTPUT ${file}.h
        COMMAND
            ${AVROGENCPP} --input
            "${CMAKE_CURRENT_SOURCE_DIR}/${file}.avro" --output ${file}.h
            --namespace ${namespace}
        DEPENDS ${AVROGENCPP} "${CMAKE_CURRENT_SOURCE_DIR}/${file}.avro"
        COMMENT "Executing Avro compiler"
        VERBATIM)
    set_source_files_properties(${file}.h PROPERTIES GENERATED TRUE)
endmacro ()
40+
41+
# Generate one header per schema revision; each lives in its own C++ namespace
# so both revisions of the record type can be included side by side.
run_avro_compiler(schema1 v1)
run_avro_compiler(schema2 v2)

# Listing the generated headers as sources makes the executable depend on the
# custom commands that produce them.
add_executable(quickstart quickstart.cc schema1.h schema2.h)
target_compile_features(quickstart PRIVATE cxx_std_14)
# The generated headers are written to the build directory. SYSTEM must come
# before the scope keyword; written after it, CMake would treat "SYSTEM" as a
# directory name.
target_include_directories(quickstart SYSTEM
                           PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(quickstart PRIVATE google-cloud-cpp::pubsub
                                         unofficial::avro-cpp::avrocpp)

pubsub-avro/README.md

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# Subscribe to avro records
2+
3+
## Overview
4+
5+
The quickstart shows how to subscribe to receive avro messages that could be for
6+
different schema revisions. This example uses the [Avro C++] library and
7+
[C++ Cloud Pub/Sub] library to use the [Cloud Pub/Sub] service. The setup
8+
involves:
9+
10+
1. Creating an initial schema (Schema 1)
11+
1. Creating a topic with Schema 1
12+
1. Creating a subscription to the topic
13+
1. Publishing a message to the topic with Schema 1
14+
1. Committing a schema revision (Schema 2)
15+
1. Publishing a message to the topic with Schema 2
16+
1. Receiving both messages using a subscriber
17+
18+
## Prerequisites
19+
20+
### 1. Create a project in the Google Cloud Platform Console
21+
22+
If you haven't already created a project, create one now.
23+
24+
Projects enable you to manage all Google Cloud Platform resources for your app,
25+
including deployment, access control, billing, and services.
26+
27+
1. Open the [Cloud Platform Console](https://console.cloud.google.com/).
28+
1. In the drop-down menu at the top, select Create a project.
29+
1. Give your project a name.
30+
1. Make a note of the project ID, which might be different from the project
31+
name. The project ID is used in commands and in configurations.
32+
33+
### 2. Enable billing for your project
34+
35+
If you haven't already enabled billing for your project,
36+
[enable billing now](https://console.cloud.google.com/project/_/settings).
37+
Enabling billing allows the application to consume billable resources such as
38+
Pub/Sub API calls.
39+
40+
See
41+
[Cloud Platform Console Help](https://support.google.com/cloud/answer/6288653)
42+
for more information about billing settings.
43+
44+
### 3. Enable APIs for your project
45+
46+
[Click here](https://console.cloud.google.com/flows/enableapi?apiid=pubsub.googleapis.com&showconfirmation=true)
47+
to visit Cloud Platform Console and enable the Pub/Sub API via the UI.
48+
49+
Or use the CLI:
50+
51+
```
52+
gcloud services enable pubsub.googleapis.com
53+
```
54+
55+
## Build using CMake and Vcpkg
56+
57+
To build and run the sample, [setup a C++ development environment].
58+
59+
### 1. Install vcpkg
60+
61+
This project uses [`vcpkg`](https://github.com/microsoft/vcpkg) for dependency
62+
management. Clone the vcpkg repository to your preferred location. In these
63+
instructions we use `$HOME`:
64+
65+
```shell
66+
git -C $HOME clone https://github.com/microsoft/vcpkg.git
67+
cd $HOME/vcpkg
68+
./vcpkg install google-cloud-cpp
69+
```
70+
71+
### 2. Download or clone this repo
72+
73+
```shell
74+
git clone https://github.com/GoogleCloudPlatform/cpp-samples
75+
```
76+
77+
### 3. Compile these examples
78+
79+
Use the `vcpkg` toolchain file to download and compile dependencies. This file
80+
would be in the directory you cloned `vcpkg` into, `$HOME/vcpkg` if you are
81+
following the instructions to the letter. Note that building all the
82+
dependencies can take up to an hour, depending on the performance of your
83+
workstation. These dependencies are cached, so a second build should be
84+
substantially faster.
85+
86+
```sh
87+
cd cpp-samples/pubsub-avro
88+
cmake -S . -B .build -DCMAKE_TOOLCHAIN_FILE=$HOME/vcpkg/scripts/buildsystems/vcpkg.cmake -G Ninja
89+
cmake --build .build
90+
```
91+
92+
## Setup the Pub/Sub messages
93+
94+
Export the following environment variables to run the setup scripts:
95+
96+
```shell
97+
export GOOGLE_CLOUD_PROJECT=[PROJECT ID] # Use your project ID here
98+
export GOOGLE_CLOUD_TOPIC=avro-topic
99+
export GOOGLE_CLOUD_SUBSCRIPTION=avro-sub
100+
export GOOGLE_CLOUD_SCHEMA_NAME=state
101+
export GOOGLE_CLOUD_SCHEMA_FILE1=schema1.avro
102+
export GOOGLE_CLOUD_SCHEMA_FILE2=schema2.avro
103+
```
104+
105+
```shell
106+
./setup.sh
107+
```
108+
109+
## Run the example
110+
111+
This will resolve the schemas when receiving the message and return data in the
112+
format of schema2, even if it was sent in the format of schema1.
113+
114+
```sh
115+
.build/quickstart ${GOOGLE_CLOUD_PROJECT} avro-sub schema2.avro
116+
```
117+
118+
If you want to send more messages to test, you can use the following commands to
119+
send a message in schema1
120+
121+
```sh
122+
gcloud pubsub topics publish ${GOOGLE_CLOUD_TOPIC} \
123+
--project ${GOOGLE_CLOUD_PROJECT} \
124+
--message '{"name": "New York", "post_abbr": "NY"}'
125+
```
126+
127+
Or in schema2
128+
129+
```sh
130+
gcloud pubsub topics publish ${GOOGLE_CLOUD_TOPIC} \
131+
--project ${GOOGLE_CLOUD_PROJECT} \
132+
--message '{"name": "New York", "post_abbr": "NY", "population": 10000}'
133+
```
134+
135+
## Cleanup
136+
137+
To delete the created resources (topic, subscription, schema), run:
138+
139+
```shell
140+
./cleanup.sh
141+
```
142+
143+
## Platform Specific Notes
144+
145+
### macOS
146+
147+
gRPC [requires][grpc-roots-pem-bug] an environment variable to configure the
148+
trust store for SSL certificates, you can download and configure this using:
149+
150+
```bash
151+
curl -Lo roots.pem https://pki.google.com/roots.pem
152+
export GRPC_DEFAULT_SSL_ROOTS_FILE_PATH="$PWD/roots.pem"
153+
```
154+
155+
### Windows
156+
157+
gRPC [requires][grpc-roots-pem-bug] an environment variable to configure the
158+
trust store for SSL certificates, you can download and configure this using:
159+
160+
```console
161+
@powershell -NoProfile -ExecutionPolicy unrestricted -Command ^
162+
(new-object System.Net.WebClient).Downloadfile( ^
163+
'https://pki.google.com/roots.pem', 'roots.pem')
164+
set GRPC_DEFAULT_SSL_ROOTS_FILE_PATH=%cd%\roots.pem
165+
```
166+
167+
[avro c++]: https://avro.apache.org/docs/1.11.1/api/cpp/html/
168+
[c++ cloud pub/sub]: https://cloud.google.com/cpp/docs/reference/pubsub/latest
169+
[cloud pub/sub]: https://cloud.google.com/pubsub/docs
170+
[grpc-roots-pem-bug]: https://github.com/grpc/grpc/issues/16571
171+
[setup a c++ development environment]: https://cloud.google.com/cpp/docs/setup

pubsub-avro/cleanup.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2023 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# https://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# Delete the topic, subscription, and schema.
#
# Validate every required variable up front so that nothing is deleted when
# the environment is only partially configured. `${VAR:?msg}` aborts the script
# with `msg` if VAR is unset or empty.
: "${GOOGLE_CLOUD_PROJECT:?GOOGLE_CLOUD_PROJECT must be set}"
: "${GOOGLE_CLOUD_TOPIC:?GOOGLE_CLOUD_TOPIC must be set}"
: "${GOOGLE_CLOUD_SUBSCRIPTION:?GOOGLE_CLOUD_SUBSCRIPTION must be set}"
: "${GOOGLE_CLOUD_SCHEMA_NAME:?GOOGLE_CLOUD_SCHEMA_NAME must be set}"

# Quote the expansions so the values are passed to gcloud as single arguments.
gcloud pubsub topics delete "${GOOGLE_CLOUD_TOPIC}" "--project=${GOOGLE_CLOUD_PROJECT}"
gcloud pubsub subscriptions delete "${GOOGLE_CLOUD_SUBSCRIPTION}" "--project=${GOOGLE_CLOUD_PROJECT}"
gcloud pubsub schemas delete "${GOOGLE_CLOUD_SCHEMA_NAME}" "--project=${GOOGLE_CLOUD_PROJECT}"

pubsub-avro/quickstart.cc

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "avro/Compiler.hh"
#include "avro/DataFile.hh"
#include "avro/Decoder.hh"
#include "avro/Stream.hh"
#include "avro/ValidSchema.hh"
#include "google/cloud/pubsub/message.h"
#include "google/cloud/pubsub/schema_client.h"
#include "google/cloud/pubsub/subscriber.h"
#include "google/cloud/pubsub/subscription.h"
#include "schema1.h"
#include "schema2.h"
#include <chrono>
#include <fstream>
#include <future>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <unordered_map>
30+
31+
int main(int argc, char* argv[]) try {
32+
if (argc != 4) {
33+
std::cerr << "Usage: " << argv[0]
34+
<< " <project-id> <subscription-id> <avro-file>\n";
35+
return 1;
36+
}
37+
38+
std::string const project_id = argv[1];
39+
std::string const subscription_id = argv[2];
40+
std::string const avro_file = argv[3];
41+
42+
auto constexpr kWaitTimeout = std::chrono::seconds(30);
43+
44+
// Create a namespace alias to make the code easier to read.
45+
namespace pubsub = ::google::cloud::pubsub;
46+
47+
//! [START pubsub_subscribe_avro_records_with_revisions]
48+
auto subscriber = pubsub::Subscriber(pubsub::MakeSubscriberConnection(
49+
pubsub::Subscription(project_id, subscription_id)));
50+
51+
// Create a schema client.
52+
auto schema_client =
53+
pubsub::SchemaServiceClient(pubsub::MakeSchemaServiceConnection());
54+
55+
// Read the reader schema. This is the schema you want the messages to be
56+
// evaluated using.
57+
std::ifstream ifs(avro_file);
58+
avro::ValidSchema reader_schema;
59+
avro::compileJsonSchema(ifs, reader_schema);
60+
61+
std::unordered_map<std::string, avro::ValidSchema> revisions_to_schemas;
62+
auto session = subscriber.Subscribe(
63+
[&](pubsub::Message const& message, pubsub::AckHandler h) {
64+
// Get the reader schema revision for the message.
65+
auto schema_name = message.attributes()["googclient_schemaname"];
66+
auto schema_revision_id =
67+
message.attributes()["googclient_schemarevisionid"];
68+
// If we haven't received a message with this schema, look it up.
69+
if (revisions_to_schemas.find(schema_revision_id) ==
70+
revisions_to_schemas.end()) {
71+
auto schema_path = schema_name + "@" + schema_revision_id;
72+
// Use the schema client to get the path.
73+
auto schema = schema_client.GetSchema(schema_path);
74+
if (!schema) {
75+
std::cout << "Schema not found:" << schema_path << "\n";
76+
return;
77+
}
78+
avro::ValidSchema writer_schema;
79+
std::stringstream in;
80+
in << schema.value().definition();
81+
avro::compileJsonSchema(in, writer_schema);
82+
revisions_to_schemas[schema_revision_id] = writer_schema;
83+
}
84+
auto writer_schema = revisions_to_schemas[schema_revision_id];
85+
86+
auto encoding = message.attributes()["googclient_schemaencoding"];
87+
if (encoding == "JSON") {
88+
std::stringstream in;
89+
in << message.data();
90+
auto avro_in = avro::istreamInputStream(in);
91+
avro::DecoderPtr decoder = avro::resolvingDecoder(
92+
writer_schema, reader_schema, avro::jsonDecoder(writer_schema));
93+
decoder->init(*avro_in);
94+
95+
v2::State state;
96+
avro::decode(*decoder, state);
97+
std::cout << "Name: " << state.name << "\n";
98+
std::cout << "Postal Abbreviation: " << state.post_abbr << "\n";
99+
std::cout << "Population: " << state.population << "\n";
100+
} else {
101+
std::cout << "Unable to decode. Received message using encoding"
102+
<< encoding << "\n";
103+
}
104+
std::move(h).ack();
105+
});
106+
// [END pubsub_subscribe_avro_records_with_revisions]
107+
108+
std::cout << "Waiting for messages on " + subscription_id + "...\n";
109+
110+
// Blocks until the timeout is reached.
111+
auto result = session.wait_for(kWaitTimeout);
112+
if (result == std::future_status::timeout) {
113+
std::cout << "timeout reached, ending session\n";
114+
session.cancel();
115+
}
116+
117+
return 0;
118+
} catch (google::cloud::Status const& status) {
119+
std::cerr << "google::cloud::Status thrown: " << status << "\n";
120+
return 1;
121+
} catch (const std::exception& error) {
122+
std::cout << error.what() << std::endl;
123+
}

pubsub-avro/schema1.avro

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"type": "record",
3+
"name": "State",
4+
"namespace": "utilities",
5+
"doc": "A list of states in the United States of America.",
6+
"fields": [
7+
{
8+
"name": "name",
9+
"type": "string",
10+
"doc": "The common name of the state."
11+
},
12+
{
13+
"name": "post_abbr",
14+
"type": "string",
15+
"doc": "The postal code abbreviation of the state."
16+
}
17+
]
18+
}

0 commit comments

Comments
 (0)