Skip to content

Commit 8e55f1f

Browse files
Merge pull request #279 from m-lab/sandbox-soltesz-geohash
Add client-geohash query and update server-metro query
2 parents a427f35 + 8b5a92a commit 8e55f1f

File tree

4 files changed

+271
-26
lines changed

4 files changed

+271
-26
lines changed
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#standardSQL
2+
3+
-- EncodeGeoHASH returns the geohash string of the given length for a
-- latitude/longitude pair, or NULL when any argument is NULL or not a finite
-- number.
--
-- Args:
--   latitude:   degrees, bisected over the range [-90, 90].
--   longitude:  degrees, bisected over the range [-180, 180].
--   hashLength: number of base32 characters to produce. Declared FLOAT64
--               because BigQuery JavaScript UDFs do not accept INT64 inputs;
--               integer literals such as `4` are coerced implicitly.
CREATE TEMP FUNCTION EncodeGeoHASH(latitude FLOAT64, longitude FLOAT64, hashLength FLOAT64)
RETURNS STRING
LANGUAGE js AS """
  /**
   * Function sources derived from node-geohash library:
   *   https://github.com/sunng87/node-geohash/blob/master/main.js
   *
   * Copyright (c) 2011, Sun Ning.
   *
   * Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
   * files (the "Software"), to deal in the Software without
   * restriction, including without limitation the rights to use, copy,
   * modify, merge, publish, distribute, sublicense, and/or sell copies
   * of the Software, and to permit persons to whom the Software is
   * furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   *
   */
  var BASE32_CODES = "0123456789bcdefghjkmnpqrstuvwxyz";

  // True only for real, finite numbers. A SQL NULL arrives as JavaScript
  // null, which numeric comparisons would silently coerce to 0.
  function isUsableNumber(value) {
    return typeof value === 'number' && isFinite(value);
  }

  // Builds the geohash by alternately bisecting the longitude and latitude
  // ranges (longitude first) and emitting one base32 character per 5 bits.
  function encodeGeohash(latitude, longitude, numberOfChars) {
    var chars = [],
        bits = 0,
        bitsTotal = 0,
        hash_value = 0,
        maxLat = 90,
        minLat = -90,
        maxLon = 180,
        minLon = -180,
        mid;
    while (chars.length < numberOfChars) {
      if (bitsTotal % 2 === 0) {
        // Even bits refine longitude.
        mid = (maxLon + minLon) / 2;
        if (longitude > mid) {
          hash_value = (hash_value << 1) + 1;
          minLon = mid;
        } else {
          hash_value = (hash_value << 1) + 0;
          maxLon = mid;
        }
      } else {
        // Odd bits refine latitude.
        mid = (maxLat + minLat) / 2;
        if (latitude > mid) {
          hash_value = (hash_value << 1) + 1;
          minLat = mid;
        } else {
          hash_value = (hash_value << 1) + 0;
          maxLat = mid;
        }
      }

      bits++;
      bitsTotal++;
      if (bits === 5) {
        chars.push(BASE32_CODES[hash_value]);
        bits = 0;
        hash_value = 0;
      }
    }
    return chars.join('');
  }

  // Without this guard a missing coordinate would be hashed as if it were 0
  // and yield a plausible-looking but wrong geohash instead of NULL.
  if (!isUsableNumber(latitude) || !isUsableNumber(longitude) ||
      !isUsableNumber(hashLength)) {
    return null;
  }
  return encodeGeohash(latitude, longitude, Math.floor(hashLength));
""";
81+
-- Summarizes NDT tests per client geohash (4 characters), country and
-- continent for a single _PARTITIONTIME day: approximate median download and
-- upload rates plus the number of tests. Groups with 10 or fewer tests, or
-- without both a download and an upload median, are dropped.
SELECT
  -- Middle boundary of 10 approximate quantiles, i.e. the approximate median.
  APPROX_QUANTILES(
    IF(direction = "s2c", download, NULL), 10)[OFFSET(5)] AS value_download_median_rate,
  APPROX_QUANTILES(
    IF(direction = "c2s", upload, NULL), 10)[OFFSET(5)] AS value_upload_median_rate,
  EncodeGeoHASH(latitude, longitude, 4) AS geohash,
  country_code,
  continent_code,
  COUNT(*) AS value_tests
FROM (
  SELECT
    -- Direction
    CASE connection_spec.data_direction
      WHEN 0 THEN "c2s"
      WHEN 1 THEN "s2c"
      ELSE "error"
    END AS direction,
    -- Download as bits-per-second
    -- NOTE(review): the 1000000 factor presumes the SndLimTime* counters are
    -- in microseconds -- confirm against the web100 schema.
    8 * 1000000 * (web100_log_entry.snap.HCThruOctetsAcked / (
      web100_log_entry.snap.SndLimTimeRwin +
      web100_log_entry.snap.SndLimTimeCwnd +
      web100_log_entry.snap.SndLimTimeSnd)) AS download,
    -- Upload as bits-per-second
    8 * 1000000 * (web100_log_entry.snap.HCThruOctetsReceived /
      web100_log_entry.snap.Duration) AS upload,
    -- Client Lat/Lon.
    connection_spec.client_geolocation.latitude AS latitude,
    connection_spec.client_geolocation.longitude AS longitude,
    connection_spec.client_geolocation.country_code AS country_code,
    connection_spec.client_geolocation.continent_code AS continent_code
  FROM
    `measurement-lab.base_tables.ndt`
  WHERE
    -- For faster queries we use _PARTITIONTIME boundaries. And, to
    -- guarantee the _PARTITIONTIME data is "complete" (all data collected
    -- and parsed) we should wait 36 hours after start of a given day.
    -- The following is equivalent to the pseudo code: date(now() - 12h) - 1d
    _PARTITIONTIME = TIMESTAMP_SUB(TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 12 HOUR), DAY), INTERVAL 24 HOUR)
    -- Basic test quality filters for safe division.
    AND web100_log_entry.snap.Duration > 0
    AND (web100_log_entry.snap.SndLimTimeRwin +
         web100_log_entry.snap.SndLimTimeCwnd +
         web100_log_entry.snap.SndLimTimeSnd) > 0
    AND web100_log_entry.snap.CountRTT > 0
    AND web100_log_entry.snap.HCThruOctetsReceived > 0
    AND web100_log_entry.snap.HCThruOctetsAcked > 0
    -- Tests without a client location cannot be placed on the map. A
    -- JavaScript UDF that compares NULL coordinates numerically treats them
    -- as 0 and returns a real-looking geohash, so such rows must be removed
    -- here rather than relying on the geohash being NULL.
    AND connection_spec.client_geolocation.latitude IS NOT NULL
    AND connection_spec.client_geolocation.longitude IS NOT NULL
    -- AND connection_spec.tls IS TRUE
)
GROUP BY
  geohash, country_code, continent_code
HAVING
  value_tests > 10
  AND value_download_median_rate IS NOT NULL
  AND value_upload_median_rate IS NOT NULL
  AND geohash IS NOT NULL
ORDER BY
  value_tests,
  geohash
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#standardSQL
2+
3+
-- EncodeGeoHASH returns the geohash string of the given length for a
-- latitude/longitude pair, or NULL when any argument is NULL or not a finite
-- number.
--
-- Args:
--   latitude:   degrees, bisected over the range [-90, 90].
--   longitude:  degrees, bisected over the range [-180, 180].
--   hashLength: number of base32 characters to produce. Declared FLOAT64
--               because BigQuery JavaScript UDFs do not accept INT64 inputs;
--               integer literals such as `5` are coerced implicitly.
CREATE TEMP FUNCTION EncodeGeoHASH(latitude FLOAT64, longitude FLOAT64, hashLength FLOAT64)
RETURNS STRING
LANGUAGE js AS """
  /**
   * Function sources derived from node-geohash library:
   *   https://github.com/sunng87/node-geohash/blob/master/main.js
   *
   * Copyright (c) 2011, Sun Ning.
   *
   * Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
   * files (the "Software"), to deal in the Software without
   * restriction, including without limitation the rights to use, copy,
   * modify, merge, publish, distribute, sublicense, and/or sell copies
   * of the Software, and to permit persons to whom the Software is
   * furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   *
   */
  var BASE32_CODES = "0123456789bcdefghjkmnpqrstuvwxyz";

  // True only for real, finite numbers. A SQL NULL arrives as JavaScript
  // null, which numeric comparisons would silently coerce to 0.
  function isUsableNumber(value) {
    return typeof value === 'number' && isFinite(value);
  }

  // Builds the geohash by alternately bisecting the longitude and latitude
  // ranges (longitude first) and emitting one base32 character per 5 bits.
  function encodeGeohash(latitude, longitude, numberOfChars) {
    var chars = [],
        bits = 0,
        bitsTotal = 0,
        hash_value = 0,
        maxLat = 90,
        minLat = -90,
        maxLon = 180,
        minLon = -180,
        mid;
    while (chars.length < numberOfChars) {
      if (bitsTotal % 2 === 0) {
        // Even bits refine longitude.
        mid = (maxLon + minLon) / 2;
        if (longitude > mid) {
          hash_value = (hash_value << 1) + 1;
          minLon = mid;
        } else {
          hash_value = (hash_value << 1) + 0;
          maxLon = mid;
        }
      } else {
        // Odd bits refine latitude.
        mid = (maxLat + minLat) / 2;
        if (latitude > mid) {
          hash_value = (hash_value << 1) + 1;
          minLat = mid;
        } else {
          hash_value = (hash_value << 1) + 0;
          maxLat = mid;
        }
      }

      bits++;
      bitsTotal++;
      if (bits === 5) {
        chars.push(BASE32_CODES[hash_value]);
        bits = 0;
        hash_value = 0;
      }
    }
    return chars.join('');
  }

  // Without this guard a missing coordinate would be hashed as if it were 0
  // and yield a plausible-looking but wrong geohash instead of NULL.
  if (!isUsableNumber(latitude) || !isUsableNumber(longitude) ||
      !isUsableNumber(hashLength)) {
    return null;
  }
  return encodeGeohash(latitude, longitude, Math.floor(hashLength));
""";
81+
-- Reports, for each server site seen in a single _PARTITIONTIME day, its
-- metro code, a 5-character geohash for that metro, and the number of tests
-- that passed the quality filters.
SELECT
  metro, site, geohash, value_tests
FROM
(
  -- Test counts per site; sites with 10 or fewer tests are dropped.
  SELECT
    -- Every row of a site group shares the same metro prefix, so MAX only
    -- serves to collapse it to a single value per group.
    MAX(REGEXP_EXTRACT(connection_spec.server_hostname, r"mlab[1-4].([a-z]{3})[0-9]{2}.*")) as metro,
    REGEXP_EXTRACT(connection_spec.server_hostname, r"mlab[1-4].([a-z]{3}[0-9]{2}).*") as site,
    COUNT(*) AS value_tests
  FROM
    `measurement-lab.base_tables.ndt`
  WHERE
    -- For faster queries we use _PARTITIONTIME boundaries. And, to
    -- guarantee the _PARTITIONTIME data is "complete" (all data collected
    -- and parsed) we should wait 36 hours after start of a given day.
    -- The following is equivalent to the pseudo code: date(now() - 12h) - 1d
    _PARTITIONTIME = TIMESTAMP_SUB(TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 12 HOUR), DAY), INTERVAL 24 HOUR)
    -- Basic test quality filters for safe division.
    AND web100_log_entry.snap.Duration > 0
    AND (web100_log_entry.snap.SndLimTimeRwin + web100_log_entry.snap.SndLimTimeCwnd + web100_log_entry.snap.SndLimTimeSnd) > 0
    AND web100_log_entry.snap.CountRTT > 0
    AND web100_log_entry.snap.HCThruOctetsReceived > 0
    AND web100_log_entry.snap.HCThruOctetsAcked > 0
  GROUP BY
    site
  HAVING
    value_tests > 10
    AND site IS NOT NULL

) INNER JOIN (
  -- metro to geohash.
  SELECT
    REGEXP_EXTRACT(connection_spec.server_hostname, r"mlab[1-4].([a-z]{3})[0-9]{2}.*") as metro,
    -- MIN picks one representative geohash per metro.
    MIN(EncodeGeoHASH(connection_spec.server_geolocation.latitude, connection_spec.server_geolocation.longitude, 5)) AS geohash
  FROM
    `measurement-lab.base_tables.ndt`
  WHERE
    -- For faster queries we use _PARTITIONTIME boundaries. And, to
    -- guarantee the _PARTITIONTIME data is "complete" (all data collected
    -- and parsed) we should wait 36 hours after start of a given day.
    -- The following is equivalent to the pseudo code: date(now() - 12h) - 1d
    _PARTITIONTIME = TIMESTAMP_SUB(TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 12 HOUR), DAY), INTERVAL 24 HOUR)
    -- Rows without a server location must not take part in MIN(): a
    -- JavaScript UDF that compares NULL coordinates numerically treats them
    -- as 0 and returns a real-looking geohash that can sort below the true
    -- one and be selected instead.
    AND connection_spec.server_geolocation.latitude IS NOT NULL
    AND connection_spec.server_geolocation.longitude IS NOT NULL
  GROUP BY
    metro
  HAVING
    geohash IS NOT NULL

) USING (metro)

config/federation/bigquery/bq_ndt_worldmap.sql renamed to config/federation/bigquery/bq_ndt_worldmap_server.sql

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22

33
SELECT
44
machine,
5-
APPROX_QUANTILES(IF(direction = "s2c", download, NULL), 4)[OFFSET(2)] AS value_download_median_rate,
6-
APPROX_QUANTILES(IF(direction = "c2s", upload, NULL), 4)[OFFSET(2)] AS value_upload_median_rate,
7-
FORMAT("s%03dx%03d", latitude + 180, longitude + 180) as position,
5+
REGEXP_EXTRACT(machine, "mlab[1-4].([a-z]{3}[0-9]{2}).*") as site,
6+
REGEXP_EXTRACT(machine, "mlab[1-4].([a-z]{3})[0-9]{2}.*") as metro,
87
COUNT(*) AS value_tests
98
FROM (
109
SELECT
@@ -16,22 +15,7 @@ FROM (
1615
WHEN 0 THEN "c2s"
1716
WHEN 1 THEN "s2c"
1817
ELSE "error"
19-
END AS direction,
20-
21-
-- Download as bits-per-second
22-
8 * 1000000 * (web100_log_entry.snap.HCThruOctetsAcked /
23-
(web100_log_entry.snap.SndLimTimeRwin +
24-
web100_log_entry.snap.SndLimTimeCwnd +
25-
web100_log_entry.snap.SndLimTimeSnd)) AS download,
26-
27-
-- Upload as bits-per-second
28-
8 * 1000000 * (web100_log_entry.snap.HCThruOctetsReceived /
29-
web100_log_entry.snap.Duration) AS upload,
30-
31-
-- Client latitude, rounded to 5 degrees.
32-
CAST(connection_spec.client_geolocation.latitude / 3.0 as INT64) * 3 as latitude,
33-
-- Client longitude, rounded to 5 degrees.
34-
CAST(connection_spec.client_geolocation.longitude / 3.0 as INT64) * 3 as longitude
18+
END AS direction
3519

3620
FROM
3721
`measurement-lab.base_tables.ndt`
@@ -51,11 +35,6 @@ FROM (
5135
AND web100_log_entry.snap.HCThruOctetsAcked > 0
5236
)
5337
GROUP BY
54-
machine, latitude, longitude, position
55-
HAVING
56-
value_tests > 10
57-
AND value_download_median_rate is not NULL
58-
AND value_upload_median_rate is not NULL
59-
AND position is not NULL
38+
machine
6039
ORDER BY
6140
machine

k8s/prometheus-federation/deployments/bigquery-exporter.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ spec:
2020
args: [ "--project={{GCLOUD_PROJECT}}",
2121
"--type=gauge", "--query=/queries/bq_ndt_tests.sql",
2222
"--type=gauge", "--query=/queries/bq_ipv6_bias.sql",
23-
"--type=gauge", "--query=/queries/bq_ndt_worldmap.sql",
23+
"--type=gauge", "--query=/queries/bq_ndt_worldmap_server.sql",
24+
"--type=gauge", "--query=/queries/bq_ndt_geohash_client.sql",
2425
"--type=gauge", "--query=/queries/bq_ndt_server.sql" ]
2526
ports:
2627
- containerPort: 9050

0 commit comments

Comments
 (0)