-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathsearch.py
145 lines (125 loc) · 6.72 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""Search example
Use case: "What data does Gro have available for x?"
This example shows how to query the API for sources matching a search term and
discover what data series are available for a selected source.
Note that the search functions work with any of the following entity types:
- metrics
- items
- regions
- sources (shown here)
See Also
--------
groclient.GroClient.search()
groclient.GroClient.search_and_lookup()
groclient.GroClient.get_data_series()
https://github.com/gro-intelligence/api-client/wiki/Entities-Definition
https://github.com/gro-intelligence/api-client/wiki/Data-Series-Definition
"""
import os
from groclient import GroClient
# Hostname of the Gro API server; handed to GroClient in main().
API_HOST = "api.gro-intelligence.com"
# API access token, read from the GROAPI_TOKEN environment variable. If the
# variable is not set, the lookup raises KeyError when this module is loaded.
ACCESS_TOKEN = os.environ["GROAPI_TOKEN"]
def main():
    """Walk through entity search and data series discovery with the Gro API.

    Builds a GroClient from API_HOST and ACCESS_TOKEN and prints, in order:

    1. the top client.search() hit for a metric, an item, a region and a
       source;
    2. the first client.search_and_lookup() result for each of the same
       entity types;
    3. how many data series client.get_data_series() returns for an
       item/region pair, along with the number of distinct metrics and
       sources among them;
    4. the distinct item names among the data series of a single source.

    Every step issues requests through the client; nothing is returned.
    """
    client = GroClient(API_HOST, ACCESS_TOKEN)

    # ===================
    # | client.search() |
    # ===================
    # Returns a list of ids, ordered by relevance to your given search term
    # Note that you can search across metrics, items, regions, and sources.
    print("client.search()")
    print(client.search("metrics", "Exports")[0])  # { 'id': 125 }
    print(client.search("items", "Wheat")[0])  # { 'id': 95 }
    print(client.search("regions", "India")[0])  # { 'id': 1094 }
    print(client.search("sources", "USDA NASS")[0])  # { 'id': 29 }

    # ==============================
    # | client.search_and_lookup() |
    # ==============================
    # Helper function to use the client.lookup() function on each search result
    # and see more details about the result.
    # Returns a generator, which yields one search result at a time. Use the
    # next() method to get the first result:
    print("\nclient.search_and_lookup()")
    print(next(client.search_and_lookup("metrics", "Export Value")))
    # {'id': 10000, 'contains': [10065, 11078], 'name': 'Export Value',
    # 'definition': 'The value of exports, or goods that have been sent to a \
    # foreign country for sale. Data is mostly reported as free-on-board, \
    # which includes the cost of delivering the goods to a designated \
    # delivery vessel; exports of a good may not necessarily equal imports \
    # for the partner region, since imports and exports are measured \
    # differently by different governments.'}
    print(next(client.search_and_lookup("items", "Wheat")))
    # {'id': 95, 'contains': [3595, 5772], 'name': 'Wheat',
    # 'definition': "Cereals within the genus <i>Triticum</i>, which is one \
    # of the world's most popular and widely cultivated grain crops. Data \
    # primarily covers common and durum wheat, as well as spelt."}
    print(next(client.search_and_lookup("regions", "India")))
    # {'id': 1094, 'contains': [11187, 11190, 11174, 11197, 11188, 11200,
    # 11204, 11186, 11180, 11177, 11207, 11201, 11173, 11178, 11195, 11194,
    # 11183, 11199, 11203, 11202, 11193, 11181, 13475, 11196, 11185, 11175,
    # 11198, 11192, 11179, 11191, 11189, 11176, 11182, 11205, 11184, 11206],
    # 'name': 'India', 'level': 3, 'latitude': 22.8838, 'longitude': 79.6201}
    print(next(client.search_and_lookup("sources", "USDA NASS")))
    # {'id': 29, 'name': 'USDA NASS Animals', 'longName': 'USDA National \
    # Agricultural Statistics Database', 'metaType': 'data_series',
    # 'sourceLag': {'annual': '4m15d', 'weekly': '4d', 'monthly': '1m10d'},
    # 'historicalStartDate': '1866-12-01T00:00:00.000Z',
    # 'description': 'The National Agricultural Statistics Service is an arm \
    # of the USDA and one of its primary intelligence- and data-gathering \
    # units. The database provides updates almost daily on livestock, crops, \
    # demographics, economics, and environmental indicators. Metrics covered \
    # include production, yield, area harvested, price, inputs, stocks, etc. \
    # The granularity is mostly internal US data and goes back as far as \
    # 1850.', 'resolution': 'District', 'regionalCoverage': 'United States',
    # 'language': 'English', 'fileFormat': 'CSV'}

    # ==========================
    # | client.get_data_series |
    # ==========================
    # Once you have identified one or more entities of interest, you can see
    # what data series are available for those entities using the
    # client.get_data_series() function.
    # The normal process of data discovery using the API would be to look up
    # items and/or regions of interest first. i.e. if you know you are
    # interested in United States Corn data. Then you can see what metrics are
    # available for that item and region: production, exports, prices, etc.
    # For example:
    print("\nclient.get_data_series() Part 1: Search by item/region")
    # First look up the item/region of interest as seen in the above examples.
    # We just need the id number, so we will use search(). search_and_lookup()
    # would also work.
    corn = client.search("items", "corn")[0]
    united_states = client.search("regions", "united states")[0]
    # Now we can use client.get_data_series() to see what data series exist.
    # The selection is passed as ordinary keyword arguments.
    data_series_list = client.get_data_series(
        item_id=corn["id"],
        region_id=united_states["id"],
    )
    print("There are", len(data_series_list), "different US Corn data series")
    unique_metrics = {data_series["metric_name"] for data_series in data_series_list}
    print("Unique metrics:", len(unique_metrics))
    unique_sources = {data_series["source_name"] for data_series in data_series_list}
    print("Unique sources:", len(unique_sources))

    # If you are interested in a particular source, you can also start there
    # and see what data series exist for it. One frequently asked question is
    # how to see what items/regions Gro publishes yield models for. Here is how
    # one would find out programmatically:
    print("\nclient.get_data_series() Part 2: search by source")
    # Gro publishes its own yield model values under the "Gro Yield Model"
    # source, which is treated as its own source just like any other, and you
    # can find it in the same manner:
    gro_yield_model = client.search("sources", "Gro Yield Model")[0]
    # Now we can use client.get_data_series() to see what data series exist
    # under that source.
    data_series_list = client.get_data_series(source_id=gro_yield_model["id"])
    print("There are", len(data_series_list), "different Gro Yield Model data series")
    # There are thousands of data series in data_series_list since there are
    # many different regions. Let's just check the unique items there are yield
    # models for:
    unique_items = {data_series["item_name"] for data_series in data_series_list}
    # A set has no defined iteration order, so sort the names to make the
    # printed listing the same on every run.
    for item in sorted(unique_items):
        print(item)
    # e.g.
    # Corn
    # Hard red winter wheat
    # Soybeans
    # Wheat
    # Winter wheat
# Run the walkthrough only when this file is executed directly as a script,
# not when it is imported from another module.
if __name__ == "__main__":
    main()