Skip to content

Commit

Permalink
Better query id handling (#4)
Browse files Browse the repository at this point in the history
* Update query_id handling to prevent some edge cases

* Update docs

* Update version number
rmtrmt authored Nov 19, 2020

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 785fdb0 commit 499553e
Showing 6 changed files with 63 additions and 40 deletions.
58 changes: 35 additions & 23 deletions docs/_modules/placekey/api.html
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ <h1>Source code for placekey.api</h1><div class="highlight"><pre>
<span class="n">logging</span><span class="o">.</span><span class="n">Formatter</span><span class="p">(</span><span class="s1">&#39;</span><span class="si">%(asctime)s</span><span class="se">\t</span><span class="si">%(levelname)s</span><span class="se">\t</span><span class="si">%(message)s</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">log</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span>
<span class="n">log</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
<span class="n">log</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">ERROR</span><span class="p">)</span>
<span class="n">log</span><span class="o">.</span><span class="n">handlers</span> <span class="o">=</span> <span class="p">[</span><span class="n">console_log</span><span class="p">]</span>


@@ -104,6 +104,8 @@ <h1>Source code for placekey.api</h1><div class="highlight"><pre>
<span class="s1">&#39;query_id&#39;</span>
<span class="p">}</span>

<span class="n">DEFAULT_QUERY_ID_PREFIX</span> <span class="o">=</span> <span class="s2">&quot;place_&quot;</span>

<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">max_retries</span><span class="o">=</span><span class="n">DEFAULT_MAX_RETRIES</span><span class="p">,</span> <span class="n">logger</span><span class="o">=</span><span class="n">log</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">api_key</span> <span class="o">=</span> <span class="n">api_key</span>
<span class="bp">self</span><span class="o">.</span><span class="n">max_retries</span> <span class="o">=</span> <span class="n">max_retries</span>
@@ -173,9 +175,14 @@ <h1>Source code for placekey.api</h1><div class="highlight"><pre>
<span class="sd"> Lookup Placekeys for an iterable of places specified by place dictionaries.</span>
<span class="sd"> This method checks that the place dictionaries are valid before querying</span>
<span class="sd"> the API, and it will return partial results if it encounters a fatal error.</span>
<span class="sd"> This method follows the rate limits of the Placekey API. This function is a</span>
<span class="sd"> wrapper for `lookup_batch`, and that function may be used if different error</span>
<span class="sd"> handling or logic around batch processing is desired.</span>
<span class="sd"> Places without a `query_id` will have one generated for them based on their</span>
<span class="sd"> index in `places`, e.g., &quot;place_0&quot; for the first item in the list, but a</span>
<span class="sd"> user-provided `query_id` will be passed through as is.</span>

<span class="sd"> This function is a wrapper for `lookup_batch`, and that function may be</span>
<span class="sd"> used if different error handling or logic around batch processing is desired.</span>

<span class="sd"> This method follows the rate limits of the Placekey API.</span>

<span class="sd"> :param places: An iterable of place dictionaries.</span>
<span class="sd"> :param strict_address_match: Boolean for whether or not to strict match</span>
@@ -197,9 +204,22 @@ <h1>Source code for placekey.api</h1><div class="highlight"><pre>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Some queries contain keys other than: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">QUERY_PARAMETERS</span><span class="p">))</span>

<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s1">&#39;backoff&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">ERROR</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s1">&#39;backoff&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">ERROR</span><span class="p">)</span>

<span class="c1"># Add a query_id to each place that doesn&#39;t have one</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">place</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">places</span><span class="p">):</span>
<span class="k">if</span> <span class="s1">&#39;query_id&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">place</span><span class="p">:</span>
<span class="n">place</span><span class="p">[</span><span class="s1">&#39;query_id&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DEFAULT_QUERY_ID_PREFIX</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>

<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">places</span><span class="p">),</span> <span class="n">batch_size</span><span class="p">):</span>
<span class="n">max_batch_idx</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">places</span><span class="p">))</span>
<span class="n">batch_query_ids</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span><span class="p">[</span><span class="s1">&#39;query_id&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">places</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">max_batch_idx</span><span class="p">]]</span>

<span class="k">try</span><span class="p">:</span>
<span class="n">res</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lookup_batch</span><span class="p">(</span>
@@ -208,41 +228,33 @@ <h1>Source code for placekey.api</h1><div class="highlight"><pre>
<span class="n">strict_name_match</span><span class="o">=</span><span class="n">strict_name_match</span>
<span class="p">)</span>
<span class="k">except</span> <span class="n">RateLimitException</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span>
<span class="s1">&#39;Fatal error encountered. Returning processed items.&#39;</span><span class="p">)</span>
<span class="k">break</span>

<span class="c1"># Catch the case where all queries in the batch have an error,</span>
<span class="c1"># and generate rows for individual items.</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">res</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="s1">&#39;error&#39;</span> <span class="ow">in</span> <span class="n">res</span><span class="p">:</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;All queries in batch (</span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">) had errors&#39;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">max_batch_idx</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;All queries in batch (</span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">) had errors&#39;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">max_batch_idx</span><span class="p">)</span>

<span class="n">res</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;query_id&#39;</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">),</span> <span class="s1">&#39;error&#39;</span><span class="p">:</span> <span class="n">res</span><span class="p">[</span><span class="s1">&#39;error&#39;</span><span class="p">]}</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">max_batch_idx</span><span class="p">)]</span>
<span class="n">res</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;query_id&#39;</span><span class="p">:</span> <span class="n">query_id</span><span class="p">,</span> <span class="s1">&#39;error&#39;</span><span class="p">:</span> <span class="n">res</span><span class="p">[</span><span class="s1">&#39;error&#39;</span><span class="p">]}</span>
<span class="k">for</span> <span class="n">query_id</span> <span class="ow">in</span> <span class="n">batch_query_ids</span><span class="p">]</span>

<span class="c1"># Catch other server-side errors</span>
<span class="k">elif</span> <span class="s1">&#39;message&#39;</span> <span class="ow">in</span> <span class="n">res</span><span class="p">:</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="s1">&#39;message&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">&#39;Returning completed queries&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="s1">&#39;message&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">&#39;Returning completed queries&#39;</span><span class="p">)</span>
<span class="k">break</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Remap the &#39;query_id&#39; field to match address index</span>
<span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">res</span><span class="p">:</span>
<span class="k">if</span> <span class="n">r</span><span class="p">[</span><span class="s1">&#39;query_id&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">isdigit</span><span class="p">():</span>
<span class="n">r</span><span class="p">[</span><span class="s1">&#39;query_id&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">r</span><span class="p">[</span><span class="s1">&#39;query_id&#39;</span><span class="p">])</span> <span class="o">+</span> <span class="n">i</span><span class="p">)</span>

<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">res</span><span class="p">)</span>

<span class="k">if</span> <span class="n">verbose</span> <span class="ow">and</span> <span class="n">max_batch_idx</span> <span class="o">%</span> <span class="p">(</span><span class="mi">10</span> <span class="o">*</span> <span class="n">batch_size</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Processed </span><span class="si">%s</span><span class="s1"> items&#39;</span><span class="p">,</span> <span class="n">max_batch_idx</span><span class="p">)</span>
<span class="k">if</span> <span class="n">max_batch_idx</span> <span class="o">%</span> <span class="p">(</span><span class="mi">10</span> <span class="o">*</span> <span class="n">batch_size</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Processed </span><span class="si">%s</span><span class="s1"> items&#39;</span><span class="p">,</span> <span class="n">max_batch_idx</span><span class="p">)</span>

<span class="n">result_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="o">.</span><span class="n">from_iterable</span><span class="p">(</span><span class="n">results</span><span class="p">))</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Processed </span><span class="si">%s</span><span class="s1"> items&#39;</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">result_list</span><span class="p">))</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Done&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Processed </span><span class="si">%s</span><span class="s1"> items&#39;</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">result_list</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Done&#39;</span><span class="p">)</span>

<span class="k">return</span> <span class="n">result_list</span></div>

11 changes: 7 additions & 4 deletions docs/placekey.html
Original file line number Diff line number Diff line change
@@ -46,7 +46,7 @@ <h1>Submodules<a class="headerlink" href="#submodules" title="Permalink to this
<span id="placekey-api"></span><h2>placekey.api<a class="headerlink" href="#module-placekey.api" title="Permalink to this headline"></a></h2>
<dl class="py class">
<dt id="placekey.api.PlacekeyAPI">
<em class="property">class </em><code class="sig-prename descclassname">placekey.api.</code><code class="sig-name descname">PlacekeyAPI</code><span class="sig-paren">(</span><em class="sig-param">api_key=None</em>, <em class="sig-param">max_retries=20</em>, <em class="sig-param">logger=&lt;RootLogger root (INFO)&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/placekey/api.html#PlacekeyAPI"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#placekey.api.PlacekeyAPI" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">placekey.api.</code><code class="sig-name descname">PlacekeyAPI</code><span class="sig-paren">(</span><em class="sig-param">api_key=None</em>, <em class="sig-param">max_retries=20</em>, <em class="sig-param">logger=&lt;RootLogger root (ERROR)&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/placekey/api.html#PlacekeyAPI"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#placekey.api.PlacekeyAPI" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>PlacekeyAPI class</p>
<p>This class provides functionality for looking up Placekeys using the Placekey
@@ -128,9 +128,12 @@ <h1>Submodules<a class="headerlink" href="#submodules" title="Permalink to this
<dd><p>Lookup Placekeys for an iterable of places specified by place dictionaries.
This method checks that the place dictionaries are valid before querying
the API, and it will return partial results if it encounters a fatal error.
This method follows the rate limits of the Placekey API. This function is a
wrapper for <cite>lookup_batch</cite>, and that function may be used if different error
handling or logic around batch processing is desired.</p>
Places without a <cite>query_id</cite> will have one generated for them based on their
index in <cite>places</cite>, e.g., “place_0” for the first item in the list, but a
user-provided <cite>query_id</cite> will be passed through as is.</p>
<p>This function is a wrapper for <cite>lookup_batch</cite>, and that function may be
used if different error handling or logic around batch processing is desired.</p>
<p>This method follows the rate limits of the Placekey API.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
2 changes: 1 addition & 1 deletion docs/searchindex.js
2 changes: 1 addition & 1 deletion placekey/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .placekey import *
__version__ = '0.0.7'
__version__ = '0.0.8'
__all__ = ['placekey', 'api']
28 changes: 18 additions & 10 deletions placekey/api.py
Original file line number Diff line number Diff line change
@@ -65,6 +65,8 @@ class PlacekeyAPI:
'query_id'
}

DEFAULT_QUERY_ID_PREFIX = "place_"

def __init__(self, api_key=None, max_retries=DEFAULT_MAX_RETRIES, logger=log):
self.api_key = api_key
self.max_retries = max_retries
@@ -134,9 +136,14 @@ def lookup_placekeys(self,
Lookup Placekeys for an iterable of places specified by place dictionaries.
This method checks that the place dictionaries are valid before querying
the API, and it will return partial results if it encounters a fatal error.
This method follows the rate limits of the Placekey API. This function is a
wrapper for `lookup_batch`, and that function may be used if different error
handling or logic around batch processing is desired.
Places without a `query_id` will have one generated for them based on their
index in `places`, e.g., "place_0" for the first item in the list, but a
user-provided `query_id` will be passed through as is.
This function is a wrapper for `lookup_batch`, and that function may be
used if different error handling or logic around batch processing is desired.
This method follows the rate limits of the Placekey API.
:param places: An iterable of place dictionaries.
:param strict_address_match: Boolean for whether or not to strict match
@@ -165,9 +172,15 @@ def lookup_placekeys(self,
self.logger.setLevel(logging.ERROR)
logging.getLogger('backoff').setLevel(logging.ERROR)

# Add a query_id to each place that doesn't have one
for i, place in enumerate(places):
if 'query_id' not in place:
place['query_id'] = self.DEFAULT_QUERY_ID_PREFIX + str(i)

results = []
for i in range(0, len(places), batch_size):
max_batch_idx = min(i + batch_size, len(places))
batch_query_ids = [p['query_id'] for p in places[i:max_batch_idx]]

try:
res = self.lookup_batch(
@@ -186,19 +199,14 @@ def lookup_placekeys(self,
self.logger.info(
'All queries in batch (%s, %s) had errors', i, max_batch_idx)

res = [{'query_id': str(i), 'error': res['error']}
for i in range(i, max_batch_idx)]
res = [{'query_id': query_id, 'error': res['error']}
for query_id in batch_query_ids]

# Catch other server-side errors
elif 'message' in res:
self.logger.error(res['message'])
self.logger.error('Returning completed queries')
break
else:
# Remap the 'query_id' field to match address index
for r in res:
if r['query_id'].isdigit():
r['query_id'] = str(int(r['query_id']) + i)

results.append(res)

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@

setuptools.setup(
name="placekey",
version="0.0.7",
version="0.0.8",
author="SafeGraph Inc.",
author_email="russ@safegraph.com",
description="Utilities for working with Placekeys",

0 comments on commit 499553e

Please sign in to comment.