|
4 | 4 | from json import JSONDecodeError
|
5 | 5 |
|
6 | 6 | import backoff
|
| 7 | +import pandas as pd |
7 | 8 | import requests
|
8 | 9 | from ratelimit import limits, RateLimitException
|
9 | 10 | from .general import _post_request_function
|
@@ -88,6 +89,26 @@ class PlacekeyAPI:
|
88 | 89 |
|
89 | 90 | DEFAULT_QUERY_ID_PREFIX = "place_"
|
90 | 91 |
|
# Each inner list is one combination of query fields; _has_minimum_inputs
# accepts a query as soon as every field of any single combination is present.
MIN_INPUTS = [
    ['latitude', 'longitude'],
    ['street_address', 'city', 'region', 'postal_code'],
    ['street_address', 'region', 'postal_code'],
    ['street_address', 'region', 'city'],
]

# Join keys that _join_pandas_df will try to obtain through a Placekey lookup
# when the key is not already a column of the DataFrame being joined.
# ("gers" used to be listed twice; a set literal collapses duplicates, so
# dropping the repeat does not change the value.)
PLACEKEY_OUTPUTS = {
    "placekey",
    "address_placekey",
    "building_placekey",
    "gers",
    "confidence_score",
    "upi",
    "geoid",
    "parcel",
    "address_confidence_score",
}
| 111 | + |
91 | 112 | def __init__(self, api_key=None, max_retries=DEFAULT_MAX_RETRIES, logger=log,
|
92 | 113 | user_agent_comment=None):
|
93 | 114 | self.api_key = api_key
|
@@ -120,6 +141,79 @@ def __init__(self, api_key=None, max_retries=DEFAULT_MAX_RETRIES, logger=log,
|
120 | 141 | calls=self.BULK_REQUEST_LIMIT,
|
121 | 142 | period=self.BULK_REQUEST_WINDOW,
|
122 | 143 | max_tries=self.max_retries)
|
| 144 | + |
def _has_minimum_inputs(self, user_inputs: set[str]) -> bool:
    """
    Check whether the supplied field names satisfy at least one entry of MIN_INPUTS.

    :param user_inputs: The query field names that are available. Only
        membership tests (``in``) are performed on it, so any container of
        names works (the in-file caller passes a dict keys view).
    :return: True if every field of at least one MIN_INPUTS combination is
        contained in user_inputs, False otherwise.
    """
    return any(
        all(field in user_inputs for field in combination)
        for combination in self.MIN_INPUTS
    )
| 155 | + |
def _join_pandas_df(self, df1: pd.DataFrame, column_mapping_1: dict, df2: pd.DataFrame, column_mapping_2: dict, how: str = 'inner', on: str = "placekey", fields=None, batch_size=MAX_BATCH_SIZE, verbose=False):
    """
    Merge two DataFrames on a shared key, looking the key up first where it is missing.

    For each DataFrame that has no ``on`` column: if ``on`` is one of
    PLACEKEY_OUTPUTS the frame is passed through _placekey_pandas_df (with its
    column mapping) to obtain it; otherwise a ValueError is raised.

    :param df1: The first (left) DataFrame.
    :param column_mapping_1: Column mapping for df1, forwarded to
        _placekey_pandas_df when a lookup is needed.
    :param df2: The second (right) DataFrame.
    :param column_mapping_2: Column mapping for df2, forwarded to
        _placekey_pandas_df when a lookup is needed.
    :param how: Merge type passed to pandas.merge. Defaults to 'inner'.
    :param on: Name of the column to join on. Defaults to "placekey".
    :param fields: Optional iterable of additional fields to request in the
        lookup. The join key is always requested as well. The caller's object
        is not modified.
    :param batch_size: Forwarded to _placekey_pandas_df.
    :param verbose: Forwarded to _placekey_pandas_df.
    :return: The result of ``pd.merge(df1, df2, how=how, on=on)``.
    :raises ValueError: If a DataFrame lacks ``on`` and ``on`` is not in
        PLACEKEY_OUTPUTS.
    """
    # Copy into a fresh list so tuples/sets are accepted and the caller's
    # object is never touched; request the join key only once.
    # NOTE(review): with the default on="placekey" this sends "placekey" as a
    # requested field, as the previous code did - confirm the API accepts it.
    requested_fields = [] if fields is None else list(fields)
    if on not in requested_fields:
        requested_fields.append(on)

    def ensure_join_key(frame, column_mapping, label):
        # Return a frame that has the join column, performing a lookup if needed.
        if on in frame:
            return frame
        if on not in self.PLACEKEY_OUTPUTS:
            raise ValueError("The {} dataset does not contain the join key {}".format(label, on))
        return self._placekey_pandas_df(frame, column_mapping=column_mapping, fields=requested_fields, batch_size=batch_size, verbose=verbose, return_original_values=True)

    df1 = ensure_join_key(df1, column_mapping_1, "first")
    df2 = ensure_join_key(df2, column_mapping_2, "second")

    return pd.merge(df1, df2, how=how, on=on)
| 170 | + |
| 171 | + |
def _placekey_pandas_df(self, df: pd.DataFrame, column_mapping: dict, fields=None, batch_size=MAX_BATCH_SIZE, verbose=False, return_original_values=True):
    """
    Look up Placekey outputs for every row of a DataFrame.

    One query is built per row from the columns named in column_mapping
    (missing columns and NA values are left out of the query), all queries are
    sent through lookup_placekeys, and the results are matched back to their
    rows through a temporary query id.

    :param df: The input DataFrame. It is not modified.
    :param column_mapping: Dict mapping Placekey query field names (keys) to
        the names of the DataFrame columns that hold them (values).
    :param fields: Requested output fields, forwarded to lookup_placekeys.
        Defaults to None
    :param batch_size: Number of places to look up in a single batch,
        forwarded to lookup_placekeys. Defaults to MAX_BATCH_SIZE.
    :param verbose: Boolean for whether or not to log additional information.
        Defaults to False
    :param return_original_values: If True (default), the lookup results are
        inner-joined onto the original columns and the temporary id column is
        dropped. If False, only the lookup results are returned, with the
        query id in a 'temp_query_id' column.
    :return: pd.DataFrame with the lookup results as described above.
    :raises ValueError: If column_mapping fails _validate_query or does not
        cover any of the MIN_INPUTS combinations.
    """
    if not self._validate_query(column_mapping):
        raise ValueError(
            "Some queries contain keys other than: {}".format(self.QUERY_PARAMETERS))

    if not self._has_minimum_inputs(column_mapping.keys()):
        raise ValueError(
            "The inputted DataFrame doesn't have enough information. Refer to minimum inputs documentation here: https://docs.placekey.io/documentation/placekey-api/input-parameters/minimum-inputs")

    temp_query_id = 'temp_query_id'
    prefix = self.DEFAULT_QUERY_ID_PREFIX

    # Ids are derived from index labels, as before, whenever those labels are
    # unique. With a non-unique index the labels would give several rows the
    # same id and the merge below would multiply rows, so fall back to the
    # row position in that case.
    if df.index.is_unique:
        query_ids = [prefix + str(label) for label in df.index]
    else:
        query_ids = [prefix + str(position) for position in range(len(df))]

    # assign() returns a new frame, so the caller's DataFrame never gains the
    # temporary column.
    keyed_df = df.assign(**{temp_query_id: query_ids})

    # Resolve once which mapped columns actually exist instead of per row.
    available = [(place_key, column_name)
                 for place_key, column_name in column_mapping.items()
                 if column_name in keyed_df.columns]

    places = []
    # Iterate the keyed frame (not df): the string id column keeps each row
    # Series at object dtype, exactly as in the previous implementation.
    for _, row in keyed_df.iterrows():
        place = {place_key: row[column_name]
                 for place_key, column_name in available
                 if pd.notna(row[column_name])}
        place['query_id'] = row[temp_query_id]
        places.append(place)

    result = self.lookup_placekeys(places=places, fields=fields, batch_size=batch_size, verbose=verbose)
    result_df = pd.DataFrame(result).rename(columns={"query_id": temp_query_id})

    if not return_original_values:
        return result_df
    merged_df = pd.merge(keyed_df, result_df, how='inner', on=temp_query_id).drop([temp_query_id], axis=1)
    return merged_df
| 216 | + |
123 | 217 |
|
124 | 218 | def lookup_placekey(self,
|
125 | 219 | fields=None,
|
|
0 commit comments