|
| 1 | +import numpy as np |
| 2 | + |
| 3 | + |
def calculate_approximated_historical(df, last_year_with_smhi_data, current_year):
    """
    Approximate data values for the years after the last year with SMHI data.

    For every row, the values for the years ``last_year_with_smhi_data + 1``
    through ``current_year`` (inclusive) are extrapolated from the row's
    previously calculated linear trend coefficients and clamped so they never
    go below zero. The last recorded value is kept as the first point of the
    series, and the series is then integrated over the years with the
    trapezoidal rule.

    Args:
        df (pandas.DataFrame): The input DataFrame. It must contain a
            'Kommun' column. When at least one year needs approximation it
            must also contain a column labelled ``last_year_with_smhi_data``
            and a 'trendCoefficients' column whose entries are indexable as
            ``(slope, intercept)``.
        last_year_with_smhi_data (int): The last year with SMHI data.
        current_year (int): The current year (included in the approximation).

    Returns:
        pandas.DataFrame: A copy of ``df`` sorted by 'Kommun' with two added
        columns: 'approximatedHistorical' (a dict mapping year to value per
        row; empty when no year needs approximation) and
        'totalApproximatedHistorical' (the trapezoidal integral of that dict,
        0.0 when it is empty).
    """
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # fall back to the old name when running on an older NumPy.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # Years passed since the last year with SMHI data (including current year)
    approximated_years = range(last_year_with_smhi_data + 1, current_year + 1)

    # sort_values returns a new frame, so the caller's DataFrame is untouched
    df = df.sort_values('Kommun', ascending=True)

    if approximated_years:
        approximated = []
        # Walk the two needed columns in lockstep instead of building a full
        # row object with df.iloc[i] for every lookup.
        for last_value, fit in zip(df[last_year_with_smhi_data],
                                   df['trendCoefficients']):
            # Start from the latest recorded datapoint ...
            by_year = {last_year_with_smhi_data: last_value}
            # ... and extend it along the trend line, never below zero
            for year in approximated_years:
                by_year[year] = max(0, fit[0] * year + fit[1])
            approximated.append(by_year)
    else:
        # Nothing to approximate: one empty dict per row. The data columns
        # are deliberately not accessed in this case.
        approximated = [{} for _ in range(len(df))]

    df['approximatedHistorical'] = approximated

    # Area under each row's year -> value series (0.0 for an empty series)
    df['totalApproximatedHistorical'] = [
        integrate(list(by_year.values()), list(by_year.keys()))
        for by_year in approximated
    ]

    return df
0 commit comments