Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions data/DEV_load_canadian_renewables_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import marimo

__generated_with = "0.17.8"
app = marimo.App(width="full")


@app.cell
def _():
    # Import cell: every name returned here is consumed by later cells that
    # list it as a parameter.
    import marimo as mo
    import numpy as np
    import pandas as pd
    import pdfplumber
    import requests
    from io import BytesIO
    return BytesIO, mo, np, pd, pdfplumber, requests


@app.function
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``s1`` into ``s2``.

    Only two rows of the dynamic-programming table are kept at any time, so
    memory use grows with the shorter input instead of with
    ``len(s1) * len(s2)``.

    Args:
        s1: First sequence (typically a ``str``).
        s2: Second sequence (typically a ``str``).

    Returns:
        The edit distance as a non-negative ``int``.
    """
    # The distance is symmetric, so walk the longer input in the outer loop
    # and size the rows by the shorter one.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    # Row 0: turning the empty prefix into s2[:j] takes j insertions.
    previous = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1, start=1):
        # Column 0: turning s1[:i] into the empty prefix takes i deletions.
        current = [i]
        for j, c2 in enumerate(s2, start=1):
            cost = 0 if c1 == c2 else 1
            current.append(min(previous[j] + 1,          # Deletion
                               current[j - 1] + 1,       # Insertion
                               previous[j - 1] + cost))  # Substitution
        previous = current

    return previous[-1]


@app.cell
def _(BytesIO, np, pd, pdfplumber, requests):
    def load_canadian_renewables_data(pdf_location="https://renewablesassociation.ca/wp-content/uploads/2025/01/New-Project-List.pdf"):
        """Download the project-list PDF and return its table as a DataFrame.

        The first table found on each page is read. The first row of the
        first such table supplies the column names; every other row becomes
        one record. Column 1 is replaced by the closest entry of
        ``tech_labels`` and column 2 by the closest entry of ``provinces``
        (closest by Levenshtein distance). Column 3 is parsed as an int and
        columns 4-7 as floats; cells that are missing or not numeric become
        NaN. Columns 0 and 8 are passed through unchanged.

        Args:
            pdf_location: URL of the PDF to download.

        Returns:
            pandas.DataFrame with one row per extracted table row.

        Raises:
            requests.HTTPError: If the download returns an error status.
            IndexError: If no table is found in the PDF, or a row has fewer
                than nine cells.
        """
        tech_labels = ['Wind', 'Solar', 'Energy Storage', "Solar-Storage", "Wind-Storage"]
        provinces = ['NL', 'PE', 'NS', 'NB', 'QC', 'ON', 'MB', 'SK', 'AB', 'BC', 'YT', 'NT', 'NU']

        def my_float(_x):
            """Convert a cell to float; NaN when missing or not numeric."""
            try:
                return float(_x)
            except (TypeError, ValueError):
                # TypeError covers None cells, ValueError covers stray text.
                return np.nan

        def my_int(_x):
            """Convert a cell to int; NaN when missing or not an integer."""
            try:
                return int(_x)
            except (TypeError, ValueError):
                return np.nan

        def closest_match(text, candidates):
            """Return the candidate with the smallest edit distance to text."""
            if not isinstance(text, str):
                # Nothing to match against (e.g. a None cell): keep as-is.
                return text
            distances = [levenshtein_distance(text, _c) for _c in candidates]
            return candidates[np.argmin(distances)]

        # Fail fast on a stalled or failed download instead of handing an
        # error page to the PDF parser.
        pdf_response = requests.get(pdf_location, timeout=60)
        pdf_response.raise_for_status()

        rows = []
        with BytesIO(pdf_response.content) as pdf_file:
            with pdfplumber.open(pdf_file) as pdf:
                for _page in pdf.pages:
                    _tables = _page.extract_tables()
                    if not _tables:
                        # Page without any table: nothing to read.
                        continue
                    # Only the first table on each page is used.
                    rows.extend(_tables[0])

        columns = rows[0]
        records = [
            [_v[0], closest_match(_v[1], tech_labels), closest_match(_v[2], provinces),
             my_int(_v[3]), my_float(_v[4]),
             my_float(_v[5]), my_float(_v[6]),
             my_float(_v[7]), _v[8]]
            for _v in rows[1:]
        ]
        # Build the frame once rather than growing it one row at a time.
        return pd.DataFrame(records, columns=columns)
    return (load_canadian_renewables_data,)


@app.cell
def _(load_canadian_renewables_data):
    # Run the loader with its default PDF location.
    canadian_renewables = load_canadian_renewables_data()
    return (canadian_renewables,)


@app.cell
def _(canadian_renewables):
    # Bare expression; presumably shown as this cell's output in marimo.
    canadian_renewables
    return


@app.cell
def _():
    # Same URL as the default argument of load_canadian_renewables_data;
    # used by the step-by-step cells below.
    pdf_location = "https://renewablesassociation.ca/wp-content/uploads/2025/01/New-Project-List.pdf"
    return (pdf_location,)


@app.cell
def _(pdf_location, requests):
    # Download the PDF. The timeout keeps the notebook from hanging on a
    # stalled connection, and raise_for_status stops an HTTP error page from
    # being passed on to the PDF parser.
    pdf_response = requests.get(pdf_location, timeout=60)
    pdf_response.raise_for_status()
    return (pdf_response,)


@app.cell
def _(BytesIO, np, pd, pdf_response, pdfplumber):
    # Step-by-step version of the table extraction: builds `data` from the
    # downloaded PDF, keeping columns 1 and 2 exactly as extracted (no
    # fuzzy-matching of labels).
    def my_float(_x):
        """Convert a cell to float; NaN when missing or not numeric."""
        try:
            return float(_x)
        except (TypeError, ValueError):
            # TypeError covers None cells, ValueError covers stray text.
            return np.nan

    def my_int(_x):
        """Convert a cell to int; NaN when missing or not an integer."""
        try:
            return int(_x)
        except (TypeError, ValueError):
            return np.nan

    all_tables = []
    with BytesIO(pdf_response.content) as pdf_file:
        with pdfplumber.open(pdf_file) as pdf:
            for _page in pdf.pages:
                all_tables.append(_page.extract_tables())

    # Flatten the first table of every page; pages without a table are
    # skipped instead of raising IndexError.
    _rows = []
    for _page_tables in all_tables:
        if _page_tables:
            _rows.extend(_page_tables[0])

    # The very first row is the header; all remaining rows are records.
    columns = _rows[0]
    _records = [
        [_v[0], _v[1], _v[2], my_int(_v[3]), my_float(_v[4]),
         my_float(_v[5]), my_float(_v[6]),
         my_float(_v[7]), _v[8]]
        for _v in _rows[1:]
    ]
    # Build the frame once rather than growing it one row at a time.
    data = pd.DataFrame(_records, columns=columns)
    return (data,)


@app.cell
def _():
    # Empty cell.
    return


@app.cell
def _(data):
    # Bare expression; presumably shown as this cell's output in marimo.
    data
    return


@app.cell
def _(data):
    # Number of non-null 'Project Name' entries for each 'Technology' value.
    data.groupby('Technology')['Project Name'].count()
    return


@app.cell
def _(data):
    # Distinct raw 'Technology' labels. Going through a set means the order
    # of the resulting list is not guaranteed.
    techs = list(set(data['Technology']))
    techs
    return (techs,)


@app.cell
def _(mo, np, techs):
    # Spot-check of the fuzzy matching: pick one raw technology label and
    # show which target label is closest by Levenshtein distance.
    targets = ['Wind', 'Solar', 'Energy Storage', "Solar-Storage", "Wind-Storage"]
    # Use entry 10 when it exists, otherwise the last entry, so the cell
    # does not raise IndexError when fewer than 11 distinct labels exist.
    test = techs[min(10, len(techs) - 1)]
    distances = [levenshtein_distance(test, _t) for _t in targets]
    _text = f"""
- test string: {test}
- closest match: {targets[np.argmin(distances)]}"""
    mo.md(_text)
    return


if __name__ == "__main__":
app.run()
5 changes: 2 additions & 3 deletions data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ To update the GLM file, run the Makefile.

# Description

The original WECC 240 model files are in the `wecc240` subdirectory, and the main file is `wecc240/240busWECC_2018_PSS.raw`. The bus information is extracted in `wecc240_gis.csv`. The file `nodes.csv` contains the reduced set of nodes with unique locations, because many of the nodes in the original model are co-located (see `nodes.py` for the node reduction methodology).

# Validation

To review the load model, run the following marimo app:
Expand All @@ -13,9 +15,6 @@ To review the load model, run the following marimo app:
Bob is the subject matter expert who can tell whether your solution is any good
just by looking at it.

# Notes

The `nodes.csv` contains a list of all the WECC 240 bus model locations with duplicate locations removed (see `nodes.py` for node reduction methodology).

# Data Sources
1. `powerplants.csv`: https://hifld-geoplatform.hub.arcgis.com/datasets/9dd630378fcf439999094a56c352670d_0/explore
Expand Down
182 changes: 182 additions & 0 deletions data/WECC_DG_solar_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import marimo

__generated_with = "0.17.8"
app = marimo.App(width="full")


@app.cell
def _():
    # Import cell: only np and pd are returned for use by other cells.
    import marimo as mo
    import pandas as pd
    import numpy as np
    return np, pd


@app.cell
def _():
    # Two-letter abbreviation -> full state name, for the eleven states
    # listed here.
    short_to_long = {
        'NM': 'New Mexico',
        'CA': 'California',
        'WY': 'Wyoming',
        'OR': 'Oregon',
        'UT': 'Utah',
        'WA': 'Washington',
        'AZ': 'Arizona',
        'NV': 'Nevada',
        'ID': 'Idaho',
        'MT': 'Montana',
        'CO': 'Colorado'
    }
    # Inverse lookup: full state name -> abbreviation.
    long_to_short = {value: key for key, value in short_to_long.items()}
    return (long_to_short,)


@app.cell
def _():
    import requests
    import geopandas as gpd
    from shapely.geometry import Point
    import json
    import time

    # Load US states shapefile. This is a sample URL; you can find others or download and point to your local file.
    STATES_URL = "https://raw.githubusercontent.com/PublicaMundi/MappingAPI/refs/heads/master/data/geojson/us-states.json"

    # Load GeoDataFrame with US states
    states = gpd.read_file(STATES_URL)

    def get_location(lat, lon):
        """Return the US state name containing (lat, lon), else a country.

        The point is first tested against the state polygons loaded above;
        if no polygon contains it, the result of ``get_country`` is
        returned instead.
        """
        point = Point(lon, lat)  # Note: (lon, lat) order is used for Point
        state_found = states[states['geometry'].contains(point)]

        if not state_found.empty:
            # If a state is found, return the name of the state
            return state_found.iloc[0]['name']  # Adjust this key based on the GeoJSON properties
        # If no state is found, call an API to get the country
        return get_country(lat, lon)

    def get_country(lat, lon, max_attempts=5):
        """Look up the country (or US state) for a point via geocode.xyz.

        Args:
            lat: Latitude of the point.
            lon: Longitude of the point.
            max_attempts: How many requests to make while the service keeps
                answering that the caller is throttled.

        Returns:
            'Mexico' or 'Canada' for the codes 'MX' / 'CA', the reported
            state name for 'US', the raw code for anything else, or
            'Unable to find country' when the request fails, stays
            throttled, or the reply carries no 'prov' field.
        """
        # Using a free API for country lookup
        url = f'http://geocode.xyz/{lat},{lon}?json=1'
        for _attempt in range(max_attempts):
            response = requests.get(url, timeout=30)
            if response.status_code != 200:
                return 'Unable to find country'
            response_json = json.loads(response.content.decode('utf-8'))
            province = response_json.get('prov')
            if province != 'Throttled! See geocode.xyz/pricing':
                break
            # oops, wait a second for free API to unlock...
            time.sleep(1)
        else:
            # Still throttled after every attempt.
            return 'Unable to find country'

        if province is None:
            # Reply without a 'prov' field: nothing usable to report.
            return 'Unable to find country'
        if province == 'MX':
            return 'Mexico'
        if province == 'CA':
            return 'Canada'
        if province == 'US':
            # If in the US, return the state (fall back to the code when
            # the reply has no 'statename').
            return response_json.get('statename', province)
        return province
    return (get_location,)


@app.cell
def _(get_location):
    ## Test State/Country lookup
    # Swap in one of the commented-out coordinate pairs to try a point
    # outside the US.
    # lat = 51.5074 # Latitude for London
    # lon = -0.1278 # Longitude for London
    lat = 47.6321 # Spokane, WA
    lon = -117.478965 # Spokane, WA
    # lat = 12.3548 # Vietnam (NOTE(review): originally labelled Hanoi, but Hanoi is near 21.03 N, 105.85 E)
    # lon = 108.4654 # Vietnam
    location = get_location(lat, lon)
    print(location)
    return


@app.cell
def _(pd):
    # Read the DG solar table, then turn every column except the two label
    # columns into floats.
    wecc_dg_data = pd.read_csv('wecc_dg_solar.csv')
    _label_columns = ['State', 'Data Status']
    for _col in wecc_dg_data.columns:
        if _col in _label_columns:
            continue
        try:
            # numbers have commas which causes data to be read as strings
            # instead of floats: drop the commas, then convert.
            _without_commas = wecc_dg_data[_col].str.replace(',', '')
            wecc_dg_data[_col] = _without_commas.astype(float)
        except AttributeError:
            # The column has no .str accessor (it was not read as strings),
            # so it is left untouched.
            pass
    return (wecc_dg_data,)


@app.cell
def _(wecc_dg_data):
    # Bare expression; presumably shown as this cell's output in marimo.
    wecc_dg_data
    return


@app.cell
def _(get_location, np, pd):
    # Tag every bus with the state/country that contains it, then compute
    # each bus's share of the total positive LOAD within its state.
    wecc_bus_summary = pd.read_csv('test_wecc240_2020m_gis.csv')
    wecc_bus_summary['state'] = [
        get_location(_lat, _lon)
        for _lat, _lon in zip(wecc_bus_summary['LAT'], wecc_bus_summary['LON'])
    ]
    # Buses with no positive load keep a fraction of 0.0.
    wecc_bus_summary['load_fraction'] = 0.0
    grouped = wecc_bus_summary[wecc_bus_summary['LOAD'] > 0].groupby('state')
    # Iterate over the groups that actually exist: a state whose buses all
    # have LOAD <= 0 has no group, and get_group would raise KeyError for it.
    for _state, _group in grouped:
        load_frac = _group['LOAD'] / np.sum(_group['LOAD'])
        wecc_bus_summary.loc[_group.index, 'load_fraction'] = load_frac
    return (wecc_bus_summary,)


@app.cell
def _(wecc_bus_summary):
    # Bare expression; presumably shown as this cell's output in marimo.
    wecc_bus_summary
    return


@app.cell
def _(long_to_short, pd, wecc_bus_summary, wecc_dg_data):
    # For every bus with a positive load fraction, scale its state's DG
    # capacity and generation rows by that fraction. The result is one
    # DataFrame per bus, indexed like the state's rows in wecc_dg_data.
    _columns = ['Year', 'Month', 'State', 'bus_id', 'bus_name', 'geohash', 'lat', 'lon', 'Capacity [MW]', 'Generation [MWh]']
    dataframe_list = []
    grouped_dg = wecc_dg_data.groupby('State')
    _dg_states = set(grouped_dg.groups)
    for _ix, _row in wecc_bus_summary.iterrows():
        include = _row['load_fraction'] > 0 and _row['state'] not in ['Canada', 'Mexico']
        if not include:
            continue
        _abbrev = long_to_short.get(_row['state'])
        if _abbrev is None or _abbrev not in _dg_states:
            # No abbreviation or no DG rows for this location: there is
            # nothing to allocate, so skip the bus instead of raising
            # KeyError.
            continue
        _fraction = float(_row['load_fraction'])
        _state_dg = grouped_dg.get_group(_abbrev)
        _records = [
            [_row2['Year'], _row2['Month'], _row2['State'], _row['BUS_I'], _row['NAME'],
             _row['GEOHASH'], _row['LAT'], _row['LON'],
             _fraction * float(_row2['Total Capacity (MW)']),
             _fraction * float(_row2['Total Generation (MWh)'])]
            for _ix2, _row2 in _state_dg.iterrows()
        ]
        # Build each per-bus frame in one go, keeping the DG row labels.
        new_df = pd.DataFrame(_records, columns=_columns, index=_state_dg.index)
        dataframe_list.append(new_df)
    return (dataframe_list,)


@app.cell
def _(dataframe_list):
    # Bare expression; presumably shown as this cell's output in marimo.
    dataframe_list
    return


@app.cell
def _(dataframe_list, np, pd):
    # Stack the per-bus frames into one table.
    wecc_bus_dg_cap_and_gen_by_month = pd.concat(dataframe_list)
    # Replace the index labels carried over from the per-bus frames with a
    # fresh 0..n-1 numbering.
    wecc_bus_dg_cap_and_gen_by_month.index = np.arange(len(wecc_bus_dg_cap_and_gen_by_month))
    return (wecc_bus_dg_cap_and_gen_by_month,)


@app.cell
def _(wecc_bus_dg_cap_and_gen_by_month):
    # Bare expression; presumably shown as this cell's output in marimo.
    wecc_bus_dg_cap_and_gen_by_month
    return


@app.cell
def _(wecc_bus_dg_cap_and_gen_by_month):
    # Write the combined table to CSV in the working directory. With
    # to_csv's defaults the index is written as the first column.
    wecc_bus_dg_cap_and_gen_by_month.to_csv('wecc_bus_dg_cap_and_gen_by_month.csv')
    return


if __name__ == "__main__":
app.run()
Binary file added data/WECC_node_generation_mix_estimation.pptx
Binary file not shown.
Loading
Loading