Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import time
import os
from datetime import datetime, timedelta, timezone
from flask import Flask
from flask_cors import CORS
import threading
Expand All @@ -9,6 +12,9 @@
from blueprints.route_analysis_bp import route_analysis_blueprint
import mongo_client

from libs.wind_lib import download_rap_grib2, interpolate_uv_temp_at_flight_levels, get_date
from libs.weather_data_lock import weather_data, data_lock

PREFIX = '/api'


Expand All @@ -35,7 +41,47 @@ def _close_mongo_clients(response):
mongo_client.close_reader_mongo_client()
return response

UPDATE_MINUTE_PAST_HOUR = 5   # refreshes are scheduled at HH:05 UTC
RETRY_DELAY_SECONDS = 300     # wait 5 minutes before retrying a failed refresh


def _next_update_time(now):
    """Return the next HH:05 instant that lies strictly after *now*."""
    candidate = now.replace(minute=UPDATE_MINUTE_PAST_HOUR, second=0, microsecond=0)
    if now >= candidate:
        # HH:05 of the current hour has already passed; use the next hour.
        candidate += timedelta(hours=1)
    return candidate


def _refresh_weather_data():
    """Download and process the current RAP file, then publish its metadata."""
    date_str, cycle_hour, forecast_hour, save_path = get_date()
    download_rap_grib2(date_str, cycle_hour, forecast_hour, save_path)
    interpolate_uv_temp_at_flight_levels(save_path)
    processed_at = datetime.now(timezone.utc).isoformat()
    # NOTE(review): reviewers asked for this structure to become a class so
    # its members are accessed via attributes instead of string keys.
    new_state = {
        'String': date_str,
        'Cycle': cycle_hour,
        'Forecast Hour': forecast_hour,
        'Processed At': processed_at
    }
    with data_lock:
        weather_data['wx_state'] = new_state
        # NOTE(review): processed_at is stored twice (here and inside
        # wx_state); both keys are kept so existing readers keep working.
        weather_data['last_updated_utc'] = processed_at
        weather_data['grib_path'] = save_path

    print(f"[Updater] Weather data updated {date_str} {cycle_hour}z f{forecast_hour}")


def run_updater():
    """Refresh the weather data immediately, then again at every HH:05 UTC.

    Never returns; intended to run in a daemon thread.  Any exception raised
    by a refresh is printed and the refresh is retried after
    ``RETRY_DELAY_SECONDS``.
    """
    while True:
        try:
            # The first pass runs straight away at startup; later passes run
            # after the sleep below has expired.
            _refresh_weather_data()

            # schedule next run at the next HH:05 UTC
            now = datetime.now(timezone.utc)
            next_run = _next_update_time(now)
            sleep_seconds = (next_run - now).total_seconds()
            print(f"[Updater] Next run scheduled at {next_run.isoformat()} (in {int(sleep_seconds)}s)")
            time.sleep(sleep_seconds)

        except Exception as e:
            # Top-level boundary of the background thread: report and retry
            # instead of letting the thread die.
            print("Updater error:", e)
            time.sleep(RETRY_DELAY_SECONDS)


if __name__ == '__main__':
    # With use_reloader=True Werkzeug re-executes this script in a child
    # process and sets WERKZEUG_RUN_MAIN=true there.  Start the updater only
    # in that serving process so the watcher process does not also download
    # and process weather data.
    if os.environ.get("WERKZEUG_RUN_MAIN") == "true":
        updater_thread = threading.Thread(target=run_updater, daemon=True)
        updater_thread.start()

    app = create_app()
    # Start the development server exactly once.
    app.run(use_reloader=True)
57 changes: 56 additions & 1 deletion blueprints/weather_bp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import re
from typing import Optional

import numpy as np
import requests
from lxml import etree
from flask import Blueprint, jsonify
from flask import Blueprint, jsonify, request
from libs.weather_data_lock import weather_data, data_lock

weather_blueprint = Blueprint('weather', __name__)

Expand Down Expand Up @@ -36,6 +38,59 @@ def _metar(airport):
metar_text = response.content.decode('utf-8')
return jsonify([metar_text])

DEFAULT_FLIGHT_LEVEL = 300   # used when the caller omits ?fl=
MAX_SAMPLES_PER_AXIS = 15    # cap the response at 15 x 15 grid points


def _evenly_spaced(values, limit):
    """Return at most *limit* entries of *values*, evenly spread, ends included."""
    picks = np.linspace(0, len(values) - 1, min(len(values), limit), dtype=int)
    return values[picks]


@weather_blueprint.route('/winds')
def winds():
    """Return winds and temperature at one flight level inside a lat/lon box.

    Query parameters:
        toplat, toplong, bottomlat, bottomlong: corners of the box (required,
            numeric).  The two corners may be given in either order.
        fl: flight level (integer, defaults to ``DEFAULT_FLIGHT_LEVEL``).

    Responses:
        200 with ``{"points": [...], "metadata": ...}`` (or ``{"points": []}``
            when no grid point falls inside the box),
        400 when a parameter is missing or not numeric,
        503 when no grid is loaded or the flight level is not available.
    """
    args = request.args
    try:
        top_lat = float(args["toplat"])
        top_lon = float(args["toplong"])
        bottom_lat = float(args["bottomlat"])
        bottom_lon = float(args["bottomlong"])
        # NOTE(review): reviewers questioned whether a default flight level
        # makes sense or whether consumers should be required to pass one.
        # The default is kept so existing callers keep working.
        fl = int(args.get("fl", DEFAULT_FLIGHT_LEVEL))
    except (KeyError, ValueError):
        # Missing key or unparsable number: a client error, not a server one.
        return jsonify({
            "error": "toplat, toplong, bottomlat and bottomlong are required "
                     "numbers; fl must be an integer"
        }), 400

    # Hold the lock only long enough to grab references.  Everything needed,
    # including the metadata, is read in one acquisition.
    with data_lock:
        lats = weather_data["lats"]
        lons = weather_data["lons"]
        levels = weather_data["levels"]
        metadata = weather_data.get("wx_state")

    if lats is None or fl not in levels:
        return jsonify({"error": "No data loaded"}), 503

    lat_min, lat_max = sorted([bottom_lat, top_lat])
    lon_min, lon_max = sorted([bottom_lon, top_lon])

    mask = ((lats >= lat_min) & (lats <= lat_max) &
            (lons >= lon_min) & (lons <= lon_max))

    # find indices inside mask
    rows, cols = np.where(mask)
    if rows.size == 0:
        return jsonify({"points": []})

    # Limit to at most 15x15 samples regardless of zoom
    keep_rows = _evenly_spaced(np.unique(rows), MAX_SAMPLES_PER_AXIS)
    keep_cols = _evenly_spaced(np.unique(cols), MAX_SAMPLES_PER_AXIS)

    level = levels[fl]
    speeds, directions, temps = level["spd"], level["dir"], level["temp"]

    points = []
    for i in keep_rows:
        for j in keep_cols:
            if not mask[i, j]:
                continue
            speed, direction, temp = speeds[i, j], directions[i, j], temps[i, j]
            # Cells that could not be interpolated stay NaN; int(NaN) raises,
            # so leave those cells out of the response.
            if np.isnan(speed) or np.isnan(direction) or np.isnan(temp):
                continue
            # NOTE(review): nitpick from review - this structure could become
            # a class called Point.
            points.append({
                "latitude": float(round(lats[i, j], 2)),
                "longitude": float(round(lons[i, j], 2)),
                "wind_speed_kt": int(speed),
                "wind_direction_deg_true": int(direction),
                "temperature_c": int(temp),
            })

    return jsonify({"points": points, "metadata": metadata})


@weather_blueprint.route('/sigmets')
def _get_sigmets():
Expand Down
12 changes: 12 additions & 0 deletions libs/weather_data_lock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import threading

# Shared in-memory weather store and the lock that protects it.
# Acquire ``data_lock`` around every read or write of ``weather_data``.
data_lock = threading.Lock()

# NOTE(review): reviewers asked for this dictionary to become a class so that
# its members can be accessed via attributes instead of string keys.
weather_data = {
    # Latitude grid of the loaded forecast (None until data is loaded).
    "lats": None,
    # Longitude grid of the loaded forecast (None until data is loaded).
    "lons": None,
    # { FL: {"temp": 2Darray, "dir": 2Darray, "spd": 2Darray} }
    "levels": {},
    # {'String': 'YYYYMMDD', 'Cycle': '00'|'12', 'Forecast Hour': '00',
    #  'Processed At': ISO timestamp string}
    "wx_state": None,
    # ISO timestamp string written by the updater after
    # interpolate_uv_temp_at_flight_levels finishes.
    "last_updated_utc": None,
    # optional: path to last downloaded grib file
    "grib_path": None,
}
127 changes: 127 additions & 0 deletions libs/wind_lib.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please get rid of all "magic" numbers. They should all be stored in constant variables with descriptive names so future readers can understand what they represent.

Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pygrib
import requests
import numpy as np
from libs.weather_data_lock import weather_data, data_lock
import json
from datetime import datetime, timezone
import os
import csv

DOWNLOAD_CHUNK_BYTES = 8192            # size of each streamed chunk written to disk
DOWNLOAD_TIMEOUT_SECONDS = (10, 120)   # (connect, read) limits so a stalled server cannot hang the caller


def download_rap_grib2(date_str, cycle_hour, forecast_hour, save_path):
    """Download one RAP awp130 GRIB2 file from NOMADS and store it at *save_path*.

    Args:
        date_str: run date as ``YYYYMMDD``.
        cycle_hour: model cycle hour as a string, e.g. ``'00'`` or ``'12'``.
        forecast_hour: forecast hour as a string; zero-padded to two digits.
        save_path: destination file path.

    Raises:
        requests.HTTPError: the server answered with a status other than 200.
        requests.RequestException: connection problem or timeout.

    The body is streamed to ``<save_path>.part`` and moved into place only
    after the transfer completes, so a failed download never replaces a
    previously downloaded file with a truncated one.
    """
    # NOTE(review): reviewers asked whether the file needs to be saved to disk
    # at all instead of holding the data in memory.
    base_url = 'https://nomads.ncep.noaa.gov/pub/data/nccf/com/rap/prod'
    file_name = f"rap.t{cycle_hour}z.awp130pgrbf{forecast_hour.zfill(2)}.grib2"
    url = f"{base_url}/rap.{date_str}/{file_name}"

    partial_path = f"{save_path}.part"
    with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT_SECONDS) as response:
        if response.status_code != 200:
            message = f"Failed to download file. HTTP {response.status_code}: {url}"
            print(message)
            # Fail loudly: the next processing step would otherwise read a
            # stale or missing file.
            raise requests.HTTPError(message, response=response)
        with open(partial_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_BYTES):
                f.write(chunk)

    os.replace(partial_path, save_path)
    print(f"Downloaded: {save_path}")

KELVIN_TO_CELSIUS_OFFSET = 273.15        # GRIB temperatures are in kelvin; degC = K - 273.15
FEET_PER_METER = 3.281                   # geopotential height is converted from metres to feet
FEET_PER_FLIGHT_LEVEL = 100.0            # flight level = altitude in feet / 100
KNOTS_PER_METER_PER_SECOND = 1.94384449  # 1 m/s = 1.94384449 kt
MIN_FLIGHT_LEVEL = 0
MAX_FLIGHT_LEVEL = 500
FLIGHT_LEVEL_STEP = 10


def _read_isobaric_fields(grib_file):
    """Load the isobaric height, wind and temperature stacks from *grib_file*.

    Returns ``(lats, lons, hgt_3d, u_3d, v_3d, t_3d)`` with temperature
    already converted to degrees Celsius.  The GRIB file is closed before
    returning, even when reading fails.
    """
    grbs = pygrib.open(grib_file)
    try:
        hgt_grbs = grbs.select(name='Geopotential height', typeOfLevel='isobaricInhPa')
        u_grbs = grbs.select(name='U component of wind', typeOfLevel='isobaricInhPa')
        v_grbs = grbs.select(name='V component of wind', typeOfLevel='isobaricInhPa')
        t_grbs = grbs.select(name='Temperature', typeOfLevel='isobaricInhPa')

        hgt_3d = np.array([g.values for g in hgt_grbs])
        u_3d = np.array([g.values for g in u_grbs])
        v_3d = np.array([g.values for g in v_grbs])
        t_3d = np.array([g.values for g in t_grbs]) - KELVIN_TO_CELSIUS_OFFSET

        lats, lons = hgt_grbs[0].latlons()
    finally:
        grbs.close()
    return lats, lons, hgt_3d, u_3d, v_3d, t_3d


def interpolate_uv_temp_at_flight_levels(grib_file):
    """Interpolate wind and temperature to flight levels and publish the result.

    Reads *grib_file*, interpolates the U/V wind components and temperature
    at every grid point to FL000..FL500 in steps of 10, converts them to wind
    speed (kt), wind direction (degrees) and temperature (degC), all rounded
    to whole numbers, and stores the grids in ``weather_data`` under
    ``data_lock`` (keys ``lats``, ``lons`` and ``levels``).

    Grid points whose vertical profile contains a NaN are left as NaN.

    Raises:
        ValueError: the height, wind and temperature stacks do not have the
            same shape, so their levels cannot be paired up.
    """
    lats, lons, hgt_3d, u_3d, v_3d, t_3d = _read_isobaric_fields(grib_file)
    if not (hgt_3d.shape == u_3d.shape == v_3d.shape == t_3d.shape):
        raise ValueError(
            "height, wind and temperature fields have different shapes: "
            f"{hgt_3d.shape}, {u_3d.shape}, {v_3d.shape}, {t_3d.shape}"
        )

    # metres -> feet -> flight level
    fl_3d = hgt_3d * FEET_PER_METER / FEET_PER_FLIGHT_LEVEL
    fl_target = np.arange(MIN_FLIGHT_LEVEL, MAX_FLIGHT_LEVEL + FLIGHT_LEVEL_STEP, FLIGHT_LEVEL_STEP)

    out_shape = (len(fl_target),) + lats.shape
    temp_3d = np.full(out_shape, np.nan)
    speed_3d = np.full(out_shape, np.nan)
    dir_3d = np.full(out_shape, np.nan)

    # A column is usable only when none of its profiles contains a NaN.
    usable = ~(np.isnan(fl_3d) | np.isnan(u_3d) | np.isnan(v_3d) | np.isnan(t_3d)).any(axis=0)

    for i, j in zip(*np.nonzero(usable)):
        # np.interp needs increasing sample positions; sort each column by
        # height instead of assuming a particular level order in the file.
        order = np.argsort(fl_3d[:, i, j], kind="stable")
        heights = fl_3d[order, i, j]

        # One call per field interpolates every target flight level at once.
        u_val = np.interp(fl_target, heights, u_3d[order, i, j])
        v_val = np.interp(fl_target, heights, v_3d[order, i, j])
        t_val = np.interp(fl_target, heights, t_3d[order, i, j])

        speed_kt = np.sqrt(u_val**2 + v_val**2) * KNOTS_PER_METER_PER_SECOND
        # Compass bearing the wind blows from, derived from the vector angle.
        direction = (270 - np.degrees(np.arctan2(v_val, u_val))) % 360

        temp_3d[:, i, j] = np.rint(t_val)
        speed_3d[:, i, j] = np.rint(speed_kt)
        dir_3d[:, i, j] = np.rint(direction)

    level_dict = {
        int(fl): {"temp": temp_3d[k], "spd": speed_3d[k], "dir": dir_3d[k]}
        for k, fl in enumerate(fl_target)
    }

    with data_lock:
        weather_data["lats"] = lats
        weather_data["lons"] = lons
        weather_data["levels"] = level_dict

# Local file name for the downloaded GRIB file.  It is independent of RAP
# naming, so it lives in a constant rather than inside get_date().
RAP_GRIB_SAVE_PATH = 'rap_latest.grib2'


# Function to get the current RAP forecast date and time, and build file path info
def get_date(now=None):
    """Return ``(date_str, cycle_hour, forecast_hour, save_path)`` for *now*.

    Args:
        now: timezone-aware datetime to evaluate; it is converted to UTC.
            Defaults to the current UTC time.  (A naive datetime is
            interpreted as local time by ``datetime.astimezone``.)

    Returns:
        date_str: UTC date as ``YYYYMMDD``.
        cycle_hour: ``'12'`` from 12:00 UTC onwards, otherwise ``'00'``.
        forecast_hour: always ``'00'``.
        save_path: ``RAP_GRIB_SAVE_PATH``.
    """
    if now is None:
        now = datetime.now(timezone.utc)
    else:
        now = now.astimezone(timezone.utc)
    date_str = now.strftime('%Y%m%d')
    cycle_hour = '12' if now.hour >= 12 else '00'
    forecast_hour = '00'
    return date_str, cycle_hour, forecast_hour, RAP_GRIB_SAVE_PATH

# Function to check if new forecast hour differs from previously stored state
def check_state(new_forecast):
    """Return True when *new_forecast* differs from the stored forecast hour.

    Only the ``'Forecast Hour'`` entry of ``weather_data['wx_state']`` is
    compared.  When no state has been stored yet the forecast counts as new
    and True is returned.
    """
    # NOTE(review): reviewers suggested a more descriptive name for this
    # function and its parameter (e.g. different_hour(new_hour)); the current
    # name is kept because run_grid calls check_state.
    with data_lock:
        # read previous forecast from in-memory store
        stored_state = weather_data.get('wx_state')

    if stored_state:
        return new_forecast != stored_state.get('Forecast Hour')

    # no previous state -> treat as new
    return True

# Wrapper function to execute the full workflow conditionally based on forecast state
def run_grid():
    """Download and process the current RAP file if it differs from the stored one.

    The forecast counts as new when no state is stored yet, or when its date,
    cycle or forecast hour differs from ``weather_data['wx_state']``.

    Returns:
        ``'Completed'`` after a successful refresh, otherwise
        ``'Error, forecast is the same as previous forecast'``.
    """
    date_str, cycle_hour, forecast_hour, save_path = get_date()

    # get_date() always reports forecast hour '00', so comparing the forecast
    # hour alone would never detect a new run; compare date and cycle too.
    with data_lock:
        prev_state = weather_data.get('wx_state') or {}
    cycle_changed = (prev_state.get('String') != date_str
                     or prev_state.get('Cycle') != cycle_hour)
    # NOTE(review): reviewers asked for a more descriptive variable name than
    # state_check; need_new_file was their suggestion.
    need_new_file = check_state(forecast_hour) or cycle_changed

    if not need_new_file:
        return 'Error, forecast is the same as previous forecast'

    download_rap_grib2(date_str, cycle_hour, forecast_hour, save_path)
    interpolate_uv_temp_at_flight_levels(save_path)

    # persist new state in memory (add processed timestamp)
    processed_at = datetime.now(timezone.utc).isoformat()
    # NOTE(review): reviewers asked for this dictionary to become a class so
    # that its members can be accessed via attributes instead of string keys.
    new_state = {
        'String': date_str,
        'Cycle': cycle_hour,
        'Forecast Hour': forecast_hour,
        'Processed At': processed_at
    }
    with data_lock:
        weather_data['wx_state'] = new_state
        # Same bookkeeping as the background updater in app.py.
        weather_data['last_updated_utc'] = processed_at
        weather_data['grib_path'] = save_path

    return 'Completed'
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ itsdangerous==2.2.0
Jinja2==3.1.6
lxml==5.3.2
MarkupSafe==3.0.2
numpy==2.3.2
packaging==25.0
pygrib==2.1.6
pymongo==4.11.3
pyproj==3.7.2
requests==2.32.3
urllib3==2.3.0
Werkzeug==3.1.3