Skip to content

Gsoc 25 shivam code #114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions GOSC-25_SHIVAM_CODES/NetCDF_DataReader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
NetCDF_DataReader.py
Purpose: Load and manipulate netCDF data (e.g., CBOFS) using xarray for ocean model analysis.
Explore data attributes, variables, and perform basic data manipulation.

"""
#%%%%

# Import necessary libraries
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

#%%%

class NetCDFDataReader:
    """Load a netCDF dataset with xarray and extract labelled subsets.

    Parameters
    ----------
    file_path : str
        Path or OPeNDAP URL of the netCDF file to read.
    """

    def __init__(self, file_path):
        # The dataset is loaded lazily by load_data(), not in the constructor.
        self.file_path = file_path
        self.dataset = None

    def load_data(self):
        """Open the netCDF file at ``self.file_path``.

        Returns
        -------
        xarray.Dataset or None
            The opened dataset, or None on failure (the error is printed
            rather than raised, matching this module's best-effort style).
        """
        try:
            self.dataset = xr.open_dataset(self.file_path, decode_times=True)
            print("Dataset loaded successfully.")
            return self.dataset
        except Exception as e:
            print(f"Error loading dataset: {e}")
            return None

    def get_subset(self, **kwargs):
        """Slice the loaded dataset along one or more dimensions by label.

        Keyword Args:
            features: list of dimension names to slice on.
            columns: slice bounds — either a flat ``[start, stop]`` pair
                (single feature, the original calling convention) or a
                list of ``(start, stop)`` pairs, one per feature.

        Returns
        -------
        xarray.Dataset or None
            The sliced dataset, or None if no dataset is loaded or the
            selection fails.
        """
        if self.dataset is None:
            print("Error: Dataset not loaded.")
            return None
        try:
            features = list(kwargs.get('features'))
            bounds = list(kwargs.get('columns'))
            # Backward compatibility: a flat [start, stop] list means a
            # single range for the first (only) feature.
            if bounds and not isinstance(bounds[0], (list, tuple)):
                bounds = [tuple(bounds[:2])]
            # BUG FIX: the original used only features[0], silently
            # ignoring every other requested dimension.  Build one
            # indexer dict so all requested dimensions are sliced.
            indexers = {dim: slice(lo, hi) for dim, (lo, hi) in zip(features, bounds)}
            subset = self.dataset.sel(**indexers)
            print("Subset extracted successfully.")
            return subset
        except Exception as e:
            print(f"Error extracting subset: {e}")
            return None


# %%

# Demonstration: read a remote CTD profile hosted on an OPeNDAP server.
# NOTE(review): requires network access; data will be None if the fetch fails.
file_loc = 'https://opendap1.nodc.no/opendap/physics/point/cruise/nansen_legacy-single_profile/NMDC_Nansen-Legacy_PR_CT_58US_2021708/CTD_station_P1_NLEG01-1_-_Nansen_Legacy_Cruise_-_2021_Joint_Cruise_2-1.nc'
# create an instance of the NetCDFDataReader class
ncd = NetCDFDataReader(file_loc)
data =ncd.load_data() # load the data (prints an error and returns None on failure)
print(data)
# %%
# exploring the dimensions, variables and attributes of the data

""" Dimensions"""
print(data.dims) # print the dimensions of the dataset
# observed output: ({'PRES': 320}), i.e. one profile sampled at 320 pressure levels

"""Exploring the Temp vars"""
print(data['TEMP']) # print the TEMP variable (metadata plus values)
print(data['TEMP'].to_dataframe()) # same TEMP data rendered as a pandas DataFrame

"""Attributes"""
print(data.attrs) # print the global attributes of the dataset
# output is a dictionary of dataset-level metadata attributes
# %%
# %%
129 changes: 129 additions & 0 deletions GOSC-25_SHIVAM_CODES/Plotting_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""
Plotting_data.py
Purpose: Plot the combined data created in TimeSeries_Data_Exploration.py
Explore multiple plotting functions for gridded data

"""

#%%# Import necessary libraries
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import cmocean


# %%


# Loading the combined data produced by TimeSeries_Data_Exploration.py
data = xr.load_dataset(r'Example Data/combined_data.nc') # load the combined data


# %%

combined_data = data.copy() # work on a copy so the loaded dataset stays untouched
# Set up the map projection (PlateCarree: a simple lat/lon projection)
projection = ccrs.PlateCarree()

# Create a figure with one map per time step
# NOTE(review): nrows=3 hard-codes exactly 3 time steps in the file -- confirm
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 15),
                         subplot_kw={'projection': projection})

# Plot temperature at one vertical level for each time step
for t in range(len(combined_data.ocean_time)):
    ax = axes[t]

    # Select temperature at this time step and vertical level
    # NOTE(review): s_rho=10 is a mid-column level, not the surface the
    # variable name and titles suggest -- confirm intent
    surface_temp = combined_data.temp.isel(ocean_time=t,s_rho=10)

    print(surface_temp.shape)
    print(surface_temp.to_dataframe())
    # Draw the base map
    ax.coastlines(resolution='110m')
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.OCEAN)
    ax.gridlines(draw_labels=True)

    # Plot the temperature field on the curvilinear rho grid
    p = ax.pcolormesh(combined_data.lon_rho, combined_data.lat_rho, surface_temp,
                      transform=ccrs.PlateCarree(), # data coordinates are lat/lon
                      cmap=cmocean.cm.thermal, # cmocean's thermal colormap
                      vmin=surface_temp.min(), vmax=surface_temp.max())

    # Add a colorbar
    plt.colorbar(p, ax=ax, label='Temperature (°C)')

    # Set title
    ax.set_title(f'Surface Temperature at {combined_data.ocean_time[t].values}')

plt.tight_layout()
plt.show()
# %%

import numpy as np
from scipy.ndimage import gaussian_filter1d

# Spatial mean over the whole grid at vertical level s_rho=19
# NOTE(review): assumes s_rho=19 is the surface layer -- confirm
surface_temp = combined_data.temp.isel(s_rho=19).mean(dim=['eta_rho', 'xi_rho'])

# Pull the values out of xarray into plain numpy arrays for filtering
temp_values = surface_temp.values
time_values = combined_data.ocean_time.values

# Smooth the temperature series with a 1-D Gaussian filter
smoothed_temp = gaussian_filter1d(temp_values, sigma=1) # larger sigma -> stronger smoothing

# Plot the original and smoothed time series together for comparison
plt.figure(figsize=(8, 4))
plt.plot(time_values, temp_values, marker='o', label='Original', color='blue')
plt.plot(time_values, smoothed_temp, label='Smoothed (Gaussian)', color='red')
plt.xlabel('Time')
plt.ylabel('Temperature (°C)')
plt.title('Spatially Averaged Surface Temperature (Smoothed)')
plt.grid(True)
plt.legend()
plt.show()
# %%

# Build a tidy DataFrame for plotting: one row per rho grid point, with its
# lon/lat and the temperature at the first time step and bottom s_rho level.
df = combined_data.temp.isel(ocean_time=0, s_rho=0).to_dataframe().reset_index()
print(df['lat_rho'].min(),df['lat_rho'].max()) # latitude extent of the grid
print(df['lon_rho'].min(),df['lon_rho'].max()) # longitude extent of the grid
# %%

# plot to highlight regions of data in the US MAP

# initializing the figure
plt.figure(figsize=(10, 6))
m1= plt.axes(projection=ccrs.PlateCarree())

# adding features to the map
m1.add_feature(cfeature.LAND)
m1.add_feature(cfeature.OCEAN)
m1.gridlines(draw_labels=True)
m1.add_feature(cfeature.COASTLINE)
m1.add_feature(cfeature.BORDERS, linestyle=':')
m1.add_feature(cfeature.LAKES, color='lightblue')
m1.add_feature(cfeature.RIVERS, color='blue')
m1.add_feature(cfeature.STATES, linestyle=':')

# restrict the view to the continental US
m1.set_extent([-130,-60,20,35])

# PERF FIX: one vectorized scatter call instead of the original Python loop
# that issued a separate m1.plot() draw call per grid point.
m1.scatter(df['lon_rho'], df['lat_rho'], color='red', marker='o',
           transform=ccrs.PlateCarree())
m1.stock_img()



# %%
70 changes: 70 additions & 0 deletions GOSC-25_SHIVAM_CODES/TimeSeries_Data_Exploration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
TimeSeries_Data_Exploration.py
Purpose: Load and explore the data of different time steps to get a flavour of timeseries data
Handle data from multiple time periods, plot it, and save the combined result

"""

#%%%%

# Import necessary libraries
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os

#%%%
# LOADING DATA

loc = "../Example Data" # folder the CBOFS field files are loaded from
# sorted() makes the order deterministic: os.listdir() returns names in
# arbitrary order, and the chronological t00z/t06z/t12z order matters below.
files = sorted(os.listdir(loc)) # list all files in the folder
print(files) # print the list of files
"""
we have 3 files for the same day but executed at gap of 6 hrs
these files are :
1. nos.cbofs.fields.f001.20231127.t00z.nc
2. nos.cbofs.fields.f001.20231127.t06z.nc
3. nos.cbofs.fields.f001.20231127.t12z.nc
"""

# BUG FIX: join the folder path onto each file name.  The original passed
# the bare names from os.listdir() to xr.open_dataset, which only worked
# if the current working directory happened to be the data folder.
data1 = xr.open_dataset(os.path.join(loc, files[0]), engine='netcdf4')
data2 = xr.open_dataset(os.path.join(loc, files[1]), engine='netcdf4')
data3 = xr.open_dataset(os.path.join(loc, files[2]), engine='netcdf4')


# %%

"""
We would append temp data for all time-steps and create a combined data

Then we would slice data to create timeseries data for a specific location

Then we would try to plot the same

"""

datasets = [data1, data2, data3]

# Combine along the ocean_time dimension
combined_data = xr.concat(datasets, dim='ocean_time')

# Check the combined dataset
print(combined_data)


""" PLOTTING TEMP AT A SPECIFIC LOCATION """
# NOTE(review): assumes s_rho=19 is the surface layer -- confirm
specific_point_temp = combined_data.temp.isel(eta_rho=100, xi_rho=100, s_rho=19)

# Plot
plt.figure(figsize=(8, 4))
plt.plot(combined_data.ocean_time, specific_point_temp, marker='o', label='Temp at (eta_rho=100, xi_rho=100, s_rho=19)')
plt.xlabel('Time')
plt.ylabel('Temperature (°C)')
plt.title('Temperature Time Series at (eta_rho=100, xi_rho=100, s_rho=19 Surface)')
plt.grid(True)
plt.legend()
plt.show()

# NOTE(review): this saves into the CWD, but Plotting_data.py reads from
# 'Example Data/combined_data.nc' -- confirm the two paths line up.
combined_data.to_netcdf('combined_data.nc') # save the combined data

#%%

#%%
68 changes: 68 additions & 0 deletions GOSC-25_SHIVAM_CODES/Validation_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Validation_module.py
Purpose: Compute the RMSE between two different netCDF files.
Given two model output files six hours apart (one treated as predicted, one as observed), provide a module for computing RMSE.

"""

#%%%%

# Import necessary libraries
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os

# %%

# creating a class to preprocess data and evaluation code using RMSE after slicing entered by user
class Eval():
    """Compare a predicted and an observed dataset on the 'temp' variable.

    Parameters
    ----------
    data1 : dataset treated as the prediction
    data2 : dataset treated as the observation
    """

    def __init__(self, data1, data2):
        self.pred = data1
        self.actual = data2

    def rmse(self, filter):
        """Root-mean-square error of 'temp' over a user-sliced region.

        Args:
            filter: dict mapping dimension name -> [start, stop] bounds,
                e.g. {"s_rho": [-0.975, -0.875]}.

        Returns the RMSE as a float, or None if either dataset is missing.
        """
        if self.pred is None or self.actual is None:
            print("Error: Dataset not loaded.")
            return None

        # BUG FIX: the original looped over filter.keys() and re-sliced the
        # *full* dataset on each pass, so only the last key's bounds ever
        # took effect.  Build a single indexer dict and slice once instead.
        indexers = {dim: slice(bounds[0], bounds[1]) for dim, bounds in filter.items()}
        pred_subset = self.pred['temp'].sel(**indexers)
        actual_subset = self.actual['temp'].sel(**indexers)

        # Flatten both subsets to plain 'temp' columns with matching row order
        pred_vals = pred_subset.to_dataframe().reset_index()['temp']
        actual_vals = actual_subset.to_dataframe().reset_index()['temp']

        # Drop NaNs pairwise so every prediction is compared against its own
        # observation (a single mask guarantees the indexes stay aligned).
        valid = pred_vals.notna() & actual_vals.notna()
        diff = pred_vals[valid] - actual_vals[valid]

        # Calculate RMSE
        return np.sqrt((diff ** 2).mean())

    # Placeholder for a user-supplied metric; not implemented yet.
    def Metric(self, filter):
        pass




# %%

# Load the two model runs: treat the t00z run as the prediction and the
# t06z run (six hours later) as the observation.
pred = xr.open_dataset("../Example Data/nos.cbofs.fields.f001.20231127.t00z.nc") # load predicted data for now
actual = xr.open_dataset("../Example Data/nos.cbofs.fields.f001.20231127.t06z.nc") # load actual data for now

#%%
# FIX: renamed 'eval' -> 'evaluator' and 'filter' -> 'bounds'; the original
# names shadowed the Python builtins eval() and filter().
evaluator = Eval(pred, actual)
bounds = {"s_rho": [-0.975, -0.875]}
rmse = evaluator.rmse(bounds)
print("RMSE extracted successfully")
print(f"RMSE is :{rmse}")
# %%
Loading