Clear sky days #1

Open

wants to merge 12 commits into main
16 changes: 7 additions & 9 deletions pyproject.toml
@@ -1,21 +1,19 @@
[project]
name = "CICCADA"
name = "ciccada"
version = "0.1.0"
description = "A package for analysing compliance and curtailment of consumer energy resources."
authors = [
{ name = "nick-gorman", email = "[email protected]" },
]
dependencies = [
"matplotlib>=3.10.0",
"pandas>=2.2.2",
"pytest-cov>=6.0.0",
"pytest>=8.3.4",
]
readme = "README.md"
requires-python = ">= 3.10"

[project.optional-dependencies]
solvers = [
"linopy>=0.4.2",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
@@ -24,23 +22,23 @@ build-backend = "hatchling.build"
managed = true
dev-dependencies = [
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
"pytest-cov>=6.0.0",
"pre-commit>=3.8.0",
]

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/ispypsa"]
packages = ["src/ciccada"]

[tool.pytest.ini_options]
# path to tests for pytest
testpaths = ["src", "tests"]
# addopts = add options
# --cov points pytest-cov to the src/ dir
# --cov-branch runs branch coverage
addopts = "-ra --doctest-modules --cov=src/ --cov-branch --cov-report xml:tests/coverage.xml --cov-report html:tests/htmlcov"

[tool.ruff.lint]
select = [
31 changes: 31 additions & 0 deletions scripts/5007_detect_clearsky_results.csv
@@ -0,0 +1,31 @@
,date,is_clear_algo,mean_change
0,2023-01-12,False,27.318869047619046
1,2023-01-20,False,45.63298780487805
2,2023-02-22,False,40.245986842105275
3,2023-02-24,False,57.00276315789474
4,2023-02-26,False,31.595789473684206
5,2023-03-25,False,59.47407142857142
6,2023-04-26,False,49.607936507936536
7,2023-05-02,False,79.49888888888889
8,2023-05-03,False,57.29411290322581
9,2023-05-12,False,38.87266666666665
10,2023-05-20,False,46.900084745762705
11,2023-05-22,False,23.927118644067797
12,2023-06-01,False,45.999824561403514
13,2023-06-03,False,22.26763157894737
14,2023-06-09,False,60.14859649122807
15,2023-06-19,True,23.815175438596487
16,2023-06-21,False,35.86394736842105
17,2023-07-08,False,64.12724137931035
18,2023-07-09,False,51.08568965517241
19,2023-07-12,True,21.741810344827584
20,2023-07-30,False,45.529508196721316
21,2023-08-28,False,28.724318181818173
22,2023-09-21,False,51.28278571428572
23,2023-10-05,False,44.63760273972602
24,2023-10-10,True,25.650000000000002
25,2023-10-11,False,27.9178
26,2023-10-19,False,31.4724025974026
27,2023-10-27,False,37.29967532467531
28,2023-11-08,False,40.46886075949367
29,2023-11-18,False,46.152926829268296
31 changes: 31 additions & 0 deletions scripts/5007_detect_clearsky_results_old.csv
@@ -0,0 +1,31 @@
,date,is_clear_algo,mean_change
0,2023-01-12,False,27.31886904761904
1,2023-01-20,False,45.63298780487805
2,2023-02-22,False,40.24598684210527
3,2023-02-24,False,57.00276315789473
4,2023-02-26,False,31.595789473684206
5,2023-03-25,False,59.47407142857142
6,2023-04-26,False,49.607936507936536
7,2023-05-02,False,79.49888888888891
8,2023-05-03,False,57.294112903225816
9,2023-05-12,False,38.87266666666667
10,2023-05-20,False,46.90008474576269
11,2023-05-22,False,23.927118644067797
12,2023-06-01,False,45.99982456140352
13,2023-06-03,False,22.267631578947373
14,2023-06-09,False,60.148596491228076
15,2023-06-19,True,23.815175438596476
16,2023-06-21,False,35.86394736842105
17,2023-07-08,False,64.12724137931035
18,2023-07-09,False,51.0856896551724
19,2023-07-12,True,21.74181034482758
20,2023-07-30,False,45.52950819672131
21,2023-08-28,False,28.72431818181818
22,2023-09-21,False,51.28278571428572
23,2023-10-05,False,44.637602739726034
24,2023-10-10,True,25.65000000000001
25,2023-10-11,False,27.917800000000007
26,2023-10-19,False,31.4724025974026
27,2023-10-27,False,37.29967532467532
28,2023-11-08,False,40.46886075949367
29,2023-11-18,False,46.1529268292683
79 changes: 79 additions & 0 deletions scripts/plot_ghi_test_data.py
@@ -0,0 +1,79 @@
import glob
import os
from datetime import datetime
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


def plot_time_series(input_dir, output_dir):
"""
Iterate through all CSV files in input_dir, plot the time series data,
and save plots as PNG files in output_dir.

Parameters:
-----------
input_dir : str
Directory containing the CSV data files
output_dir : str
Directory where PNG plot files will be saved
"""
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
os.makedirs(output_dir)
print(f"Created output directory: {output_dir}")

# Find all CSV files in the input directory
csv_files = glob.glob(os.path.join(input_dir, "*.csv"))

if not csv_files:
print(f"No CSV files found in {input_dir}")
return

print(f"Found {len(csv_files)} CSV files to process")

# Process each CSV file
for file_path in csv_files:
filename = os.path.basename(file_path)
file_stem = os.path.splitext(filename)[0]

print(f"Processing: {filename}")

# Read the CSV file
df = pd.read_csv(file_path)

# Convert time column to datetime
df["time"] = pd.to_datetime(df["time"])

# Create the plot
plt.figure(figsize=(12, 6))
plt.plot(df["time"], df["ghi_mean"], "-", linewidth=1)

# Format the plot
plt.title(f"Global Horizontal Irradiance (GHI) - {file_stem}")
plt.xlabel("Time")
plt.ylabel("Mean GHI")
plt.grid(True, alpha=0.3)

# Format x-axis to show date nicely
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M"))
plt.gcf().autofmt_xdate()

# Save the plot as PNG
output_path = os.path.join(output_dir, f"{file_stem}_plot.png")
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.close()

print(f" Saved plot to: {output_path}")

print("Processing complete!")


if __name__ == "__main__":
# Set your input and output directories here
input_directory = Path("tests/data/ghi_csvs")
output_directory = Path("tests/data/ghi_data_plotted")

plot_time_series(input_directory, output_directory)
50 changes: 50 additions & 0 deletions scripts/prepare_ghi_test_data.py
@@ -0,0 +1,50 @@
import random
from datetime import timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

data_folder = Path("D:/ciccada/ghi")

ghi_data_by_postcode_file = data_folder / Path(
"ghi_postcode/NCI_processed_Adelaide_grouped.csv"
)

ghi_data_by_postcode = pd.read_csv(ghi_data_by_postcode_file)

ghi_data_by_postcode["postcode"] = ghi_data_by_postcode["postcode"].astype(int)

single_postcode_data = ghi_data_by_postcode.loc[
ghi_data_by_postcode["postcode"] == 5007, :
].copy()

single_postcode_data["time"] = pd.to_datetime(
single_postcode_data["time"], format="%Y-%m-%d %H:%M:%S"
)

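# Shift timestamps to local time (assumed here to be Adelaide daylight time, UTC+10:30).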
single_postcode_data["time"] += timedelta(hours=10, minutes=30)

unique_dates = single_postcode_data["time"].dt.date.unique()

# Randomly select 30 dates
selected_dates = random.sample(list(unique_dates), 30)

for date in selected_dates:
# Filter data for the current date
daily_data = single_postcode_data[single_postcode_data["time"].dt.date == date]

csv_filename = data_folder / Path(f"5007_ghi_csvs/ghi_{date}.csv")
daily_data.to_csv(csv_filename, index=False)

# Create and save plot
plt.figure(figsize=(10, 6))
plt.plot(daily_data["time"], daily_data["surface_global_irradiance"])
plt.title(f"GHI Data for {date}")
plt.xlabel("Time")
plt.ylabel("GHI")
plt.xticks(rotation=45)
plt.tight_layout()

plt.savefig(data_folder / Path(f"5007_ghi_plots/ghi_{date}.png"))
plt.close()
27 changes: 27 additions & 0 deletions scripts/run_detect_clearsky_days_on_5007.py
@@ -0,0 +1,27 @@
from pathlib import Path

import pandas as pd

from ciccada.clear_sky_days import detect_clear_sky_day

test_data_filepaths = Path("D:/ciccada/ghi/5007_ghi_csvs").glob("*.csv")

algorithm_classification = {}
mean_change = {}
for file_path in test_data_filepaths:
test_data = pd.read_csv(file_path)
test_data = test_data.rename(columns={"surface_global_irradiance": "ghi_mean"})
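    # Use the date portion (YYYY-MM-DD) of the first timestamp to label this day's results.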
date = test_data["time"].iloc[0][:10]
algorithm_classification[date], mean_change[date] = detect_clear_sky_day(
test_data, 500.0
)

clear_sky_results = pd.DataFrame(
{
"date": algorithm_classification.keys(),
"is_clear_algo": algorithm_classification.values(),
"mean_change": mean_change.values(),
}
)

clear_sky_results.to_csv("5007_detect_clearsky_results.csv")
69 changes: 69 additions & 0 deletions src/ciccada/clear_sky_days.py
@@ -0,0 +1,69 @@
import pandas as pd


def detect_clear_sky_day(ghi_df: pd.DataFrame, min_max_ghi: float) -> tuple[bool, float]:
"""Check whether a certain day is a clear sky day or not.

    It will judge a day to be a clear sky day if it satisfies two criteria:
    1. The average change in ghi between intervals is small (less than 26 W/m2).
2. The maximum ghi value is higher than a certain threshold (min_max_ghi).

    This algorithm is based on: https://github.com/UNSW-CEEM/Solar-Curtailment/blob/163d31545bcc7bdf049b59b470fa15636c867fed/src/solarcurtailment/clear_sky_day.py#L204C1-L236C42

Examples:

>>> ghi_data = pd.DataFrame({
... 'ghi_mean': [501.0, 502.0, 503.0]
... })

>>> detect_clear_sky_day(ghi_data, min_max_ghi=500.0)
    (True, 0.6666666666666666)

Args:
        ghi_df (pd.DataFrame) : ghi data sorted in time sequential order with a column
            `ghi_mean` specifying the ghi for the interval in W/m2.
        min_max_ghi (float) : the minimum value of the maximum ghi. If the maximum ghi is
            lower than this value, there must be cloud.

Returns:
        (tuple[bool, float]) : whether the day is a clear sky day, and the average change
            in ghi (W/m2) across daytime intervals.
"""
    df_daytime = ghi_df.loc[ghi_df["ghi_mean"] > 0]
    if len(df_daytime.index) == 0:
        return False, 0.0
    collective_change = df_daytime["ghi_mean"].diff().abs().sum()
    average_change = collective_change / len(df_daytime.index)
    is_clear = average_change < 26 and ghi_df["ghi_mean"].max() > min_max_ghi
    return bool(is_clear), float(average_change)


def old_detect_clear_sky_day(ghi_df, min_max_ghi):
"""Check whether a certain day is a clear sky day or not.

    Args:
        ghi_df (pd.DataFrame) : ghi data with a column `ghi_mean` in W/m2.
        min_max_ghi (float) : the minimum value of the maximum ghi. If the maximum ghi is
            lower than this value, there must be cloud.

    Returns:
        (tuple[bool, float]) : whether the day is a clear sky day, and the average change
            in ghi across daytime intervals.

    It will judge a day to be a clear sky day if it satisfies two criteria:
    1. There is no sudden change in ghi (a sudden change indicates cloud).
    2. The maximum ghi value is higher than a certain threshold (min_max_ghi).
"""

df_daytime = ghi_df.loc[ghi_df["ghi_mean"] > 0]

collective_change = 0
ghi_list = df_daytime.ghi_mean.tolist()

for i in range(len(ghi_list) - 1):
collective_change += abs(ghi_list[i + 1] - ghi_list[i])

if len(df_daytime.index) == 0:
return False, 0

average_delta_y = collective_change / len(df_daytime.index)

if average_delta_y < 26 and max(ghi_df.ghi_mean) > min_max_ghi:
return True, average_delta_y
else:
return False, average_delta_y
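
For anyone wanting to sanity-check that the new vectorised implementation agrees with the older loop-based one, a minimal comparison along the following lines should work. The `ghi_mean` values below are made-up illustrative numbers, not project data.

import pandas as pd

from ciccada.clear_sky_days import detect_clear_sky_day, old_detect_clear_sky_day

# A synthetic day: ghi ramps up and down smoothly except for one cloudy dip.
ghi_data = pd.DataFrame(
    {"ghi_mean": [0.0, 120.0, 350.0, 560.0, 700.0, 310.0, 690.0, 540.0, 300.0, 90.0, 0.0]}
)

# Both implementations should return the same classification and mean change.
print(detect_clear_sky_day(ghi_data, min_max_ghi=500.0))
print(old_detect_clear_sky_day(ghi_data, min_max_ghi=500.0))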