Clear sky days #1

Open

wants to merge 12 commits into main
16 changes: 7 additions & 9 deletions pyproject.toml
@@ -1,21 +1,19 @@
[project]
name = "CICCADA"
name = "ciccada"
version = "0.1.0"
description = "A package for analysing compliance and curtailment of consumer energy resources."
authors = [
{ name = "nick-gorman", email = "[email protected]" },
]
dependencies = [
"matplotlib>=3.10.0",
"pandas>=2.2.2",
"pytest-cov>=6.0.0",
"pytest>=8.3.4",
]
readme = "README.md"
requires-python = ">= 3.10"

[project.optional-dependencies]
solvers = [
"linopy>=0.4.2",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
@@ -24,23 +22,23 @@ build-backend = "hatchling.build"
managed = true
dev-dependencies = [
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
"pytest-cov>=6.0.0",
"pre-commit>=3.8.0",
]

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/ispypsa"]
packages = ["src/ciccada"]

[tool.pytest.ini_options]
# path to tests for pytest
testpaths = ["src", "tests"]
# addopts = add options
# --cov points pytest-cov to the src/ dir
# --cov-branch runs branch coverage
addopts = "-ra --doctest-modules --cov=src/ --cov-branch --cov-report xml:tests/coverage.xml --cov-report html:tests/htmlcov"

[tool.ruff.lint]
select = [
31 changes: 31 additions & 0 deletions scripts/5007_detect_clearsky_results.csv
@@ -0,0 +1,31 @@
,date,is_clear_algo,mean_change
0,2023-01-12,False,27.318869047619046
1,2023-01-20,False,45.63298780487805
2,2023-02-22,False,40.245986842105275
3,2023-02-24,False,57.00276315789474
4,2023-02-26,False,31.595789473684206
5,2023-03-25,False,59.47407142857142
6,2023-04-26,False,49.607936507936536
7,2023-05-02,False,79.49888888888889
8,2023-05-03,False,57.29411290322581
9,2023-05-12,False,38.87266666666665
10,2023-05-20,False,46.900084745762705
11,2023-05-22,False,23.927118644067797
12,2023-06-01,False,45.999824561403514
13,2023-06-03,False,22.26763157894737
14,2023-06-09,False,60.14859649122807
15,2023-06-19,True,23.815175438596487
16,2023-06-21,False,35.86394736842105
17,2023-07-08,False,64.12724137931035
18,2023-07-09,False,51.08568965517241
19,2023-07-12,True,21.741810344827584
20,2023-07-30,False,45.529508196721316
21,2023-08-28,False,28.724318181818173
22,2023-09-21,False,51.28278571428572
23,2023-10-05,False,44.63760273972602
24,2023-10-10,True,25.650000000000002
25,2023-10-11,False,27.9178
26,2023-10-19,False,31.4724025974026
27,2023-10-27,False,37.29967532467531
28,2023-11-08,False,40.46886075949367
29,2023-11-18,False,46.152926829268296
31 changes: 31 additions & 0 deletions scripts/5007_detect_clearsky_results_old.csv
@@ -0,0 +1,31 @@
,date,is_clear_algo,mean_change
0,2023-01-12,False,27.31886904761904
1,2023-01-20,False,45.63298780487805
2,2023-02-22,False,40.24598684210527
3,2023-02-24,False,57.00276315789473
4,2023-02-26,False,31.595789473684206
5,2023-03-25,False,59.47407142857142
6,2023-04-26,False,49.607936507936536
7,2023-05-02,False,79.49888888888891
8,2023-05-03,False,57.294112903225816
9,2023-05-12,False,38.87266666666667
10,2023-05-20,False,46.90008474576269
11,2023-05-22,False,23.927118644067797
12,2023-06-01,False,45.99982456140352
13,2023-06-03,False,22.267631578947373
14,2023-06-09,False,60.148596491228076
15,2023-06-19,True,23.815175438596476
16,2023-06-21,False,35.86394736842105
17,2023-07-08,False,64.12724137931035
18,2023-07-09,False,51.0856896551724
19,2023-07-12,True,21.74181034482758
20,2023-07-30,False,45.52950819672131
21,2023-08-28,False,28.72431818181818
22,2023-09-21,False,51.28278571428572
23,2023-10-05,False,44.637602739726034
24,2023-10-10,True,25.65000000000001
25,2023-10-11,False,27.917800000000007
26,2023-10-19,False,31.4724025974026
27,2023-10-27,False,37.29967532467532
28,2023-11-08,False,40.46886075949367
29,2023-11-18,False,46.1529268292683
79 changes: 79 additions & 0 deletions scripts/plot_ghi_test_data.py
@@ -0,0 +1,79 @@
import glob
import os
from datetime import datetime
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


def plot_time_series(input_dir, output_dir):
"""
Iterate through all CSV files in input_dir, plot the time series data,
and save plots as PNG files in output_dir.

Parameters:
-----------
input_dir : str
Directory containing the CSV data files
output_dir : str
Directory where PNG plot files will be saved
"""
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
os.makedirs(output_dir)
print(f"Created output directory: {output_dir}")

# Find all CSV files in the input directory
csv_files = glob.glob(os.path.join(input_dir, "*.csv"))

if not csv_files:
print(f"No CSV files found in {input_dir}")
return

print(f"Found {len(csv_files)} CSV files to process")

# Process each CSV file
for file_path in csv_files:
filename = os.path.basename(file_path)
file_stem = os.path.splitext(filename)[0]

print(f"Processing: {filename}")

# Read the CSV file
df = pd.read_csv(file_path)

# Convert time column to datetime
df["time"] = pd.to_datetime(df["time"])

# Create the plot
plt.figure(figsize=(12, 6))
plt.plot(df["time"], df["ghi_mean"], "-", linewidth=1)

# Format the plot
plt.title(f"Global Horizontal Irradiance (GHI) - {file_stem}")
plt.xlabel("Time")
plt.ylabel("Mean GHI")
plt.grid(True, alpha=0.3)

# Format x-axis to show date nicely
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M"))
plt.gcf().autofmt_xdate()

# Save the plot as PNG
output_path = os.path.join(output_dir, f"{file_stem}_plot.png")
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.close()

print(f" Saved plot to: {output_path}")

print("Processing complete!")


if __name__ == "__main__":
# Set your input and output directories here
input_directory = Path("tests/data/ghi_csvs")
output_directory = Path("tests/data/ghi_data_plotted")

plot_time_series(input_directory, output_directory)
50 changes: 50 additions & 0 deletions scripts/prepare_ghi_test_data.py
@@ -0,0 +1,50 @@
import random
from datetime import timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

data_folder = Path("D:/ciccada/ghi")

ghi_data_by_postcode_file = data_folder / Path(
"ghi_postcode/NCI_processed_Adelaide_grouped.csv"
)

ghi_data_by_postcode = pd.read_csv(ghi_data_by_postcode_file)

ghi_data_by_postcode["postcode"] = ghi_data_by_postcode["postcode"].astype(int)

single_postcode_data = ghi_data_by_postcode.loc[
ghi_data_by_postcode["postcode"] == 5007, :
].copy()

single_postcode_data["time"] = pd.to_datetime(
single_postcode_data["time"], format="%Y-%m-%d %H:%M:%S"
)

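# Shift timestamps to local time (assumed here to be Adelaide daylight time, UTC+10:30).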
single_postcode_data["time"] += timedelta(hours=10, minutes=30)

unique_dates = single_postcode_data["time"].dt.date.unique()

# Randomly select 30 dates
selected_dates = random.sample(list(unique_dates), 30)

for date in selected_dates:
# Filter data for the current date
daily_data = single_postcode_data[single_postcode_data["time"].dt.date == date]

csv_filename = data_folder / Path(f"5007_ghi_csvs/ghi_{date}.csv")
daily_data.to_csv(csv_filename, index=False)

# Create and save plot
plt.figure(figsize=(10, 6))
plt.plot(daily_data["time"], daily_data["surface_global_irradiance"])
plt.title(f"GHI Data for {date}")
plt.xlabel("Time")
plt.ylabel("GHI")
plt.xticks(rotation=45)
plt.tight_layout()

plt.savefig(data_folder / Path(f"5007_ghi_plots/ghi_{date}.png"))
plt.close()
27 changes: 27 additions & 0 deletions scripts/run_detect_clearsky_days_on_5007.py
@@ -0,0 +1,27 @@
from pathlib import Path

import pandas as pd

from ciccada.clear_sky_days import detect_clear_sky_day

test_data_filepaths = Path("D:/ciccada/ghi/5007_ghi_csvs").glob("*.csv")

algorithm_classification = {}
mean_change = {}
for file_path in test_data_filepaths:
test_data = pd.read_csv(file_path)
test_data = test_data.rename(columns={"surface_global_irradiance": "ghi_mean"})
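    # Use the date portion (YYYY-MM-DD) of the first timestamp to label this day's results.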
date = test_data["time"].iloc[0][:10]
algorithm_classification[date], mean_change[date] = detect_clear_sky_day(
test_data, 500.0
)

clear_sky_results = pd.DataFrame(
{
"date": algorithm_classification.keys(),
"is_clear_algo": algorithm_classification.values(),
"mean_change": mean_change.values(),
}
)

clear_sky_results.to_csv("5007_detect_clearsky_results.csv")
69 changes: 69 additions & 0 deletions src/ciccada/clear_sky_days.py
@@ -0,0 +1,69 @@
import pandas as pd


def detect_clear_sky_day(ghi_df: pd.DataFrame, min_max_ghi: float) -> tuple[bool, float]:
"""Check whether a certain day is a clear sky day or not.

    It will judge a day to be a clear sky day if it satisfies two criteria:
    1. The average change in ghi between intervals is small (less than 26 W/m2).
2. The maximum ghi value is higher than a certain threshold (min_max_ghi).

    This algorithm is based on: https://github.com/UNSW-CEEM/Solar-Curtailment/blob/163d31545bcc7bdf049b59b470fa15636c867fed/src/solarcurtailment/clear_sky_day.py#L204C1-L236C42

Examples:

>>> ghi_data = pd.DataFrame({
... 'ghi_mean': [501.0, 502.0, 503.0]
... })

>>> detect_clear_sky_day(ghi_data, min_max_ghi=500.0)
    (True, 0.6666666666666666)

Args:
        ghi_df (pd.DataFrame) : ghi data sorted in time sequential order with a column
            `ghi_mean` specifying the ghi for the interval in W/m2.
        min_max_ghi (float) : the minimum value of the maximum ghi. If the maximum ghi is
            lower than this value, there must be cloud.

Returns:
        (tuple[bool, float]) : whether the day is a clear sky day, and the average change
            in ghi (W/m2) across daytime intervals.
"""
    df_daytime = ghi_df.loc[ghi_df["ghi_mean"] > 0]
    if len(df_daytime.index) == 0:
        return False, 0.0
    collective_change = df_daytime["ghi_mean"].diff().abs().sum()
    average_change = collective_change / len(df_daytime.index)
    is_clear = average_change < 26 and ghi_df["ghi_mean"].max() > min_max_ghi
    return bool(is_clear), float(average_change)


def old_detect_clear_sky_day(ghi_df, min_max_ghi):
"""Check whether a certain day is a clear sky day or not.

    Args:
        ghi_df (pd.DataFrame) : ghi data with a column `ghi_mean` in W/m2.
        min_max_ghi (float) : the minimum value of the maximum ghi. If the maximum ghi is
            lower than this value, there must be cloud.

    Returns:
        (tuple[bool, float]) : whether the day is a clear sky day, and the average change
            in ghi across daytime intervals.

    It will judge a day to be a clear sky day if it satisfies two criteria:
    1. There is no sudden change in ghi (a sudden change indicates cloud).
    2. The maximum ghi value is higher than a certain threshold (min_max_ghi).
"""

df_daytime = ghi_df.loc[ghi_df["ghi_mean"] > 0]

collective_change = 0
ghi_list = df_daytime.ghi_mean.tolist()

for i in range(len(ghi_list) - 1):
collective_change += abs(ghi_list[i + 1] - ghi_list[i])

if len(df_daytime.index) == 0:
return False, 0

average_delta_y = collective_change / len(df_daytime.index)

if average_delta_y < 26 and max(ghi_df.ghi_mean) > min_max_ghi:
return True, average_delta_y
else:
return False, average_delta_y
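
For anyone wanting to sanity-check that the new vectorised implementation agrees with the older loop-based one, a minimal comparison along the following lines should work. The `ghi_mean` values below are made-up illustrative numbers, not project data.

import pandas as pd

from ciccada.clear_sky_days import detect_clear_sky_day, old_detect_clear_sky_day

# A synthetic day: ghi ramps up and down smoothly except for one cloudy dip.
ghi_data = pd.DataFrame(
    {"ghi_mean": [0.0, 120.0, 350.0, 560.0, 700.0, 310.0, 690.0, 540.0, 300.0, 90.0, 0.0]}
)

# Both implementations should return the same classification and mean change.
print(detect_clear_sky_day(ghi_data, min_max_ghi=500.0))
print(old_detect_clear_sky_day(ghi_data, min_max_ghi=500.0))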