From 976d26c37baeb0d3a13fe5206e8b01f809c824dd Mon Sep 17 00:00:00 2001
From: micah johnson <micah.johnson150@gmail.com>
Date: Fri, 28 Nov 2025 08:38:37 -0700
Subject: [PATCH 1/6] Semi working pit uploader for 2020

---
 scripts/upload/add_time_series_pits_2020.py | 189 ++++++++------------
 1 file changed, 75 insertions(+), 114 deletions(-)

diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py
index f8e6ef2..a203b07 100644
--- a/scripts/upload/add_time_series_pits_2020.py
+++ b/scripts/upload/add_time_series_pits_2020.py
@@ -4,132 +4,93 @@
 
 import glob
 import re
-from os.path import abspath, join
 from pathlib import Path
+from earthaccess_data import get_files
+from import_logger import get_logger
+from snowexsql.db import db_session_with_credentials
 
-from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch
-from snowex_db.upload import PointDataCSV
-from snowex_db import db_session
+from snowex_db.upload.layers import UploadProfileBatch
+
+LOG = get_logger()
 
 
 tz_map = {'US/Pacific': ['CA', 'NV', 'WA'],
           'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
           }
 
+SNOWEX_PITS_MAP = {
+    "SNEX20_TS_SP": "10.5067/KZ43HVLZV6G4"
+}
+
+# Filename keyword to the instrument used
+INSTRUMENT_MAP = {
+                    "siteDetails": None,
+                    "density": "Density Cutter",
+                    "temperature": "Digital Thermometer",
+                    "LWC": "A2 Sensor",
+                    "stratigraphy": "Manual"
+                  }
 
-def main():
+def get_site_id(filename: str) -> str:
+    """
+    Get the site ID based on the site code in the filename
+    """
+    compiled = re.compile(
+        r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv'
+    )
+    match = compiled.match(Path(filename).name)
+    if match:
+        code = match.group(1)
+        return code
+    else:
+        raise RuntimeError(f"No site ID found for {filename}")
+
+
+def get_timezone(site_id: str) -> str:
+    """
+    Get the timezone based on the site code
+    """
+    abbrev = site_id[0:2]
+    tz = [k for k, states in tz_map.items() if abbrev in states][0]
+    return tz
+
+
+def main(file_list: list, doi: str) -> None:
     """
     Add 2020 timeseries pits
     """
-    db_name = 'localhost/snowex'
-
-    # Version 2 DOI
-    # https://nsidc.org/data/snex20_ts_sp/versions/2
-    doi = "https://doi.org/10.5067/KZ43HVLZV6G4"
-    debug = True
-
-    # Point to the downloaded data from
-    data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/')
-    error_msg = []
-
-    # Files to ignore
-    ignore_files = [
-        "SNEX20_TS_SP_Summary_Environment_v02.csv",
-        "SNEX20_TS_SP_Summary_SWE_v02.csv",
-        "SNEX20_TS_SP_Summary_SWE_v02_modified.csv"
-    ]
-
-    # Get all the date folders
-    unique_dt_olders = Path(
-        data_dir
-    ).expanduser().absolute().glob("20*.*.*")
-    for udf in unique_dt_olders:
-        # get all the csvs in the folder
-        dt_folder_files = list(udf.glob("*.csv"))
-        site_ids = []
-        # Get the unique site ids for this date folder
-        compiled = re.compile(
-            r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv'
-        )
-        for file_path in dt_folder_files:
-            file_name = file_path.name
-            if file_name in ignore_files:
-                print(f"Skipping {file_name}")
-                continue
-            match = compiled.match(file_name)
-            if match:
-                code = match.group(1)
-                site_ids.append(code)
-            else:
-                raise RuntimeError(f"No site ID found for {file_name}")
-
-        # Get the unique site ids
-        site_ids = list(set(site_ids))
-
-        for site_id in site_ids:
-            abbrev = site_id[0:2]
-            tz = [k for k, states in tz_map.items() if abbrev in states][0]
-
-            # Grab all the csvs in the pits folder
-            filenames = glob.glob(join(str(udf), f'*_{site_id}_*.csv'))
-
-            # Grab all the site details files
-            sites = glob.glob(join(
-                str(udf), f'*_{site_id}_*siteDetails*.csv'
-            ))
-
-            # Grab all the perimeter depths and remove them for now.
-            perimeter_depths = glob.glob(join(
-                str(udf), f'*_{site_id}_*perimeterDepths*.csv'
-            ))
-
-            # Use no-gap-filled density for the sole reason that
-            # Gap filled density for profiles where the scale was broken
-            # are just an empty file after the headers. We should
-            # Record that Nan density was collected for the profile
-            density_files = glob.glob(join(
-                str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv'
-            ))
-
-            # Remove the site details from the total file list to get only the
-            profiles = list(
-                set(filenames) - set(sites) - set(perimeter_depths) -
-                set(density_files)  # remove non-gap-filled denisty
-            )
-
-            # Submit all profiles associated with pit at a time
-            b = UploadProfileBatch(
-                filenames=profiles, debug=debug, doi=doi, in_timezone=tz,
-                db_name=db_name,
-                allow_split_lines=True  # Logic for split header lines
-            )
-            b.push()
-            error_msg += b.errors
-
-            # Upload the site details
-            sd = UploadSiteDetailsBatch(
-                filenames=sites, debug=debug, doi=doi, in_timezone=tz,
-                db_name=db_name
-            )
-            sd.push()
-            error_msg += sd.errors
-
-            # Submit all perimeters as point data
-            with db_session(
-                db_name, credentials='credentials.json'
-            ) as (session, engine):
-                for fp in perimeter_depths:
-                    pcsv = PointDataCSV(
-                        fp, doi=doi, debug=debug, depth_is_metadata=False,
-                        in_timezone=tz,
-                        allow_split_lines=True  # Logic for split header lines
-                    )
-                    pcsv.submit(session)
-
-    for f, m in error_msg:
-        print(f)
-    return len(error_msg)
+    # Constant metadata for the 2020 time series pit data
+    kwargs = {
+        "campaign_name": "2020 Timeseries",
+        "doi": doi,
+    }
+
+    # Files to remove
+
+
+    with db_session_with_credentials('./credentials.json') as (_engine, session):
+
+        # Filter by instrument
+        for keyword, instrument in INSTRUMENT_MAP.items():
+            instrumented_files = [
+                f for f in file_list if keyword in Path(f).name
+            ]
+            kwargs["instrument"] = instrument
+
+            # Filter to sites to manage the timezones
+            unique_sites = set([get_site_id(f) for f in instrumented_files])
+            
+            for site in unique_sites:
+                site_files = [
+                    f for f in instrumented_files if site in f
+                ]
+                kwargs["timezone"] = get_timezone(site)
+                
+                uploader = UploadProfileBatch(session, site_files, **kwargs)
+                uploader.push()
 
 
 if __name__ == '__main__':
-    main()
+    for data_set_id, doi in SNOWEX_PITS_MAP.items():
+        with get_files(data_set_id, doi) as files:
+            main(files, doi)
\ No newline at end of file

From 0efeb2dabd2b4ec8da1cbef2c729f66ba9065f2c Mon Sep 17 00:00:00 2001
From: micah johnson <micah.johnson150@gmail.com>
Date: Sat, 29 Nov 2025 06:28:27 -0700
Subject: [PATCH 2/6] Working script to upload 2020 time series pits. Working
 on #43

---
 scripts/upload/add_time_series_pits_2020.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py
index a203b07..5b44991 100644
--- a/scripts/upload/add_time_series_pits_2020.py
+++ b/scripts/upload/add_time_series_pits_2020.py
@@ -2,7 +2,6 @@
 Script to upload the Snowex Time Series pits
 """
 
-import glob
 import re
 from pathlib import Path
 from earthaccess_data import get_files
@@ -14,10 +13,6 @@
 LOG = get_logger()
 
 
-tz_map = {'US/Pacific': ['CA', 'NV', 'WA'],
-          'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
-          }
-
 SNOWEX_PITS_MAP = {
     "SNEX20_TS_SP": "10.5067/KZ43HVLZV6G4"
 }
@@ -31,6 +26,11 @@
                     "stratigraphy": "Manual"
                   }
 
+tz_map = {'US/Pacific': ['CA', 'NV', 'WA'],
+          'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
+          }
+
+
 def get_site_id(filename: str) -> str:
     """
     Get the site ID based on the site code in the filename
@@ -65,8 +65,9 @@ def main(file_list: list, doi: str) -> None:
         "doi": doi,
     }
 
-    # Files to remove
-
+    # Files to ignore
+    gap_filled_density = [f for f in file_list if "gapDensity" in f]
+    file_list = list(set(file_list) - set(gap_filled_density))
 
     with db_session_with_credentials('./credentials.json') as (_engine, session):
 
@@ -76,9 +77,10 @@ def main(file_list: list, doi: str) -> None:
                 f for f in file_list if keyword in Path(f).name
             ]
             kwargs["instrument"] = instrument
+            LOG.info(f"\n\nUploading {len(instrumented_files)} files with keyword: {keyword}")
 
             # Filter to sites to manage the timezones
-            unique_sites = set([get_site_id(f) for f in instrumented_files])
+            unique_sites = list(set([get_site_id(f) for f in instrumented_files]))
             
             for site in unique_sites:
                 site_files = [

From 29a3a81b5a4eb3bf51fe129bde1bd9ebcc7e0317 Mon Sep 17 00:00:00 2001
From: micah johnson <micah.johnson150@gmail.com>
Date: Sat, 29 Nov 2025 06:40:49 -0700
Subject: [PATCH 3/6] Drying out some upload code

---
 snowex_db/utilities.py  | 32 ++++++++++++++++++++++++++++++--
 tests/test_utilities.py | 24 +++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/snowex_db/utilities.py b/snowex_db/utilities.py
index c511e79..e0a1b78 100644
--- a/snowex_db/utilities.py
+++ b/snowex_db/utilities.py
@@ -7,9 +7,14 @@
 import logging
 from os import walk
 from os.path import getctime, join
-
+import re
+from pathlib import Path
 import coloredlogs
 
+state_tz_map = {'US/Pacific': ['CA', 'NV', 'WA'],
+                'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
+          }
+
 
 def get_logger(name, debug=True, ext_logger=None):
     """
@@ -158,4 +163,27 @@ def get_file_creation_date(file):
     """
 
     result = datetime.datetime.fromtimestamp(getctime(file)).date()
-    return result
\ No newline at end of file
+    return result
+
+
+def get_site_id_from_filename(filename: str, regex: str) -> str:
+    """
+    Get the site ID based on the site code in the filename from the pit files
+    """
+
+    compiled = re.compile(regex)
+    match = compiled.match(Path(filename).name)
+    if match:
+        code = match.group(1)
+        return code
+    else:
+        raise RuntimeError(f"No site ID found for {filename}")
+
+
+def get_timezone_from_site_id(site_id: str) -> str:
+    """
+    Get the timezone from the state abbreviation (first two characters) of the site id
+    """
+    abbrev = site_id[0:2]
+    tz = [k for k, states in state_tz_map.items() if abbrev in states][0]
+    return tz
\ No newline at end of file
diff --git a/tests/test_utilities.py b/tests/test_utilities.py
index c519725..487ec5e 100644
--- a/tests/test_utilities.py
+++ b/tests/test_utilities.py
@@ -5,7 +5,7 @@
 
 from snowex_db.utilities import (
     read_n_lines, find_files, find_kw_in_lines, assign_default_kwargs,
-    get_file_creation_date
+    get_file_creation_date, get_site_id_from_filename, get_timezone_from_site_id
 )
 
 
@@ -100,3 +100,25 @@ def test_get_file_creation_date():
     """
     result = get_file_creation_date(__file__)
     assert type(result) is date
+
+
+def test_get_site_id_from_filename():
+    """
+    Test getting site ID from filename
+    """
+    filename = "SNEX20_TS_SP_20191029_1210_COFEJ1_data_gapFilledDensity_v02.csv"
+    regex = r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv'
+    site_id = get_site_id_from_filename(filename, regex)
+    assert site_id == "COFEJ1"
+
+
+@pytest.mark.parametrize('site_id, expected_tz', [
+                         ('COGM', 'US/Mountain'),
+                         ('CAAM', 'US/Pacific'),
+                         ])
+def test_get_timezone_from_site_id(site_id, expected_tz):
+    """
+    Test getting timezone from site ID
+    """
+    tz = get_timezone_from_site_id(site_id)
+    assert tz == expected_tz
\ No newline at end of file

From f35e3f9a671477a1cd458d931bb78a75a54fc325 Mon Sep 17 00:00:00 2001
From: micah johnson <micah.johnson150@gmail.com>
Date: Sat, 29 Nov 2025 06:43:57 -0700
Subject: [PATCH 4/6] updated 2020 to use dried out tools

---
 scripts/upload/add_time_series_pits_2020.py | 37 ++++-----------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py
index 5b44991..ad7c0b9 100644
--- a/scripts/upload/add_time_series_pits_2020.py
+++ b/scripts/upload/add_time_series_pits_2020.py
@@ -7,7 +7,7 @@
 from earthaccess_data import get_files
 from import_logger import get_logger
 from snowexsql.db import db_session_with_credentials
-
+from snowex_db.utilities import get_timezone_from_site_id, get_site_id_from_filename
 from snowex_db.upload.layers import UploadProfileBatch
 
 LOG = get_logger()
@@ -26,34 +26,6 @@
                     "stratigraphy": "Manual"
                   }
 
-tz_map = {'US/Pacific': ['CA', 'NV', 'WA'],
-          'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
-          }
-
-
-def get_site_id(filename: str) -> str:
-    """
-    Get the site ID based on the site code in the filename
-    """
-    compiled = re.compile(
-        r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv'
-    )
-    match = compiled.match(Path(filename).name)
-    if match:
-        code = match.group(1)
-        return code
-    else:
-        raise RuntimeError(f"No site ID found for {filename}")
-
-
-def get_timezone(site_id: str) -> str:
-    """
-    Get the timezone based on the site code
-    """
-    abbrev = site_id[0:2]
-    tz = [k for k, states in tz_map.items() if abbrev in states][0]
-    return tz
-
 
 def main(file_list: list, doi: str) -> None:
     """
@@ -65,6 +37,9 @@ def main(file_list: list, doi: str) -> None:
         "doi": doi,
     }
 
+    # Regex to get site id from filename
+    snowex_reg = r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_.*\.csv'
+
     # Files to ignore
     gap_filled_density = [f for f in file_list if "gapDensity" in f]
     file_list = list(set(file_list) - set(gap_filled_density))
@@ -80,13 +55,13 @@ def main(file_list: list, doi: str) -> None:
             LOG.info(f"\n\nUploading {len(instrumented_files)} files with keyword: {keyword}")
 
             # Filter to sites to manage the timezones
-            unique_sites = list(set([get_site_id(f) for f in instrumented_files]))
+            unique_sites = list(set([get_site_id_from_filename(f, snowex_reg) for f in instrumented_files]))
             
             for site in unique_sites:
                 site_files = [
                     f for f in instrumented_files if site in f
                 ]
-                kwargs["timezone"] = get_timezone(site)
+                kwargs["timezone"] = get_timezone_from_site_id(site)
                 
                 uploader = UploadProfileBatch(session, site_files, **kwargs)
                 uploader.push()

From b6380ca9594c7a83a556e939521fa37b4b77c2a0 Mon Sep 17 00:00:00 2001
From: micah johnson <micah.johnson150@gmail.com>
Date: Sat, 29 Nov 2025 07:05:08 -0700
Subject: [PATCH 5/6] functioning timeseries pits 2021

---
 scripts/upload/add_time_series_pits_2020.py |   1 -
 scripts/upload/add_time_series_pits_2021.py | 150 +++++++-------------
 2 files changed, 53 insertions(+), 98 deletions(-)

diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py
index ad7c0b9..a50b37d 100644
--- a/scripts/upload/add_time_series_pits_2020.py
+++ b/scripts/upload/add_time_series_pits_2020.py
@@ -2,7 +2,6 @@
 Script to upload the Snowex Time Series pits
 """
 
-import re
 from pathlib import Path
 from earthaccess_data import get_files
 from import_logger import get_logger
diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py
index 244eee3..39e647d 100644
--- a/scripts/upload/add_time_series_pits_2021.py
+++ b/scripts/upload/add_time_series_pits_2021.py
@@ -2,115 +2,71 @@
 Script to upload the Snowex Time Series pits
 """
 
-import glob
-import re
-from os.path import abspath, join
 from pathlib import Path
+from earthaccess_data import get_files
+from snowexsql.db import db_session_with_credentials
+from snowex_db.utilities import get_timezone_from_site_id, get_site_id_from_filename
+from snowex_db.upload.layers import UploadProfileBatch
+from import_logger import get_logger
 
+LOG = get_logger()
 
 
+SNOWEX_PITS_MAP = {
+    "SNEX21_TS_SP": "10.5067/QIANJYJGRWOV"
+}
 
-tz_map = {'US/Pacific': ['CA', 'NV', 'WA'],
-          'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'],
-          }
 
+# Filename keyword to the instrument used
+INSTRUMENT_MAP = {
+                    "siteDetails": None,
+                    "density": "Density Cutter",
+                    "temperature": "Digital Thermometer",
+                    "LWC": "A2 Sensor",
+                    "stratigraphy": "Manual"
+                  }
 
-def main():
+def main(file_list: list, doi: str) -> None:
     """
-    Snowex 2021 timeseries pits
+    Add 2021 timeseries pits
     """
-    db_name = 'localhost/snowex'
-    # https://nsidc.org/data/snex21_ts_sp/versions/1
-    doi = "https://doi.org/10.5067/QIANJYJGRWOV"
-    debug = True
+    # Constant metadata for the 2021 time series pit data
+    kwargs = {
+        "campaign_name": "2021 Timeseries",
+        "doi": doi,
+    }
 
-    # Point to the downloaded data from
-    data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/')
-    error_msg = []
+    # Regex to get site id from filename
+    snowex_reg = r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_.*\.csv'
 
     # Files to ignore
-    ignore_files = [
-        "SNEX21_TS_SP_Summary_Environment_v01.csv",
-        "SNEX21_TS_SP_Summary_SWE_v01.csv",
-        "SNEX21_TS_SP_Summary_SWE_v01_modified.csv"
-    ]
-
-    # Get all the date folders
-    unique_dt_olders = Path(
-        data_dir
-    ).expanduser().absolute().glob("20*.*.*")
-    for udf in unique_dt_olders:
-        # get all the csvs in the folder
-        dt_folder_files = list(udf.glob("*.csv"))
-        site_ids = []
-        # Get the unique site ids for this date folder
-        compiled = re.compile(
-            r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv'
-        )
-        for file_path in dt_folder_files:
-            file_name = file_path.name
-            if file_name in ignore_files:
-                print(f"Skipping {file_name}")
-                continue
-
-            match = compiled.match(file_name)
-            if match:
-                code = match.group(1)
-                site_ids.append(code)
-            else:
-                raise RuntimeError(f"No site ID found for {file_name}")
-
-        # Get the unique site ids
-        site_ids = list(set(site_ids))
-
-        for site_id in site_ids:
-            abbrev = site_id[0:2]
-            tz = [k for k, states in tz_map.items() if abbrev in states][0]
-
-            # Grab all the csvs in the pits folder
-            filenames = glob.glob(join(str(udf), f'*_{site_id}_*.csv'))
-
-            # Grab all the site details files
-            sites = glob.glob(join(
-                str(udf), f'*_{site_id}_*siteDetails*.csv'
-            ))
-
-            # Use no-gap-filled density for the sole reason that
-            # Gap filled density for profiles where the scale was broken
-            # are just an empty file after the headers. We should
-            # Record that Nan density was collected for the profile
-            density_files = glob.glob(join(
-                str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv'
-            ))
-
-            # Remove the site details from the total file list to get only the
-            profiles = list(
-                set(filenames) - set(sites) -
-                set(density_files)  # remove non-gap-filled denisty
-            )
-
-            # Submit all profiles associated with pit at a time
-            b = UploadProfileBatch(
-                filenames=profiles, debug=debug, doi=doi, in_timezone=tz,
-                db_name=db_name,
-                allow_split_lines=True  # Logic for split header lines
-            )
-            b.push()
-            error_msg += b.errors
-
-            # Upload the site details
-            sd = UploadSiteDetailsBatch(
-                filenames=sites, debug=debug, doi=doi, in_timezone=tz,
-                db_name=db_name,
-                allow_split_lines=True  # Logic for split header lines
-            )
-            sd.push()
-            error_msg += sd.errors
-
-    for f, m in error_msg:
-        print(f)
-    return len(error_msg)
+    gap_filled_density = [f for f in file_list if "gapDensity" in f]
+    file_list = list(set(file_list) - set(gap_filled_density))
+
+    with db_session_with_credentials('./credentials.json') as (_engine, session):
+
+        # Filter by instrument
+        for keyword, instrument in INSTRUMENT_MAP.items():
+            instrumented_files = [
+                f for f in file_list if keyword in Path(f).name
+            ]
+            kwargs["instrument"] = instrument
+            LOG.info(f"\n\nUploading {len(instrumented_files)} files with keyword: {keyword}")
+
+            # Filter to sites to manage the timezones
+            unique_sites = list(set([get_site_id_from_filename(f, snowex_reg) for f in instrumented_files]))
+
+            for site in unique_sites:
+                site_files = [
+                    f for f in instrumented_files if site in f
+                ]
+                kwargs["timezone"] = get_timezone_from_site_id(site)
+
+                uploader = UploadProfileBatch(session, site_files, **kwargs)
+                uploader.push()
 
 
 if __name__ == '__main__':
-    main()
+    for data_set_id, doi in SNOWEX_PITS_MAP.items():
+        with get_files(data_set_id, doi) as files:
+            main(files, doi)
\ No newline at end of file

From d8cdca46816fbcb9642ee72d3291b0ae36abe118 Mon Sep 17 00:00:00 2001
From: aaarendt <arendta@uw.edu>
Date: Fri, 5 Dec 2025 11:43:06 -0800
Subject: [PATCH 6/6] Fix: accommodate Path or str in file list

---
 scripts/upload/add_time_series_pits_2020.py | 2 +-
 scripts/upload/add_time_series_pits_2021.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py
index a50b37d..1b2f140 100644
--- a/scripts/upload/add_time_series_pits_2020.py
+++ b/scripts/upload/add_time_series_pits_2020.py
@@ -40,7 +40,7 @@ def main(file_list: list, doi: str) -> None:
     snowex_reg = r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_.*\.csv'
 
     # Files to ignore
-    gap_filled_density = [f for f in file_list if "gapDensity" in f]
+    gap_filled_density = [f for f in file_list if "gapDensity" in str(f)]
     file_list = list(set(file_list) - set(gap_filled_density))
 
     with db_session_with_credentials('./credentials.json') as (_engine, session):
diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py
index 39e647d..6913552 100644
--- a/scripts/upload/add_time_series_pits_2021.py
+++ b/scripts/upload/add_time_series_pits_2021.py
@@ -40,7 +40,7 @@ def main(file_list: list, doi: str) -> None:
     snowex_reg = r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_.*\.csv'
 
     # Files to ignore
-    gap_filled_density = [f for f in file_list if "gapDensity" in f]
+    gap_filled_density = [f for f in file_list if "gapDensity" in str(f)]
     file_list = list(set(file_list) - set(gap_filled_density))
 
     with db_session_with_credentials('./credentials.json') as (_engine, session):