diff --git a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb new file mode 100644 index 00000000..ce40ab27 --- /dev/null +++ b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb @@ -0,0 +1,2026 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e58d3e48", + "metadata": {}, + "source": [ + "Do some matching between records at NCEI and records available through the IOOS data catalog. Essentially come up with a list of datasets that aren't at NCEI.\n", + "\n", + "Outline of process:\n", + "1. Build a dataframe of non-federal buoy datasets and metadata from the IOOS Catalog.\n", + "2. Use that dataframe to search NCEI for matching datasets affiliated with IOOS.\n", + "3. Identify which datasets are not at NCEI that should be.\n", + "\n", + "Borrow code from:\n", + "* https://ioos.github.io/ioos_code_lab/content/code_gallery/data_access_notebooks/2017-06-12-NCEI_RA_archive_history.html\n", + "* https://ioos.github.io/ioos_code_lab/content/code_gallery/data_access_notebooks/2024-09-17-CKAN_API_Query.html" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "66baa186", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Mathew.Biddle\\programs\\miniforge3\\envs\\IOOS\\Lib\\site-packages\\ckanapi\\version.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. 
# --- cell: connect to the IOOS CKAN catalog ---
from ckanapi import RemoteCKAN

# Client for the CKAN instance at data.ioos.us; the user agent string
# identifies this notebook to the server.
ioos_catalog = RemoteCKAN(
    address="https://data.ioos.us",
    user_agent="ckanapiioos/1.0 (+https://ioos.us/)",
)


# Bare last expression so the notebook displays the client's repr.
ioos_catalog

# --- cell: list the organizations registered in the catalog ---
orgs = ioos_catalog.action.organization_list()
print(orgs)

# --- cell: count every dataset in the catalog ---
# package_search() with no arguments is an unfiltered search; only the
# "count" field of the response is displayed here.
datasets = ioos_catalog.action.package_search()
datasets["count"]
def ioos_ckan_query(
    ioos_catalog,
    filter_query,
    free_text_query,
    rows=500,
    max_retries=5,
    retry_wait=1.0,
):
    '''
    Page through an IOOS catalog ``package_search`` and collect every hit.

    Parameters
    ----------
    ioos_catalog : RemoteCKAN object
        The RemoteCKAN object to use for querying the IOOS catalog.
    filter_query : str
        The filter query (``fq``) to use for querying the IOOS catalog.
    free_text_query : str
        The free text query (``q``) to use for querying the IOOS catalog.
    rows : int, optional
        Page size requested from the server on each call. Defaults to 500.
    max_retries : int, optional
        Number of consecutive failed attempts allowed for a single page
        before the error is re-raised. Defaults to 5.
    retry_wait : float, optional
        Base delay in seconds between retries; the delay grows linearly with
        the number of consecutive failures. Defaults to 1.0.

    Returns
    -------
    df_plat : pandas.DataFrame
        One row per dataset returned by the search, one column per key of
        the dataset dictionaries (object dtype, values left as returned).

    Raises
    ------
    CKANAPIError, IncompleteRead, ChunkedEncodingError
        When the same page fails ``max_retries`` times in a row.
    '''

    # Collect the raw dataset dicts and build the frame once at the end;
    # concatenating one-row frames inside the loop is quadratic.
    records = []

    result_count = 0
    num_results = 0
    failures = 0

    while True:
        try:
            datasets = ioos_catalog.action.package_search(
                fq=filter_query,
                q=free_text_query,
                rows=rows,
                start=result_count,
            )
        except (CKANAPIError, IncompleteRead, ChunkedEncodingError):
            # Retry the same page, but back off and give up eventually so a
            # permanent error cannot spin forever against the server.
            failures += 1
            if failures >= max_retries:
                raise
            time.sleep(retry_wait * failures)
            continue

        failures = 0

        num_results = datasets["count"]
        page = datasets["results"]

        print(f"num_results: {num_results}, result_count: {result_count}")

        records.extend(page)
        result_count = len(records)

        if result_count >= num_results:
            print(f"num_results: {num_results}, result_count: {result_count}")
            break

        if not page:
            # The server reports more hits than it is willing to return;
            # asking again with the same offset would never terminate.
            print(
                f"Warning: empty page at start={result_count}; "
                f"stopping with {result_count} of {num_results} results."
            )
            break

    # dtype=object keeps every cell exactly as CKAN returned it (lists of
    # dicts, None, booleans, ...) instead of letting pandas infer dtypes.
    df_plat = pd.DataFrame(records, dtype=object)

    print(
        f"num_results: {num_results}, result_count: {result_count}, total_result_count: {df_plat.shape[0]}"
    )

    return df_plat
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
authorauthor_emailcreator_user_ididisopenlicense_idlicense_titlemaintainermaintainer_emailmetadata_created...titletypeurlversionextrasresourcestagsgroupsrelationships_as_subjectrelationships_as_object
0NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd757da90e03-f8aa-483e-96d0-7a27051b90b4FalseNoneNoneNoneNone2025-04-11T14:32:17.541505...Backyard Buoys - NANOOS - Washington: Quileute...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Backyard Buoys', 'id': 'f80...[][][]
1NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75d0ef6a3a-4894-43f3-b4ea-2a882dccc478FalseNoneNoneNoneNone2025-01-09T02:08:28.700159...NPBY1 - Point Wells: Meteorological Station DatadatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Earth Science > Atmosphere ...[][][]
2NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd750bd3b7ac-cb00-4dd5-87ca-e55bd6fb8d16FalseNoneNoneNoneNone2024-11-08T12:57:32.204016...NPBY2 - Carr Inlet: Meteorological Station DatadatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Earth Science > Atmosphere ...[][][]
3NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd7517f2f79e-bab0-4c2d-a0a1-2abf40acaa52FalseNoneNoneNoneNone2025-01-09T02:08:48.402290...NANOOS Mooring ORCA Pt WellsdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
4NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75cb6612de-ae7b-4827-a1f9-0d943174ae15FalseNoneNoneNoneNone2025-05-09T16:03:29.657747...NEMO - ChaBa Meteorlogical - Gill Metpak ProdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Earth Science > Atmosphere ...[][][]
..................................................................
90NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75bff06122-cf40-4611-b5b3-c8c79a71cfacFalseNoneNoneNoneNone2025-01-09T13:26:01.855237...(APL-UW) Ćháʔba· UW/NANOOS Moore...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
91NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd752449dd5c-57c5-43dd-a3d6-f52de352a0e5FalseNoneNoneNoneNone2025-01-09T13:25:59.040273...(WADOH) Hood Canal 1 site, W shore of Hood Can...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
92NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd7546917a4a-9e77-495b-a0d3-3c5cea2bc5e8FalseNoneNoneNoneNone2025-01-09T13:25:56.552833...(CMOP) Grays Point (USCG day mark green 13)datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
93NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd753261508c-5b1d-42a8-95ae-fe142449a216FalseNoneNoneNoneNone2025-01-09T13:25:53.600691...(WADOH) Skookum Inlet site, N shore near Deer ...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
94NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd7592c7abed-b1f8-4827-94f5-888d0aa7858eFalseNoneNoneNoneNone2025-01-09T13:25:50.892710...(WADOH) Eld Inlet site, W shore near Frye Cove...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
\n", + "

95 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " author author_email creator_user_id \\\n", + "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "1 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "2 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "3 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "4 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + ".. ... ... ... \n", + "90 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "91 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "92 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "93 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "94 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "\n", + " id isopen license_id license_title \\\n", + "0 7da90e03-f8aa-483e-96d0-7a27051b90b4 False None None \n", + "1 d0ef6a3a-4894-43f3-b4ea-2a882dccc478 False None None \n", + "2 0bd3b7ac-cb00-4dd5-87ca-e55bd6fb8d16 False None None \n", + "3 17f2f79e-bab0-4c2d-a0a1-2abf40acaa52 False None None \n", + "4 cb6612de-ae7b-4827-a1f9-0d943174ae15 False None None \n", + ".. ... ... ... ... \n", + "90 bff06122-cf40-4611-b5b3-c8c79a71cfac False None None \n", + "91 2449dd5c-57c5-43dd-a3d6-f52de352a0e5 False None None \n", + "92 46917a4a-9e77-495b-a0d3-3c5cea2bc5e8 False None None \n", + "93 3261508c-5b1d-42a8-95ae-fe142449a216 False None None \n", + "94 92c7abed-b1f8-4827-94f5-888d0aa7858e False None None \n", + "\n", + " maintainer maintainer_email metadata_created ... \\\n", + "0 None None 2025-04-11T14:32:17.541505 ... \n", + "1 None None 2025-01-09T02:08:28.700159 ... \n", + "2 None None 2024-11-08T12:57:32.204016 ... \n", + "3 None None 2025-01-09T02:08:48.402290 ... \n", + "4 None None 2025-05-09T16:03:29.657747 ... \n", + ".. ... ... ... ... \n", + "90 None None 2025-01-09T13:26:01.855237 ... \n", + "91 None None 2025-01-09T13:25:59.040273 ... \n", + "92 None None 2025-01-09T13:25:56.552833 ... \n", + "93 None None 2025-01-09T13:25:53.600691 ... \n", + "94 None None 2025-01-09T13:25:50.892710 ... 
\n", + "\n", + " title type url version \\\n", + "0 Backyard Buoys - NANOOS - Washington: Quileute... dataset None None \n", + "1 NPBY1 - Point Wells: Meteorological Station Data dataset None None \n", + "2 NPBY2 - Carr Inlet: Meteorological Station Data dataset None None \n", + "3 NANOOS Mooring ORCA Pt Wells dataset None None \n", + "4 NEMO - ChaBa Meteorlogical - Gill Metpak Pro dataset None None \n", + ".. ... ... ... ... \n", + "90 (APL-UW) Ćháʔba· UW/NANOOS Moore... dataset None None \n", + "91 (WADOH) Hood Canal 1 site, W shore of Hood Can... dataset None None \n", + "92 (CMOP) Grays Point (USCG day mark green 13) dataset None None \n", + "93 (WADOH) Skookum Inlet site, N shore near Deer ... dataset None None \n", + "94 (WADOH) Eld Inlet site, W shore near Frye Cove... dataset None None \n", + "\n", + " extras \\\n", + "0 [{'key': 'access-constraints', 'value': '[]'},... \n", + "1 [{'key': 'access-constraints', 'value': '[]'},... \n", + "2 [{'key': 'access-constraints', 'value': '[]'},... \n", + "3 [{'key': 'access-constraints', 'value': '[]'},... \n", + "4 [{'key': 'access-constraints', 'value': '[]'},... \n", + ".. ... \n", + "90 [{'key': 'access-constraints', 'value': '[]'},... \n", + "91 [{'key': 'access-constraints', 'value': '[]'},... \n", + "92 [{'key': 'access-constraints', 'value': '[]'},... \n", + "93 [{'key': 'access-constraints', 'value': '[]'},... \n", + "94 [{'key': 'access-constraints', 'value': '[]'},... \n", + "\n", + " resources \\\n", + "0 [{'cache_last_updated': None, 'cache_url': Non... \n", + "1 [{'cache_last_updated': None, 'cache_url': Non... \n", + "2 [{'cache_last_updated': None, 'cache_url': Non... \n", + "3 [{'cache_last_updated': None, 'cache_url': Non... \n", + "4 [{'cache_last_updated': None, 'cache_url': Non... \n", + ".. ... \n", + "90 [{'cache_last_updated': None, 'cache_url': Non... \n", + "91 [{'cache_last_updated': None, 'cache_url': Non... \n", + "92 [{'cache_last_updated': None, 'cache_url': Non... 
# Identify this notebook to the IOOS CKAN server and open a client.
ua = "ckanapiioos/1.0 (+https://ioos.us/)"
ioos_catalog = RemoteCKAN("https://data.ioos.us", user_agent=ua)

# Organizations whose catalog records are harvested.
orgs = ["NANOOS"]

# Accumulates the search results of every organization in `orgs`.
df_ioos_catalog = pd.DataFrame()

for org in orgs:
    # CKAN organization names are lower case.
    org_ncei = org.lower()

    # No filter query; the selection is done entirely by the free text
    # query, which excludes glider and model records.
    filter_query = ""
    free_text_query = f'organization:{org_ncei.lower()} NOT (glider OR model)'

    df_search = ioos_ckan_query(ioos_catalog, filter_query, free_text_query)

    df_ioos_catalog = pd.concat(
        [df_ioos_catalog, df_search],
        ignore_index=True,
    )

df_ioos_catalog
def _load_xml_root(source):
    """Return the root element for a parsed tree, an element, or a path/URL/file object."""
    if hasattr(source, "getroot"):
        # Already-parsed ElementTree: the original calling convention.
        return source.getroot()
    if isinstance(source, ET.Element):
        return source
    if isinstance(source, str):
        from urllib.parse import urlparse
        from urllib.request import urlopen

        if urlparse(source).scheme in ("http", "https", "ftp"):
            # Remote document: stream it straight into the parser. The
            # timeout keeps an unresponsive server from hanging the notebook.
            with urlopen(source, timeout=60) as response:
                return ET.parse(response).getroot()
    # Local filename or an open file object.
    return ET.parse(source).getroot()


def fuzzy_search_in_xml(tree, search_query, score_cutoff=70, scorer=None):
    """
    Performs a fuzzy search for a query string within the text of all elements of an XML document.

    Args:
        tree: The XML document to search. May be an already-parsed
            ``xml.etree.ElementTree.ElementTree``, an ``Element``, an open
            file object, a local file path, or an http(s)/ftp URL.
        search_query (str): The string to search for.
        score_cutoff (int): The minimum similarity score (0-100) to consider a match.
                            Defaults to 70.
        scorer (callable, optional): Function ``scorer(query, text)`` returning a
            similarity score; both arguments are passed lower-cased. Defaults to
            ``fuzz.partial_ratio``, which is good for finding substrings.

    Returns:
        list: A list of dictionaries, where each dictionary represents a match
              and contains the element's tag, its text, and the similarity score,
              sorted by score in descending order.
              Returns an empty list if no matches are found, or if a path was
              given and the file is missing or cannot be parsed. Errors while
              fetching a URL are not caught here.
    """
    if scorer is None:
        scorer = fuzz.partial_ratio

    try:
        root = _load_xml_root(tree)
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return []
    except FileNotFoundError:
        print(f"Error: The file '{tree}' was not found.")
        return []

    # Lower-case the query once instead of once per element.
    needle = search_query.lower()

    matches = []
    # Iterate through every element in the XML tree
    for element in root.iter():
        # Skip elements with no text or whitespace-only text
        element_text = (element.text or "").strip()
        if not element_text:
            continue

        score = scorer(needle, element_text.lower())

        # If the score is above the cutoff, we have a match
        if score >= score_cutoff:
            matches.append({
                'tag': element.tag,
                'text': element_text,
                'score': score
            })

    # Sort matches by score in descending order
    matches.sort(key=lambda match: match['score'], reverse=True)

    return matches
all together" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "d99addd1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "num_results: 95, result_count: 0\n", + "num_results: 95, result_count: 95\n", + "num_results: 95, result_count: 95, total_result_count: 95\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute - North' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NPBY1 - Point Wells: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NPBY2 - Carr Inlet: Meteorological Station Data' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Pt Wells' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NEMO - ChaBa Meteorlogical - Gill Metpak Pro' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 93\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'Se'lhaem, Bellingham Bay Meteorological Station Data' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'ORCA3 - Hansville: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'ORCA1 - Twanoh: Meteorological Station Data' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Dabobbay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'OBIS Data - Puget Sound Zooplankton Monitoring Program (Salish Sea, USA), starting in 2014' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmd}DS_InitiativeTypeCode\n", + " Text: 'program'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'ORCA4 - Dabob Bay: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + 
"Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'Se'lhaem, Bellingham Bay Surface Hydrological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute Test' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NEMO - Cha'Ba, CTD Data, Near-realtime (incomplete data)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", 
+ " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'time'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Twanoh' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Hoodsport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'Se'lhaem, Bellingham Bay Deep Hydrological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NEMO - Chaba Wind - Vaisala WMT 700 Anemometer' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for 'ORCA2 - Hoodsport: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + 
"--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NANOOS Mooring CB-06' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute - Center' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Hansville' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA NPB2Carr' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute - South' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-02' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-04' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-03' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(APL-UW) UW/NANOOS NEMO Subsurface profiler near La Push' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(PennCoveShellfish) Penn Cove Shellfish, Quilcene - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WhiskeyCrShelfish) PCSGA - Whiskey Creek Shellfish Hatchery, Netarts Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(PSI) PCSGA - Bay Center Port mooring, Willapa Bay' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) SATURN-07' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 
'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(SSNERR) SOSNSWQ Station - North Spit-BLM Boat Ramp near North Bend' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'North Head'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-09' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(WADOH) Burley Lagoon site, N end of Henderson Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for 'NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(WADOH) Samish Bay site, mid bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOE) MCH01 Manchester/Clam Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOH) Pickering Passage site, E shore near Graham Point' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(OSU) NH-10 Buoy, Newport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-08 (LOBO Station)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", 
+ " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(NERRS) Station SOSECWQ - Elliot Creek, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'eliot'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Henderson Bay site, W shore' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOH) Hood Canal 7 site, N shore of Hood Canal near Tahuya' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(StillaguamishTribe) Port Susan buoy, Stillaguamish Tribe' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(APL-UW) LSG NPB-2 Profiling Buoy at Carr Inlet' 
in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Henderson Inlet site, S end' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(ORCA-UW) Profiling Buoy at Hoodsport - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(King County) Dockton Park Pier Mooring - Inner Quartermaster Harbor, Vashon Island' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOE) MUK01 Mukilteo' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(NERRS) Station SOSWIWQ - Winchester Arm, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(NWIC) Se`lhaem Bellingham Bay buoy' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 89\n", + "--------------------\n", + "Searching for '(NERRS SOS) Tom's Creek (sostcmet), South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(King County) Seattle Aquarium Mooring - Elliott Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'eliot'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Elliott Point' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Elliott Point'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'eliot'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Lower Sand Island light (USCG day mark green 5)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 3 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Lower Sand Island light (USCG day mark green 5)'\n", + " Score: 100\n", + 
"--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Marsh Island (USCG day mark green 21)'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(FHL-UW) UW Friday Harbor Laboratories Cantilever Pt' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'CORIE'\n", + " Score: 80\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'abpoa'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Peale Passage site, W shore near Seafarm Cove' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(HMSC) Hatfield Marine Sci. Ctr. 
monitoring site, Newport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(NERRS) Station SOSCWQ - Charleston Bridge, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(PennCoveShellfish) Penn Cove Shellfish, Coupeville - Whidbey Island' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(Hakai Institute) Quadra Island Field Station at Hyacinthe Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Desdemona Sands Light' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Desdemona Sands Light'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Waste water outfall (City of Astoria)' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Waste water outfall (City of Astoria)'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(WADOH) Oakland Bay site, Rafts by Chapman Cove' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(OSU) CB-06, 6NM W of Coos Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) SATURN River Station 05 (LOBO Station)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN River Station 05'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 8 site, S shore of Hood Canal near Twanoh' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(ORCA-UW) Profiling Buoy at Hansville - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) 
Tenasillahe Island (USFW dock)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Tenasillahe Island (USFW dock)'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Totten Inlet site, N end near Gallagher Cove' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(ORCA-UW) Profiling Buoy at Dabob Bay - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 9 site, N shore of Hood Canal near Belfair' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(VIU-CSR) VIU Deep Bay Marine Field Station, Baynes Sound' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(Hakai Institute) Seaology CO2 Buoy in Kwakshua Channel, BC' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 
match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-01' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 
88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(ORCA-UW) Profiling Buoy at Twanoh - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(APL-UW) LSG NPB-1 Profiling Buoy at Pt. Wells' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Jetty A' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Jetty A'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'jetta'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Totten Inlet site, S end in Oyster Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) SATURN River Station 06 (USGS Station)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", 
+ "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN River Station 05'\n", + " Score: 96\n", + "--------------------\n", + "Searching for '(NERRS) Station SOSVAWQ - Valino Island, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Port Gamble site, E shore' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(King County) Point Williams buoy, South Seattle' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 5 site, S shore of Hood Canal near Hoodsport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOH) Hammersley Inlet site, S shore near Skookum Point' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(PSI) PCSGA - Nahcotta Port hatchery mooring, Willapa Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff 
score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(King County) Yacht Club Mooring - Inner Quartermaster Harbor, Vashon Island' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(TaylorShellfish) PCSGA - Taylor Shellfish Hatchery intakes, Dabob Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(FannyBayOysters) Fanny Bay Oysters, Baynes Sound' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(APL-UW) Ćháʔba· UW/NANOOS Moored Buoy near La Push' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 1 site, W shore of Hood Canal near Case Shoal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) Grays Point (USCG day mark green 13)' in 
from urllib.request import urlopen
import urllib.error
import stamina


@stamina.retry(on=urllib.error.HTTPError, attempts=3)
def _openurl_with_retry(url):
    """Open ``url`` with ``urlopen``, retrying when an HTTP error is raised.

    The stamina decorator re-invokes the call on ``urllib.error.HTTPError``
    (configured with ``attempts=3``).

    Parameters
    ----------
    url : str
        Address to open.

    Returns
    -------
    The response object returned by ``urlopen``. The caller is responsible
    for closing it (e.g. by using it as a context manager).
    """
    return urlopen(url)


ua = "ckanapiioos/1.0 (+https://ioos.us/)"

ioos_catalog = RemoteCKAN("https://data.ioos.us", user_agent=ua)

# Minimum fuzzy-match score handed to fuzzy_search_in_xml.
# Adjust this value to make the search more or less strict.
SCORE_CUTOFF = 80

orgs = ["NANOOS"]

# Collect one search result frame per organization and concatenate once at
# the end instead of growing a DataFrame inside the loop.
catalog_frames = []

for org in orgs:
    # `organization` must be set BEFORE the NCEI URL is built. It keeps the
    # original (upper-case) spelling, which is what the IOOS-<RA> id uses.
    organization = org
    # The catalog query uses the lower-case organization name.
    org_ncei = org.lower()

    filter_query = ""
    free_text_query = f"organization:{org_ncei} NOT (glider OR model)"

    df_search = ioos_ckan_query(ioos_catalog, filter_query, free_text_query)
    catalog_frames.append(df_search)

    XML_FILE = f"https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-{organization};view=xml;responseType=text/xml"

    # Parse the ISO record once per organization and close the HTTP response
    # as soon as the tree has been read.
    with _openurl_with_retry(XML_FILE) as iso:
        tree = ET.parse(iso)

    # Fuzzy-match every catalog dataset title against the text of the
    # organization's NCEI ISO record.
    for SEARCH_QUERY in df_search["title"]:
        print(f"Searching for '{SEARCH_QUERY}' in '{XML_FILE}' (cutoff score: {SCORE_CUTOFF})...\n")

        # Perform the search
        results = fuzzy_search_in_xml(tree, SEARCH_QUERY, SCORE_CUTOFF)

        # Display the results
        if results:
            print(f"Found {len(results)} match(es):")
            for result in results:
                print("-" * 20)
                print(f"  Tag: {result['tag']}")
                print(f"  Text: '{result['text']}'")
                print(f"  Score: {result['score']}")
            print("-" * 20)
        else:
            print("No matches found.")

# Combined catalog search results for every organization processed above.
df_ioos_catalog = (
    pd.concat(catalog_frames, ignore_index=True) if catalog_frames else pd.DataFrame()
)
\n", + "90 (APL-UW) Ćháʔba· UW/NANOOS Moore...\n", + "91 (WADOH) Hood Canal 1 site, W shore of Hood Can...\n", + "92 (CMOP) Grays Point (USCG day mark green 13)\n", + "93 (WADOH) Skookum Inlet site, N shore near Deer ...\n", + "94 (WADOH) Eld Inlet site, W shore near Frye Cove...\n", + "Name: title, Length: 95, dtype: object" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_search['title']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d629e788", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "IOOS", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}