Skip to content

Commit

Permalink
Renamed script that dropped out of the index
Browse files Browse the repository at this point in the history
  • Loading branch information
metazool committed Feb 13, 2025
1 parent 91f255f commit d96d257
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 6 deletions.
5 changes: 0 additions & 5 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -1,5 +0,0 @@
[core]
remote = jasmin
['remote "jasmin"']
url = s3://metadata
endpointurl = https://fw-plankton-o.s3-ext.jc.rl.ac.uk
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ vectors/
*.ipynb
*.egg-info/
venv/
data/**
/vectors
/models
5 changes: 5 additions & 0 deletions data/untagged-images-wala.db.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 100680ac6b3573be7008d222b2a1b4da
size: 29085696
hash: md5
path: untagged-images-wala.db
32 changes: 32 additions & 0 deletions scripts/image_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Create a basic index for the images in an s3 collection"""

import yaml
import logging
import os
import s3fs
from dotenv import load_dotenv

# Configure the root logger so messages at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where AWS_URL_ENDPOINT (read in
# bucket_listing) is expected to come from - confirm.
load_dotenv()


def bucket_listing(bucket_name: str):
    """List the file names of the tif images found in an s3 bucket.

    Connects anonymously to the endpoint given by the ``AWS_URL_ENDPOINT``
    environment variable, lists ``bucket_name`` and returns the last path
    component of every entry whose path ends with "tif".

    Raises ``KeyError`` if ``AWS_URL_ENDPOINT`` is not set.
    """
    endpoint = os.environ["AWS_URL_ENDPOINT"]
    filesystem = s3fs.S3FileSystem(anon=True, endpoint_url=endpoint)

    # The listing returns paths with the bucket name prepended, e.g.
    # 'untagged-images-lana/MicrobialMethane_MESO_Tank7_54.0143_-2.7770_18052023_1_17085.tif'
    # so only the part after the final "/" is kept.
    # NOTE(review): the suffix test is "tif" without a leading dot, so any
    # name that merely ends in those letters also matches - confirm intended.
    return [
        path.rsplit("/", 1)[-1]
        for path in filesystem.ls(bucket_name)
        if path.endswith("tif")
    ]


if __name__ == "__main__":
    # Expects the bucket name set as "collection" in params.yaml (used by DVC)

    # Read the params through a context manager so the file handle is closed
    # as soon as parsing finishes, rather than being left to the garbage
    # collector.
    with open("params.yaml") as params_file:
        bucket_name = yaml.safe_load(params_file)["collection"]

    images = bucket_listing(bucket_name)
    # Increment - keep a flat file index locally: one image file name per
    # line, written to "<bucket_name>.csv" in the working directory.
    with open(f"{bucket_name}.csv", "w") as out:
        out.write("\n".join(images))

0 comments on commit d96d257

Please sign in to comment.