-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimage_index.py
32 lines (22 loc) · 996 Bytes
/
image_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Create a basic index for the images in an s3 collection"""
import yaml
import logging
import os
import s3fs
from dotenv import load_dotenv
# Configure the root logger so INFO-level (and higher) messages are emitted.
logging.basicConfig(level=logging.INFO)
# Load environment variables via python-dotenv before anything reads os.environ;
# presumably this is where AWS_URL_ENDPOINT (read in bucket_listing) is set - TODO confirm.
load_dotenv()
def bucket_listing(bucket_name: str):
    """Create an index of the ``.tif`` images in a bucket.

    Connects anonymously to the S3 endpoint named by the
    ``AWS_URL_ENDPOINT`` environment variable, lists ``bucket_name`` with
    ``ls`` and keeps only the entries whose name ends in ``.tif``.

    Args:
        bucket_name: Name of the bucket (or bucket path) to list.

    Returns:
        A list of file names ending in ``.tif``, with everything up to and
        including the last ``/`` removed, in the order the listing
        returned them.

    Raises:
        KeyError: If ``AWS_URL_ENDPOINT`` is not set in the environment.
    """
    s3 = s3fs.S3FileSystem(anon=True, endpoint_url=os.environ["AWS_URL_ENDPOINT"])
    # This returns paths with bucket name pre-pended
    # E.g. 'untagged-images-lana/MicrobialMethane_MESO_Tank7_54.0143_-2.7770_18052023_1_17085.tif'
    contents = s3.ls(bucket_name)
    # Match the full ".tif" extension (dot included): a bare "tif" suffix would
    # also pick up unrelated keys whose name merely ends in those letters.
    return [path.rsplit("/", 1)[-1] for path in contents if path.endswith(".tif")]
if __name__ == "__main__":
    # Expects the bucket name set as "collection" in params.yaml (used by DVC)
    # Read the params inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("params.yaml", encoding="utf-8") as params_file:
        bucket_name = yaml.safe_load(params_file)["collection"]

    images = bucket_listing(bucket_name)

    # Increment - keep a flat file index locally
    # One image name per line; explicit UTF-8 so the output does not depend on
    # the platform's default encoding.
    with open(f"{bucket_name}.csv", "w", encoding="utf-8") as out:
        out.write("\n".join(images))