Galaxy tool wrapper for Visinity #86

Draft: wants to merge 10 commits into main
3 changes: 2 additions & 1 deletion .tt_skip
@@ -1 +1,2 @@
# add paths to tool repositories that should be ignored by CI
tools/visinity
230 changes: 230 additions & 0 deletions tools/visinity/archive_generate.py
@@ -0,0 +1,230 @@
import csv
import json
import os
import re
import shutil
import sys
import time


from zipfile import ZipFile, ZIP_BZIP2

from minerva_analysis import app


app.config['IS_DOCKER'] = False

# create a non-serving Flask test client and use it in place of the
# `requests` library, so routes can be called without starting a server
requests = app.test_client()

args = sys.argv[1:]

# args[0] is a JSON file describing the tool inputs; args[1] is the
# path the output archive should be written to
with open(args[0], 'r') as f:
    input_dict = json.load(f)

out_file = args[1]

name_list = []
with open(input_dict['dataset_names'], 'r') as f:
    _reader = csv.reader(f)
    header = next(_reader)
    assert header[0].lower() == 'names'
    for row_list in _reader:
        row = row_list[0]
        if row in name_list:
            raise ValueError(f'dataset name {row} is a duplicate!'
                             ' please ensure names are unique')
        # sanitize: collapse runs of characters outside the URL-safe
        # set A-Za-z0-9._~ into a single dash
        s = re.sub('[^A-Za-z0-9._~]+', '-', row)
        if s != row:
            row = s
        name_list.append(row)

channel_files = input_dict['channel_files']
label_files = input_dict['label_files']

assert len(name_list) == len(channel_files) == len(label_files)

radius = input_dict['n_radius']
celltype_csv = input_dict['celltype_csv']

# ensure the app's data directory and config file exist
data_path = f"{os.getcwd()}/minerva_analysis/data"
config_path = f"{data_path}/config.json"

if not os.path.isdir(data_path):
    os.mkdir(data_path)

if not os.path.isfile(config_path):
    with open(config_path, 'w') as f:
        f.write('{}')

url = 'http://127.0.0.1:8000'

with open(f'{data_path}/names.csv', 'w') as name_csv_file:
    name_csv_file.write(
        ','.join(name_list)
    )

shutil.copyfile(celltype_csv, 'celltype.csv')

payload_data = {
    "action": "Upload",
    "celltype_file": open('celltype.csv', 'rb'),
    "neighborhood_radius": int(radius),
}

for i, name in enumerate(name_list):
    new_quant_csv = f'quant{i}.csv'
    shutil.copyfile(input_dict['quant_csvs'][i], new_quant_csv)
    _sub_d = {
        f'name-{i+1}': name,
        f'channel_file-{i+1}': channel_files[i],
        f'label_file-{i+1}': label_files[i],
        f'csv_file-{i+1}': open(new_quant_csv, 'rb'),
    }
    payload_data.update(_sub_d)



resp = requests.post(f"{url}/upload", data=payload_data)

# the /upload response is an HTML page whose inline JS calls
# passVariablesToFrontend(<json>); pull that JSON payload back out
resp_regex = r'passVariablesToFrontend\((.*)\);'
json_string = re.search(resp_regex, resp.text).group(1)
combinedChannelData = json.loads(json_string)




def chunkify(fullname, idx):
    non_norm_markers = [
        'majoraxislength', 'number_neighbors', 'minoraxislength', 'neighbor_1',
        'solidity', 'eccentricity', 'y_position', 'x_position', 'neighbor_2',
        'percent_touching', 'orientation', 'neighbor_4', 'extent', 'cellid',
        'field_col', 'eulernumber', 'neighbor_3', 'neighbor_5', 'perimeter',
        'field_row']

    return [
        {
            "name": f"fullName{idx}",
            "value": fullname
        },
        {
            "name": f"name{idx}",
            "value": fullname
        },
        {
            "name": f"normalize{idx}",
            "value": "off" if fullname.lower() in non_norm_markers else "on"
        },
    ]
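
# e.g. chunkify("CD45", 3) yields:
#   [{"name": "fullName3",  "value": "CD45"},
#    {"name": "name3",      "value": "CD45"},
#    {"name": "normalize3", "value": "on"}]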


x_pos_terms = ['x_centroid', 'cellposition_x', 'x_position', 'x']
y_pos_terms = ['y_centroid', 'cellposition_y', 'y_position', 'y']
phenotype_terms = ['celltype', 'phenotype']

# headers should be identical across all datasets, so take the first
pre_sorted_headers = combinedChannelData[0]["csvHeader"]

# work around an issue where a filename matching "*_<digits>" is mistakenly
# assumed to be a non-segmentation file; oddly the channel names themselves
# don't seem to matter, only whether the name has an underscore and a number
for channel in combinedChannelData:
    channel["labelName"] = 'segmentation'

idfield = chunkify(pre_sorted_headers[0]['fullName'], 0)

header_prefix = [None, None, None]
sorted_headers = []

for header in pre_sorted_headers[1:]:
    if header['fullName'].lower() in x_pos_terms:
        assert (header_prefix[0] is None)
        header_prefix[0] = header
    elif header['fullName'].lower() in y_pos_terms:
        assert (header_prefix[1] is None)
        header_prefix[1] = header
    elif header['fullName'].lower() in phenotype_terms:
        assert (header_prefix[2] is None)
        header_prefix[2] = header
    else:
        sorted_headers.append(header)

# all three positional headers (x, y, phenotype) must have been found
assert all(h is not None for h in header_prefix)

all_headers = header_prefix + sorted_headers

headerlist = []
for i, header in enumerate(all_headers, start=1):
    headerlist.extend(
        chunkify(header['fullName'], i)
    )

payload = {
    'originalData': combinedChannelData,
    'headerList': headerlist,
    'normalizeCsv': False,
    'idField': idfield
}

# execute request
start = time.time()
resp = requests.post(f"{url}/save_config", json=payload)
duration = time.time() - start
print(f"processing duration:\n\t{duration} sec ({duration/60} mins)")

# brief pause so the app can finish writing out config.json
time.sleep(5)

# sanity-check that the written config.json parses before archiving it
with open(config_path, 'r') as innie:
    config = json.load(innie)

# zip it n ship it
with ZipFile(out_file, 'w', ZIP_BZIP2) as zipped:
    path_len = len(data_path) + 1
    for base, dirs, files in os.walk(data_path):
        for file in files:
            file_name = os.path.join(base, file)
            zipped.write(file_name, file_name[path_len:])
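
As an aside, a minimal standalone sketch of the pattern this script is built on: Flask's test_client() drives an app's routes in-process, so nothing has to be listening on a port. The /upload route below is a hypothetical stand-in, not the real minerva_analysis view.

from flask import Flask, request

app = Flask(__name__)

@app.route('/upload', methods=['POST'])
def upload():
    # echo one form field back, standing in for the real upload view
    return f"received action={request.form.get('action')}"

client = app.test_client()
resp = client.post('/upload', data={'action': 'Upload'})
print(resp.text)  # -> received action=Upload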
48 changes: 48 additions & 0 deletions tools/visinity/archive_ingest.py
@@ -0,0 +1,48 @@
import json
import sys

from os.path import isfile

from zipfile import ZipFile


with open(sys.argv[1], 'r') as f:
    arg_dict = json.load(f)

arch_zip = arg_dict['vis_archive']
og_channels = arg_dict['channel_files']
og_segs = arg_dict['label_files']


data_path = '/visinity/minerva_analysis/data'
conf_path = f'{data_path}/config.json'


with ZipFile(arch_zip, 'r') as zippy:
    zippy.extractall(data_path)

with open(f'{data_path}/names.csv', 'r') as f:
    name_list = f.read().split(',')

with open(conf_path, 'r') as f:
    config = json.load(f)


def reconfigure(channel, seg, old_config):
    subconfig = dict(old_config)
    # re-point the segmentation only if the archived path is unusable
    if (not subconfig['segmentation'].startswith(data_path)
            or not isfile(subconfig['segmentation'])):
        subconfig['segmentation'] = seg
    subconfig['channelFile'] = channel
    return subconfig


new_config = {}

for name, channel, seg in zip(name_list, og_channels, og_segs):
    new_config[name] = reconfigure(channel, seg, config[name])

with open(conf_path, 'w') as f:
    json.dump(new_config, f)
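
For reference, a hypothetical example of the JSON argument file this script expects as sys.argv[1]. The keys match what the script reads above; the paths are invented.

import json

example_args = {
    "vis_archive": "/tmp/visinity_archive.zip",
    "channel_files": ["/galaxy/inputs/img_1.ome.tiff"],
    "label_files": ["/galaxy/inputs/img_1_mask.tiff"],
}

with open("args.json", "w") as f:
    json.dump(example_args, f)
# then: python archive_ingest.py args.json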
49 changes: 49 additions & 0 deletions tools/visinity/dependencies/Dockerfile
@@ -0,0 +1,49 @@
FROM mambaorg/micromamba

ARG TAG
ARG PIP_NO_COMPILE="true"

USER root

RUN <<APT-INSTALL
set -e
apt-get update
apt-get install -y libgl1-mesa-glx curl unzip
rm -rf /var/lib/apt/lists/*
APT-INSTALL

RUN <<VISINITY
set -e
cd /tmp/
curl -L https://github.com/labsyspharm/visinity/archive/refs/tags/v$TAG.zip -o $TAG.zip
unzip $TAG.zip
rm -rf $TAG.zip
mv visinity-$TAG /visinity
useradd -ms /bin/bash visinity
echo -e '[global]\nno-cache-dir = True' >> /etc/pip.conf
chmod -R 0777 /visinity
chown -R visinity /visinity
micromamba install -n base -y -c conda-forge python=3.9 pip=24.0 openslide-python=1.3.1
micromamba install -n base -y -f /visinity/requirements.yml
micromamba clean --all --force-pkgs-dirs -y
VISINITY

RUN <<ENVMAKER
echo -e '#!/bin/bash\nmicromamba activate base\n$*' >> /bin/vis-env
chmod 755 /bin/vis-env
ENVMAKER

USER visinity

SHELL [ "micromamba", "run", "-n", "base", "/bin/bash", "-c" ]

RUN <<PIPPERY
set -e
pip install --no-dependencies lightkit pycave
PIPPERY

ENV BASH_ENV='/usr/local/bin/_activate_current_env.sh'

EXPOSE 8000
WORKDIR /visinity/
CMD ["python", "run.py"]
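
A sketch of how the image might be built and smoke-tested, assuming the TAG build argument should match @TOOL_VERSION@ from macros.xml and that the quay.io tag used there is the intended target; driving docker from Python here is purely illustrative.

import subprocess

# build the dependency image; TAG picks the Visinity release to download
subprocess.run(
    ["docker", "build",
     "--build-arg", "TAG=1.17",
     "-t", "quay.io/goeckslab/visinity:1.17v0",
     "tools/visinity/dependencies/"],
    check=True,
)

# smoke test: the container should serve Visinity on port 8000
# (this blocks while the server runs; stop it with Ctrl-C)
subprocess.run(
    ["docker", "run", "--rm", "-p", "8000:8000",
     "quay.io/goeckslab/visinity:1.17v0"],
    check=True,
)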
19 changes: 19 additions & 0 deletions tools/visinity/macros.xml
@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<macros>
    <xml name="requirements">
        <requirements>
            <container type="docker">quay.io/goeckslab/visinity:1.17v0</container>
        </requirements>
    </xml>
    <xml name="version_cmd">
        <version_command>echo @TOOL_VERSION@</version_command>
    </xml>
    <xml name="citations">
        <citations>
            <citation type="doi">10.1109/TVCG.2022.3209378</citation>
        </citations>
    </xml>

    <token name="@TOOL_VERSION@">1.17</token>
    <token name="@VERSION_SUFFIX@">0</token>
</macros>
5 changes: 5 additions & 0 deletions tools/visinity/test-data/celltypes.csv
@@ -0,0 +1,5 @@
phenotypeID,phenotype
0,epithelial
1,tcell
2,bcell
3,macrophage
2 changes: 2 additions & 0 deletions tools/visinity/test-data/dataset_names.csv
@@ -0,0 +1,2 @@
names
img_1
Binary file added tools/visinity/test-data/test_image.ome.tiff
Binary file added tools/visinity/test-data/test_mask.tiff