Galaxy tool wrapper for Visinity #86

Draft: wants to merge 10 commits into main
3 changes: 2 additions & 1 deletion .tt_skip
@@ -1 +1,2 @@
# add paths to tool repositories that should be ignored by CI
tools/visinity
230 changes: 230 additions & 0 deletions tools/visinity/archive_generate.py
@@ -0,0 +1,230 @@
import csv
import json
import os
import re
import shutil
import sys
import time


from zipfile import ZipFile, ZIP_BZIP2

from minerva_analysis import app


app.config['IS_DOCKER'] = False

# create a non-serving Flask test client and use it in place of the
# `requests` library, so routes can be called without starting a server
requests = app.test_client()

args = sys.argv[1:]

# args[0] is a JSON file describing the tool inputs; args[1] is the
# path the output archive should be written to
with open(args[0], 'r') as f:
    input_dict = json.load(f)

out_file = args[1]

name_list = []
with open(input_dict['dataset_names'], 'r') as f:
    _reader = csv.reader(f)
    header = next(_reader)
    assert header[0].lower() == 'names'
    for row_list in _reader:
        row = row_list[0]
        if row in name_list:
            raise ValueError(f'dataset name {row} is a duplicate!'
                             ' please ensure names are unique')
        # sanitize: collapse runs of characters outside the URL-safe
        # set A-Za-z0-9._~ into a single dash
        s = re.sub('[^A-Za-z0-9._~]+', '-', row)
        if s != row:
            row = s
        name_list.append(row)

channel_files = input_dict['channel_files']
label_files = input_dict['label_files']

assert len(name_list) == len(channel_files) == len(label_files)

radius = input_dict['n_radius']
celltype_csv = input_dict['celltype_csv']

# ensure the app's data directory and config file exist
data_path = f"{os.getcwd()}/minerva_analysis/data"
config_path = f"{data_path}/config.json"

if not os.path.isdir(data_path):
    os.mkdir(data_path)

if not os.path.isfile(config_path):
    with open(config_path, 'w') as f:
        f.write('{}')

url = 'http://127.0.0.1:8000'

with open(f'{data_path}/names.csv', 'w') as name_csv_file:
    name_csv_file.write(
        ','.join(name_list)
    )

shutil.copyfile(celltype_csv, 'celltype.csv')

payload_data = {
    "action": "Upload",
    "celltype_file": open('celltype.csv', 'rb'),
    "neighborhood_radius": int(radius),
}

for i, name in enumerate(name_list):
    new_quant_csv = f'quant{i}.csv'
    shutil.copyfile(input_dict['quant_csvs'][i], new_quant_csv)
    _sub_d = {
        f'name-{i+1}': name,
        f'channel_file-{i+1}': channel_files[i],
        f'label_file-{i+1}': label_files[i],
        f'csv_file-{i+1}': open(new_quant_csv, 'rb'),
    }
    payload_data.update(_sub_d)



resp = requests.post(f"{url}/upload", data=payload_data)

# the /upload response is an HTML page whose inline JS calls
# passVariablesToFrontend(<json>); pull that JSON payload back out
resp_regex = r'passVariablesToFrontend\((.*)\);'
json_string = re.search(resp_regex, resp.text).group(1)
combinedChannelData = json.loads(json_string)




def chunkify(fullname, idx):
    non_norm_markers = [
        'majoraxislength', 'number_neighbors', 'minoraxislength', 'neighbor_1',
        'solidity', 'eccentricity', 'y_position', 'x_position', 'neighbor_2',
        'percent_touching', 'orientation', 'neighbor_4', 'extent', 'cellid',
        'field_col', 'eulernumber', 'neighbor_3', 'neighbor_5', 'perimeter',
        'field_row']

    return [
        {
            "name": f"fullName{idx}",
            "value": fullname
        },
        {
            "name": f"name{idx}",
            "value": fullname
        },
        {
            "name": f"normalize{idx}",
            "value": "off" if fullname.lower() in non_norm_markers else "on"
        },
    ]
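
# e.g. chunkify("CD45", 3) yields:
#   [{"name": "fullName3",  "value": "CD45"},
#    {"name": "name3",      "value": "CD45"},
#    {"name": "normalize3", "value": "on"}]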


x_pos_terms = ['x_centroid', 'cellposition_x', 'x_position', 'x']
y_pos_terms = ['y_centroid', 'cellposition_y', 'y_position', 'y']
phenotype_terms = ['celltype', 'phenotype']

# headers should be identical across all datasets, so take the first
pre_sorted_headers = combinedChannelData[0]["csvHeader"]

# work around an issue where a filename matching "*_<digits>" is mistakenly
# assumed to be a non-segmentation file; oddly the channel names themselves
# don't seem to matter, only whether the name has an underscore and a number
for channel in combinedChannelData:
    channel["labelName"] = 'segmentation'

idfield = chunkify(pre_sorted_headers[0]['fullName'], 0)

header_prefix = [None, None, None]
sorted_headers = []

for header in pre_sorted_headers[1:]:
    if header['fullName'].lower() in x_pos_terms:
        assert (header_prefix[0] is None)
        header_prefix[0] = header
    elif header['fullName'].lower() in y_pos_terms:
        assert (header_prefix[1] is None)
        header_prefix[1] = header
    elif header['fullName'].lower() in phenotype_terms:
        assert (header_prefix[2] is None)
        header_prefix[2] = header
    else:
        sorted_headers.append(header)

# all three positional headers (x, y, phenotype) must have been found
assert all(h is not None for h in header_prefix)

all_headers = header_prefix + sorted_headers

headerlist = []
for i, header in enumerate(all_headers, start=1):
    headerlist.extend(
        chunkify(header['fullName'], i)
    )

payload = {
    'originalData': combinedChannelData,
    'headerList': headerlist,
    'normalizeCsv': False,
    'idField': idfield
}

# execute request
start = time.time()
resp = requests.post(f"{url}/save_config", json=payload)
duration = time.time() - start
print(f"processing duration:\n\t{duration} sec ({duration/60} mins)")

# brief pause so the app can finish writing out config.json
time.sleep(5)

# sanity-check that the written config.json parses before archiving it
with open(config_path, 'r') as innie:
    config = json.load(innie)

# zip it n ship it
with ZipFile(out_file, 'w', ZIP_BZIP2) as zipped:
    path_len = len(data_path) + 1
    for base, dirs, files in os.walk(data_path):
        for file in files:
            file_name = os.path.join(base, file)
            zipped.write(file_name, file_name[path_len:])
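
As an aside, a minimal standalone sketch of the pattern this script is built on: Flask's test_client() drives an app's routes in-process, so nothing has to be listening on a port. The /upload route below is a hypothetical stand-in, not the real minerva_analysis view.

from flask import Flask, request

app = Flask(__name__)

@app.route('/upload', methods=['POST'])
def upload():
    # echo one form field back, standing in for the real upload view
    return f"received action={request.form.get('action')}"

client = app.test_client()
resp = client.post('/upload', data={'action': 'Upload'})
print(resp.text)  # -> received action=Upload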
48 changes: 48 additions & 0 deletions tools/visinity/archive_ingest.py
@@ -0,0 +1,48 @@
import json
import sys

from os.path import isfile

from zipfile import ZipFile


with open(sys.argv[1], 'r') as f:
    arg_dict = json.load(f)

arch_zip = arg_dict['vis_archive']
og_channels = arg_dict['channel_files']
og_segs = arg_dict['label_files']


data_path = '/visinity/minerva_analysis/data'
conf_path = f'{data_path}/config.json'


with ZipFile(arch_zip, 'r') as zippy:
    zippy.extractall(data_path)

with open(f'{data_path}/names.csv', 'r') as f:
    name_list = f.read().split(',')

with open(conf_path, 'r') as f:
    config = json.load(f)


def reconfigure(channel, seg, old_config):
    subconfig = dict(old_config)
    # re-point the segmentation only if the archived path is unusable
    if (not subconfig['segmentation'].startswith(data_path)
            or not isfile(subconfig['segmentation'])):
        subconfig['segmentation'] = seg
    subconfig['channelFile'] = channel
    return subconfig


new_config = {}

for name, channel, seg in zip(name_list, og_channels, og_segs):
    new_config[name] = reconfigure(channel, seg, config[name])

with open(conf_path, 'w') as f:
    json.dump(new_config, f)
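
For reference, a hypothetical example of the JSON argument file this script expects as sys.argv[1]. The keys match what the script reads above; the paths are invented.

import json

example_args = {
    "vis_archive": "/tmp/visinity_archive.zip",
    "channel_files": ["/galaxy/inputs/img_1.ome.tiff"],
    "label_files": ["/galaxy/inputs/img_1_mask.tiff"],
}

with open("args.json", "w") as f:
    json.dump(example_args, f)
# then: python archive_ingest.py args.json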
49 changes: 49 additions & 0 deletions tools/visinity/dependencies/Dockerfile
@@ -0,0 +1,49 @@
FROM mambaorg/micromamba

ARG TAG
ARG PIP_NO_COMPILE="true"

USER root

RUN <<APT-INSTALL
set -e
apt-get update
apt-get install -y libgl1-mesa-glx curl unzip
rm -rf /var/lib/apt/lists/*
APT-INSTALL

RUN <<VISINITY
set -e
cd /tmp/
curl -L https://github.com/labsyspharm/visinity/archive/refs/tags/v$TAG.zip -o $TAG.zip
unzip $TAG.zip
rm -rf $TAG.zip
mv visinity-$TAG /visinity
useradd -ms /bin/bash visinity
echo -e '[global]\nno-cache-dir = True' >> /etc/pip.conf
chmod -R 0777 /visinity
chown -R visinity /visinity
micromamba install -n base -y -c conda-forge python=3.9 pip=24.0 openslide-python=1.3.1
micromamba install -n base -y -f /visinity/requirements.yml
micromamba clean --all --force-pkgs-dirs -y
VISINITY

RUN <<ENVMAKER
echo -e '#!/bin/bash\nmicromamba activate base\n$*' >> /bin/vis-env
chmod 755 /bin/vis-env
ENVMAKER

USER visinity

SHELL [ "micromamba", "run", "-n", "base", "/bin/bash", "-c" ]

RUN <<PIPPERY
set -e
pip install --no-dependencies lightkit pycave
PIPPERY

ENV BASH_ENV='/usr/local/bin/_activate_current_env.sh'

EXPOSE 8000
WORKDIR /visinity/
CMD ["python", "run.py"]
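
A sketch of how the image might be built and smoke-tested, assuming the TAG build argument should match @TOOL_VERSION@ from macros.xml and that the quay.io tag used there is the intended target; driving docker from Python here is purely illustrative.

import subprocess

# build the dependency image; TAG picks the Visinity release to download
subprocess.run(
    ["docker", "build",
     "--build-arg", "TAG=1.17",
     "-t", "quay.io/goeckslab/visinity:1.17v0",
     "tools/visinity/dependencies/"],
    check=True,
)

# smoke test: the container should serve Visinity on port 8000
# (this blocks while the server runs; stop it with Ctrl-C)
subprocess.run(
    ["docker", "run", "--rm", "-p", "8000:8000",
     "quay.io/goeckslab/visinity:1.17v0"],
    check=True,
)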
19 changes: 19 additions & 0 deletions tools/visinity/macros.xml
@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<macros>
    <xml name="requirements">
        <requirements>
            <container type="docker">quay.io/goeckslab/visinity:1.17v0</container>
        </requirements>
    </xml>
    <xml name="version_cmd">
        <version_command>echo @TOOL_VERSION@</version_command>
    </xml>
    <xml name="citations">
        <citations>
            <citation type="doi">10.1109/TVCG.2022.3209378</citation>
        </citations>
    </xml>

    <token name="@TOOL_VERSION@">1.17</token>
    <token name="@VERSION_SUFFIX@">0</token>
</macros>
5 changes: 5 additions & 0 deletions tools/visinity/test-data/celltypes.csv
@@ -0,0 +1,5 @@
phenotypeID,phenotype
0,epithelial
1,tcell
2,bcell
3,macrophage
2 changes: 2 additions & 0 deletions tools/visinity/test-data/dataset_names.csv
@@ -0,0 +1,2 @@
names
img_1
Binary file added tools/visinity/test-data/test_image.ome.tiff
Binary file added tools/visinity/test-data/test_mask.tiff