From 629f5e92716171975bf99d3663ab370a0c427d30 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Mon, 6 May 2024 13:42:30 -0400
Subject: [PATCH 01/13] new susie method

---
 ldsc/src/main/scala/MakeSuSiE.scala       | 73 ++++++++++++++++++
 susie/.editorconfig                       | 14 ++++
 susie/.scalafmt.conf                      |  4 +
 susie/LICENSE.txt                         | 29 +++++++
 susie/README.md                           | 14 ++++
 susie/built.sbt                           | 70 +++++++++++++++++
 susie/project/build.properties            |  1 +
 susie/project/plugins.sbt                 |  1 +
 susie/src/main/resources/install-susie.sh | 63 +++++++++++++++
 susie/src/main/resources/makeSuSiE.py     | 93 +++++++++++++++++++++++
 susie/src/main/scala/MakeSuSiE.scala      | 54 +++++++++++++
 susie/src/main/scala/Susie.scala          | 27 +++++++
 susie/version.sbt                         |  1 +
 13 files changed, 444 insertions(+)
 create mode 100644 ldsc/src/main/scala/MakeSuSiE.scala
 create mode 100644 susie/.editorconfig
 create mode 100644 susie/.scalafmt.conf
 create mode 100644 susie/LICENSE.txt
 create mode 100644 susie/README.md
 create mode 100644 susie/built.sbt
 create mode 100644 susie/project/build.properties
 create mode 100644 susie/project/plugins.sbt
 create mode 100644 susie/src/main/resources/install-susie.sh
 create mode 100644 susie/src/main/resources/makeSuSiE.py
 create mode 100644 susie/src/main/scala/MakeSuSiE.scala
 create mode 100644 susie/src/main/scala/Susie.scala
 create mode 100644 susie/version.sbt

diff --git a/ldsc/src/main/scala/MakeSuSiE.scala b/ldsc/src/main/scala/MakeSuSiE.scala
new file mode 100644
index 00000000..c69cf05a
--- /dev/null
+++ b/ldsc/src/main/scala/MakeSuSiE.scala
@@ -0,0 +1,73 @@
+package org.broadinstitute.dig.aggregator.methods.susie
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws.emr._
+import org.broadinstitute.dig.aws.Ec2.Strategy
+import org.broadinstitute.dig.aws.MemorySize
+
+/** Stage that runs makeSuSiE.py once for every phenotype/ancestry output derived from the
+  * ancestry-clumped bottom-line inputs matched by `ancestrySpecific`. */
+class MakeSuSiE(implicit context: Context) extends Stage {
+  import MemorySize.Implicits._
+
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/*/")
+  // val mixedDatasets: Input.Source = Input.Source.Success("variants/*/*/*/")
+
+  /** Source inputs (mixedDatasets is disabled above, so it must not be listed here). */
+  override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
+
+  /** Map inputs to their outputs. */
+  override val rules: PartialFunction[Input, Outputs] = {
+    case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
+    // case mixedDatasets(_, _, phenotype) => Outputs.Named(s"$phenotype/Mixed")
+  }
+
+  /** Just need a single machine with no applications, but a good drive. */
+  override def cluster: ClusterDef = super.cluster.copy(
+    instances = 1,
+    applications = Seq.empty,
+    masterVolumeSizeInGB = 100,
+    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-susie.sh")))
+  )
+
+  override def make(output: String): Job = {
+    val input = MakeSuSiEInput.fromString(output)
+    new Job(Job.Script(resourceUri("makeSuSiE.py"), input.flags:_*))
+  }
+
+  /** Before the jobs actually run, call s3.rm on the output directory of this output. */
+  override def prepareJob(output: String): Unit = {
+    val input = MakeSuSiEInput.fromString(output)
+    context.s3.rm(input.outputDirectory + "/")
+  }
+
+  /** On success, write the _SUCCESS file in the output directory. */
+  override def success(output: String): Unit = {
+    val input = MakeSuSiEInput.fromString(output)
+    context.s3.touch(input.outputDirectory + "/_SUCCESS")
+    ()
+  }
+}
+
+/** A stage output name ("phenotype/ancestry") split into its two components. */
+case class MakeSuSiEInput(
+  phenotype: String,
+  ancestry: String
+) {
+  /** S3 prefix that MakeSuSiE removes in prepareJob and marks with _SUCCESS in success. */
+  def outputDirectory: String = s"out/susie/$phenotype/ancestry=$ancestry"
+
+  /** Command-line flags passed to makeSuSiE.py (it parses --phenotype and --ancestry). */
+  def flags: Seq[String] = Seq(s"--phenotype=$phenotype", s"--ancestry=$ancestry")
+}
+
+object MakeSuSiEInput {
+  /** Parse an output name of the form "phenotype/ancestry"; any other shape is rejected. */
+  def fromString(output: String): MakeSuSiEInput = {
+    output.split("/").toSeq match {
+      case Seq(phenotype, ancestry) => MakeSuSiEInput(phenotype, ancestry)
+      case _ => throw new IllegalArgumentException(s"Expected 'phenotype/ancestry' but got '$output'")
+    }
+  }
+}
+
diff --git a/susie/.editorconfig b/susie/.editorconfig
new file mode 100644
index 00000000..587935a6
--- /dev/null
+++ b/susie/.editorconfig
@@ -0,0 +1,14 @@
+root = true
+
+[*]
+insert_final_newline = true
+
+[*.java]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+
+[*.{scala,sbt}]
+indent_style = space
+indent_size = 2
+trim_trailing_whitespace = true
diff --git a/susie/.scalafmt.conf b/susie/.scalafmt.conf
new file mode 100644
index 00000000..ca683bf8
--- /dev/null
+++ b/susie/.scalafmt.conf
@@ -0,0 +1,4 @@
+version = "2.4.2"
+align=more
+docstrings=ScalaDoc
+maxColumn=120
diff --git a/susie/LICENSE.txt b/susie/LICENSE.txt
new file mode 100644
index 00000000..0d0952ea
--- /dev/null
+++ b/susie/LICENSE.txt
@@ -0,0 +1,29 @@
+Copyright 2020 <COPYRIGHT HOLDER>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
diff --git a/susie/README.md b/susie/README.md
new file mode 100644
index 00000000..b9d83a00
--- /dev/null
+++ b/susie/README.md
@@ -0,0 +1,14 @@
+# susie
+
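+This method runs SuSiE (`SuSiE.r`) on each clump of the ancestry-clumped bottom-line meta-analysis results.
+
+It reads from `out/metaanalysis/bottom-line/ancestry-clumped/<phenotype>/ancestry=<ancestry>` (under `INPUT_PATH`)
+and writes to `out/susie/staging/<phenotype>/ancestry=<ancestry>` (under `OUTPUT_PATH`).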
+
+## Stages
+
+These are the stages of susie.
+
+### MakeSuSiE
+
+Runs `makeSuSiE.py` once per phenotype/ancestry output on a single-instance cluster bootstrapped by `install-susie.sh`.
diff --git a/susie/built.sbt b/susie/built.sbt
new file mode 100644
index 00000000..bda37340
--- /dev/null
+++ b/susie/built.sbt
@@ -0,0 +1,70 @@
+val Versions = new {
+  val Aggregator = "0.3.4-SNAPSHOT"
+  val Scala = "2.13.2"
+}
+
+// set the version of scala to compile with
+scalaVersion := Versions.Scala
+
+// add scala compile flags
+scalacOptions ++= Seq(
+  "-feature",
+  "-deprecation",
+  "-unchecked",
+  "-Ywarn-value-discard"
+)
+
+// add required libraries
+libraryDependencies ++= Seq(
+  "org.broadinstitute.dig" %% "dig-aggregator-core" % Versions.Aggregator
+)
+
+// set the organization this method belongs to
+organization := "org.broadinstitute.dig"
+
+// entry point when running this method
+mainClass := Some("org.broadinstitute.dig.aggregator.methods.susie.Susie")
+
+// enables buildInfo, which bakes git version info into the jar
+enablePlugins(GitVersioning)
+
+// get the buildInfo task
+val buildInfoTask = taskKey[Seq[File]]("buildInfo")
+
+// define execution code for task
+buildInfoTask := {
+  val file = (resourceManaged in Compile).value / "version.properties"
+
+  // log where the properties will be written to
+  streams.value.log.info(s"Writing version info to $file...")
+
+  // collect git versioning information
+  val branch = git.gitCurrentBranch.value
+  val lastCommit = git.gitHeadCommit.value
+  val describedVersion = git.gitDescribedVersion.value
+  val anyUncommittedChanges = git.gitUncommittedChanges.value
+  val remoteUrl = (scmInfo in ThisBuild).value.map(_.browseUrl.toString)
+  val buildDate = java.time.Instant.now
+
+  // map properties
+  val properties = Map[String, String](
+    "branch" -> branch,
+    "lastCommit" -> lastCommit.getOrElse(""),
+    "remoteUrl" -> remoteUrl.getOrElse(""),
+    "uncommittedChanges" -> anyUncommittedChanges.toString,
+    "buildDate" -> buildDate.toString
+  )
+
+  // build properties content
+  val contents = properties.toList.map {
+    case (key, value) if value.length > 0 => s"$key=$value"
+    case _                                => ""
+  }
+
+  // output the version information from git to version.properties
+  IO.write(file, contents.mkString("\n"))
+  Seq(file)
+}
+
+// add the build info task output to resources
+(resourceGenerators in Compile) += buildInfoTask.taskValue
diff --git a/susie/project/build.properties b/susie/project/build.properties
new file mode 100644
index 00000000..e67343ae
--- /dev/null
+++ b/susie/project/build.properties
@@ -0,0 +1 @@
+sbt.version=1.5.0
diff --git a/susie/project/plugins.sbt b/susie/project/plugins.sbt
new file mode 100644
index 00000000..23d5057a
--- /dev/null
+++ b/susie/project/plugins.sbt
@@ -0,0 +1 @@
+addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
diff --git a/susie/src/main/resources/install-susie.sh b/susie/src/main/resources/install-susie.sh
new file mode 100644
index 00000000..d54eeba0
--- /dev/null
+++ b/susie/src/main/resources/install-susie.sh
@@ -0,0 +1,63 @@
+#!/bin/bash -xe
+
+# susie method
+## Developed with python 3 and R
+
+SuSiE_ROOT=/mnt/var/susie
+
+# install to the root directory
+sudo mkdir -p "$SuSiE_ROOT"
+cd "$SuSiE_ROOT"
+
+# install yum dependencies
+sudo yum install -y python3-devel
+sudo yum install -y R
+
+# Install R-4.1.0
+# sudo wget https://cdn.rstudio.com/r/centos-7/pkgs/R-4.1.0-1-1.x86_64.rpm
+# sudo yum install -y R-4.1.0-1-1.x86_64.rpm
+# sudo rm R-4.1.0-1-1.x86_64.rpm
+
+# # find R directory 
+# for cmd in $(ls /usr/bin); do
+#     if echo "$cmd" | grep -qi "R"; then
+#         whereis $cmd
+#     fi
+# done
+
+
+# install R dependencies
+sudo R -e "install.packages('dplyr', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('tidyr', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('base', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('stats', repos='http://cran.rstudio.com/')"
+# sudo R -e "install.packages('https://cran.r-project.org/src/contrib/Archive/coloc/coloc_5.1.0.tar.gz', repos = NULL, type = 'source')"
+sudo R -e "install.packages('coloc', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('sjmisc', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('susieR', repos='http://cran.rstudio.com/')"
+# sudo R -e "install.packages('https://cran.hafro.is/contrib/main/00Archive/susieR/susieR_0.11.42.tar.gz', repos = NULL, type = 'source')"
+sudo R -e "install.packages('stringr', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('Matrix', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('jsonlite', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('data.table', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('parallel', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('strengejacke', repos='http://cran.rstudio.com/')"
+sudo R -e "install.packages('http://www.well.ox.ac.uk/~gav/resources/rbgen_v1.1.5.tgz', repos = NULL, type = 'source')"
+
+# install python dependencies
+pip3 install -U pandas
+pip3 install -U numpy
+pip3 install -U fsspec
+
+# pull down LD bfiles
+sudo mkdir -p ./1000G_EUR_plink
+sudo aws s3 cp s3://dig-analysis-bin/susie/1000G_EUR_plink/ ./1000G_EUR_plink/ --recursive
+
+# fetch snps for mapping
+sudo aws s3 cp "s3://dig-analysis-bin/snps/dbSNP_common_GRCh37.csv" ./snps.csv
+
+# fetch the SuSiE R script, the plink binary and the plink LD helper script
+sudo aws s3 cp s3://dig-analysis-bin/susie/SuSiE.r ./
+sudo aws s3 cp s3://dig-analysis-bin/susie/plink ./
+sudo aws s3 cp s3://dig-analysis-bin/susie/plink_ld_snp_list.sh ./
+sudo chmod 755 ./plink_ld_snp_list.sh ./plink  # executable by all, writable only by the owner
\ No newline at end of file
diff --git a/susie/src/main/resources/makeSuSiE.py b/susie/src/main/resources/makeSuSiE.py
new file mode 100644
index 00000000..7ab2bb3f
--- /dev/null
+++ b/susie/src/main/resources/makeSuSiE.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python3
+from optparse import OptionParser
+import pandas as pd
+import numpy as np
+import shutil
+import subprocess
+import os
+
+s3_in=os.environ['INPUT_PATH']
+s3_out=os.environ['OUTPUT_PATH']
+
+# def finds json files in the directory
+def make_json_files(directory):
+	subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
+	subprocess.run('cat input/*.json > input.json', shell=True)
+	shutil.rmtree('input')
+
+def make_ld_files(directory):
+	subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
+	subprocess.run('cat input/*.ld > snp_ld.ld', shell=True)
+	shutil.rmtree('input')
+
+def main():
+	usage = "usage: %prog [options]"
+	parser = OptionParser(usage)
+	parser.add_option("","--phenotype", default=None)
+	parser.add_option("","--ancestry", default=None)
+
+	(options, args) = parser.parse_args()
+
+	clump_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-clumped/{options.phenotype}/ancestry={options.ancestry}'
+	var2rs_path = '/mnt/var/susie/snps.csv'
+	out_path = f'{s3_out}/out/susie/staging/{options.phenotype}/ancestry={options.ancestry}' 
+	
+	# read all files in the clump path
+	make_json_files(clump_path)
+
+	# read var2rs file
+	df_var_rs_Id = pd.read_csv(var2rs_path,sep='\t')
+
+	# create the tmp out directory
+	out_directory = 'data'
+	if not os.path.exists(out_directory):
+		os.makedirs(out_directory,exist_ok=True)
+
+	# read clump
+	df_clump = pd.read_json('input.json', lines=True)
+
+	# sort clump based on the varId
+	df_clump.sort_values('varId',inplace = True)
+	df_var_filter = df_var_rs_Id[df_var_rs_Id['varId'].isin(df_clump['varId'])]
+
+	# only common variants 
+	df_clump = df_clump[df_clump['varId'].isin(df_var_filter['varId'])]
+	df_clump.sort_values('varId',inplace = True)
+	df_var_filter.sort_values('varId',inplace = True)
+
+
+	# add dbSNP into the clump files
+	df_clump['dbSNP'] = df_var_filter['dbSNP'].to_numpy()
+
+	df_clump = df_clump.rename(columns={'dbSNP':'rsId','reference':'ref'})
+
+	# for loop over clump ids
+	for i in sorted(df_clump['clump'].unique()):
+		# filter gwas based on the clump id
+		df_susie = df_clump[df_clump['clump']==i]
+		chrom = df_susie['chromosome'].to_numpy()[0]
+		gwas_susie_file_name = out_directory+'/'+'clump_'+str(i)+'.csv'
+		df_susie.to_csv(gwas_susie_file_name,sep='\t',index=False)
+		df_susie['rsId'].to_csv(f'{out_directory}/snps.txt',sep='\t',index=False,header=False)
+
+		# calculate LD for snps list
+		subprocess.call(["bash", "/mnt/var/susie/plink_ld_snp_list.sh", f'{chrom}', f'{out_directory}/snps.txt', f'{out_directory}/snps_ld'])
+
+		# Call the Bash script of SuSiE with its arguments 
+		argument1_gwas = gwas_susie_file_name
+		argument2_ld   = f'{out_directory}/snps_ld.ld'
+		argument3_out  = out_directory
+		subprocess.call(['Rscript','/mnt/var/susie/SuSiE.r','--gwas',gwas_susie_file_name,'--ld',argument2_ld, '--out',argument3_out])
+		os.remove(argument1_gwas)
+
+	os.remove(f'{out_directory}/snps.txt')
+	os.remove(f'{out_directory}/snps_ld.ld')
+	os.remove(f'{out_directory}/snps_ld.log')
+	os.remove(f'{out_directory}/snps_ld.nosex')
+	subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])	
+	subprocess.check_call(['aws','s3','cp',f'{out_directory}/',out_path,'--recursive'])
+	os.remove('input.json')
+	shutil.rmtree(out_directory)
+
+if __name__ == '__main__':
+	main()
diff --git a/susie/src/main/scala/MakeSuSiE.scala b/susie/src/main/scala/MakeSuSiE.scala
new file mode 100644
index 00000000..5a989057
--- /dev/null
+++ b/susie/src/main/scala/MakeSuSiE.scala
@@ -0,0 +1,54 @@
+package org.broadinstitute.dig.aggregator.methods.susie
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws.emr._
+import org.broadinstitute.dig.aws.Ec2.Strategy
+import org.broadinstitute.dig.aws.MemorySize
+
+class MakeSuSiE(implicit context: Context) extends Stage {
+  import MemorySize.Implicits._
+
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/*/")
+  // val mixedDatasets: Input.Source = Input.Source.Success("variants/*/*/*/")
+
+  /** Source inputs. */
+  override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
+
+  /** Map inputs to their outputs. */
+  override val rules: PartialFunction[Input, Outputs] = {
+    case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
+    // case mixedDatasets(_, _, phenotype) => Outputs.Named(s"$phenotype/Mixed")
+  }
+
+  /** Just need a single machine with no applications, but a good drive. */
+  override def cluster: ClusterDef = super.cluster.copy(
+    instances = 1,
+    applications = Seq.empty,
+    masterVolumeSizeInGB = 100,
+    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-susie.sh")))
+  )
+
+  override def make(output: String): Job = {
+    val input = MakeSuSiEInput.fromString(output)
+    new Job(Job.Script(resourceUri("makeSuSiE.py"), input.flags:_*))
+  }
+
+}
+  
+
+/** A stage output name ("phenotype/ancestry") split into its two components. */
+case class MakeSuSiEInput(phenotype: String, ancestry: String) {
+
+  /** Command-line flags passed to makeSuSiE.py. */
+  def flags: Seq[String] = Seq(s"--phenotype=$phenotype", s"--ancestry=$ancestry")
+}
+
+object MakeSuSiEInput {
+  /** Parse an output name of the form "phenotype/ancestry"; any other shape is rejected. */
+  def fromString(output: String): MakeSuSiEInput =
+    output.split("/").toSeq match {
+      case Seq(phenotype, ancestry) => MakeSuSiEInput(phenotype, ancestry)
+      // fail with a readable message instead of a bare MatchError
+      case _ => throw new IllegalArgumentException(s"Expected 'phenotype/ancestry' but got '$output'")
+    }
+}
+
diff --git a/susie/src/main/scala/Susie.scala b/susie/src/main/scala/Susie.scala
new file mode 100644
index 00000000..d051bd37
--- /dev/null
+++ b/susie/src/main/scala/Susie.scala
@@ -0,0 +1,27 @@
+package org.broadinstitute.dig.aggregator.methods.susie
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws._
+import org.broadinstitute.dig.aws.emr._
+
+/** This is your aggregator method.
+  *
+  * All that needs to be done here is to implement the initStages function,
+  * which adds stages to the method in the order they should be executed.
+  *
+  * When you are ready to run it, use SBT from the CLI:
+  *
+  *   sbt run [args]
+  *
+  * See the README of the dig-aggregator-core project for a complete list of
+  * CLI arguments available.
+  */
+object Susie extends Method {
+
+  /** Add all stages used in this method here. Stages must be added in the
+    * order they should be serially executed.
+    */
+  override def initStages(implicit context: Context) = {
+    addStage(new MakeSuSiE)
+  }
+}
diff --git a/susie/version.sbt b/susie/version.sbt
new file mode 100644
index 00000000..e7654440
--- /dev/null
+++ b/susie/version.sbt
@@ -0,0 +1 @@
+version in ThisBuild := "0.1.0"

From a4572725815a3b8d0be5f10fb81ccb1b14a6d164 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Mon, 6 May 2024 14:27:58 -0400
Subject: [PATCH 02/13] clean code

---
 susie/src/main/resources/install-susie.sh | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/susie/src/main/resources/install-susie.sh b/susie/src/main/resources/install-susie.sh
index d54eeba0..a2e79fd9 100644
--- a/susie/src/main/resources/install-susie.sh
+++ b/susie/src/main/resources/install-susie.sh
@@ -13,29 +13,15 @@ cd "$SuSiE_ROOT"
 sudo yum install -y python3-devel
 sudo yum install -y R
 
-# Install R-4.1.0
-# sudo wget https://cdn.rstudio.com/r/centos-7/pkgs/R-4.1.0-1-1.x86_64.rpm
-# sudo yum install -y R-4.1.0-1-1.x86_64.rpm
-# sudo rm R-4.1.0-1-1.x86_64.rpm
-
-# # find R directory 
-# for cmd in $(ls /usr/bin); do
-#     if echo "$cmd" | grep -qi "R"; then
-#         whereis $cmd
-#     fi
-# done
-
 
 # install R dependencies
 sudo R -e "install.packages('dplyr', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('tidyr', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('base', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('stats', repos='http://cran.rstudio.com/')"
-# sudo R -e "install.packages('https://cran.r-project.org/src/contrib/Archive/coloc/coloc_5.1.0.tar.gz', repos = NULL, type = 'source')"
 sudo R -e "install.packages('coloc', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('sjmisc', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('susieR', repos='http://cran.rstudio.com/')"
-# sudo R -e "install.packages('https://cran.hafro.is/contrib/main/00Archive/susieR/susieR_0.11.42.tar.gz', repos = NULL, type = 'source')"
 sudo R -e "install.packages('stringr', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('Matrix', repos='http://cran.rstudio.com/')"
 sudo R -e "install.packages('jsonlite', repos='http://cran.rstudio.com/')"

From 2fcb8a3550f09dc728bcf667b7ecd559403c127c Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Thu, 16 May 2024 13:06:27 -0400
Subject: [PATCH 03/13] clean up code

---
 susie/built.sbt                       |  2 +-
 susie/project/build.properties        |  2 +-
 susie/src/main/resources/makeSuSiE.py | 41 +++++++++++++--------------
 susie/src/main/scala/MakeSuSiE.scala  |  3 +-
 4 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/susie/built.sbt b/susie/built.sbt
index bda37340..461df957 100644
--- a/susie/built.sbt
+++ b/susie/built.sbt
@@ -1,5 +1,5 @@
 val Versions = new {
-  val Aggregator = "0.3.4-SNAPSHOT"
+  val Aggregator = "0.3.5-SNAPSHOT"
   val Scala = "2.13.2"
 }
 
diff --git a/susie/project/build.properties b/susie/project/build.properties
index e67343ae..46e43a97 100644
--- a/susie/project/build.properties
+++ b/susie/project/build.properties
@@ -1 +1 @@
-sbt.version=1.5.0
+sbt.version=1.8.2
diff --git a/susie/src/main/resources/makeSuSiE.py b/susie/src/main/resources/makeSuSiE.py
index 7ab2bb3f..b1229df6 100644
--- a/susie/src/main/resources/makeSuSiE.py
+++ b/susie/src/main/resources/makeSuSiE.py
@@ -15,69 +15,66 @@ def make_json_files(directory):
 	subprocess.run('cat input/*.json > input.json', shell=True)
 	shutil.rmtree('input')
 
-def make_ld_files(directory):
-	subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
-	subprocess.run('cat input/*.ld > snp_ld.ld', shell=True)
-	shutil.rmtree('input')
-
 def main():
 	usage = "usage: %prog [options]"
 	parser = OptionParser(usage)
-	parser.add_option("","--phenotype", default=None)
-	parser.add_option("","--ancestry", default=None)
+	parser.add_option("", "--phenotype", default=None)
+	parser.add_option("", "--ancestry", default=None)
 
-	(options, args) = parser.parse_args()
+	args = parser.parse_args()
 
-	clump_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-clumped/{options.phenotype}/ancestry={options.ancestry}'
+	clump_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-clumped/{args.phenotype}/ancestry={args.ancestry}'
 	var2rs_path = '/mnt/var/susie/snps.csv'
-	out_path = f'{s3_out}/out/susie/staging/{options.phenotype}/ancestry={options.ancestry}' 
+	out_path = f'{s3_out}/out/susie/staging/{args.phenotype}/ancestry={args.ancestry}' 
 	
 	# read all files in the clump path
 	make_json_files(clump_path)
 
 	# read var2rs file
-	df_var_rs_Id = pd.read_csv(var2rs_path,sep='\t')
+	df_var_rs_Id = pd.read_csv(var2rs_path, sep='\t')
 
 	# create the tmp out directory
 	out_directory = 'data'
 	if not os.path.exists(out_directory):
-		os.makedirs(out_directory,exist_ok=True)
+		os.makedirs(out_directory, exist_ok=True)
 
 	# read clump
 	df_clump = pd.read_json('input.json', lines=True)
 
 	# sort clump based on the varId
-	df_clump.sort_values('varId',inplace = True)
+	df_clump.sort_values('varId', inplace = True)
 	df_var_filter = df_var_rs_Id[df_var_rs_Id['varId'].isin(df_clump['varId'])]
 
 	# only common variants 
 	df_clump = df_clump[df_clump['varId'].isin(df_var_filter['varId'])]
-	df_clump.sort_values('varId',inplace = True)
-	df_var_filter.sort_values('varId',inplace = True)
+	df_clump.sort_values('varId', inplace = True)
+	df_var_filter.sort_values('varId', inplace = True)
 
 
 	# add dbSNP into the clump files
 	df_clump['dbSNP'] = df_var_filter['dbSNP'].to_numpy()
 
-	df_clump = df_clump.rename(columns={'dbSNP':'rsId','reference':'ref'})
+	df_clump = df_clump.rename(columns={'dbSNP':'rsId', 'reference':'ref'})
 
 	# for loop over clump ids
 	for i in sorted(df_clump['clump'].unique()):
 		# filter gwas based on the clump id
 		df_susie = df_clump[df_clump['clump']==i]
 		chrom = df_susie['chromosome'].to_numpy()[0]
-		gwas_susie_file_name = out_directory+'/'+'clump_'+str(i)+'.csv'
-		df_susie.to_csv(gwas_susie_file_name,sep='\t',index=False)
-		df_susie['rsId'].to_csv(f'{out_directory}/snps.txt',sep='\t',index=False,header=False)
+		gwas_susie_file_name = f'{out_directory}/clump_{i}.csv'
+		df_susie.to_csv(gwas_susie_file_name, sep='\t', index=False)
+		df_susie['rsId'].to_csv(f'{out_directory}/snps.txt', sep='\t', index=False, header=False)
 
 		# calculate LD for snps list
-		subprocess.call(["bash", "/mnt/var/susie/plink_ld_snp_list.sh", f'{chrom}', f'{out_directory}/snps.txt', f'{out_directory}/snps_ld'])
+		subprocess.call(["bash", "/mnt/var/susie/plink_ld_snp_list.sh", f'{chrom}', 
+							f'{out_directory}/snps.txt', f'{out_directory}/snps_ld'])
 
 		# Call the Bash script of SuSiE with its arguments 
 		argument1_gwas = gwas_susie_file_name
 		argument2_ld   = f'{out_directory}/snps_ld.ld'
 		argument3_out  = out_directory
-		subprocess.call(['Rscript','/mnt/var/susie/SuSiE.r','--gwas',gwas_susie_file_name,'--ld',argument2_ld, '--out',argument3_out])
+		subprocess.call(['Rscript', '/mnt/var/susie/SuSiE.r', '--gwas', gwas_susie_file_name, 
+						'--ld', argument2_ld, '--out', argument3_out])
 		os.remove(argument1_gwas)
 
 	os.remove(f'{out_directory}/snps.txt')
@@ -85,7 +82,7 @@ def main():
 	os.remove(f'{out_directory}/snps_ld.log')
 	os.remove(f'{out_directory}/snps_ld.nosex')
 	subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])	
-	subprocess.check_call(['aws','s3','cp',f'{out_directory}/',out_path,'--recursive'])
+	subprocess.check_call(['aws', 's3', 'cp', f'{out_directory}/', out_path, '--recursive'])
 	os.remove('input.json')
 	shutil.rmtree(out_directory)
 
diff --git a/susie/src/main/scala/MakeSuSiE.scala b/susie/src/main/scala/MakeSuSiE.scala
index 5a989057..cf55bf92 100644
--- a/susie/src/main/scala/MakeSuSiE.scala
+++ b/susie/src/main/scala/MakeSuSiE.scala
@@ -8,8 +8,7 @@ import org.broadinstitute.dig.aws.MemorySize
 class MakeSuSiE(implicit context: Context) extends Stage {
   import MemorySize.Implicits._
 
-  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/*/")
-  // val mixedDatasets: Input.Source = Input.Source.Success("variants/*/*/*/")
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
 
   /** Source inputs. */
   override val sources: Seq[Input.Source] = Seq(ancestrySpecific)

From 88eb34deab6cc09f9e2a1c7ad337b5f25a9f9237 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Thu, 16 May 2024 13:20:49 -0400
Subject: [PATCH 04/13] clean up code

---
 susie/src/main/scala/Susie.scala | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 susie/src/main/scala/Susie.scala

diff --git a/susie/src/main/scala/Susie.scala b/susie/src/main/scala/Susie.scala
deleted file mode 100644
index d051bd37..00000000
--- a/susie/src/main/scala/Susie.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.broadinstitute.dig.aggregator.methods.susie
-
-import org.broadinstitute.dig.aggregator.core._
-import org.broadinstitute.dig.aws._
-import org.broadinstitute.dig.aws.emr._
-
-/** This is your aggregator method.
-  *
-  * All that needs to be done here is to implement the initStages function,
-  * which adds stages to the method in the order they should be executed.
-  *
-  * When you are ready to run it, use SBT from the CLI:
-  *
-  *   sbt run [args]
-  *
-  * See the README of the dig-aggregator-core project for a complete list of
-  * CLI arguments available.
-  */
-object Susie extends Method {
-
-  /** Add all stages used in this method here. Stages must be added in the
-    * order they should be serially executed.
-    */
-  override def initStages(implicit context: Context) = {
-    addStage(new MakeSuSiE)
-  }
-}

From 13233a9053d862c875bfe6094bfebc8d23f5ba0a Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Thu, 16 May 2024 15:00:46 -0400
Subject: [PATCH 05/13] back susie.scala

---
 susie/src/main/scala/Susie.scala | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 susie/src/main/scala/Susie.scala

diff --git a/susie/src/main/scala/Susie.scala b/susie/src/main/scala/Susie.scala
new file mode 100644
index 00000000..d051bd37
--- /dev/null
+++ b/susie/src/main/scala/Susie.scala
@@ -0,0 +1,27 @@
+package org.broadinstitute.dig.aggregator.methods.susie
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws._
+import org.broadinstitute.dig.aws.emr._
+
+/** This is your aggregator method.
+  *
+  * All that needs to be done here is to implement the initStages function,
+  * which adds stages to the method in the order they should be executed.
+  *
+  * When you are ready to run it, use SBT from the CLI:
+  *
+  *   sbt run [args]
+  *
+  * See the README of the dig-aggregator-core project for a complete list of
+  * CLI arguments available.
+  */
+object Susie extends Method {
+
+  /** Add all stages used in this method here. Stages must be added in the
+    * order they should be serially executed.
+    */
+  override def initStages(implicit context: Context) = {
+    addStage(new MakeSuSiE)
+  }
+}

From adf46c8e64700fa7b724f4d06fd728cdb0cf6800 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Thu, 16 May 2024 16:10:20 -0400
Subject: [PATCH 06/13] clean up code - ancestry specific

---
 susie/src/main/scala/MakeSuSiE.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/susie/src/main/scala/MakeSuSiE.scala b/susie/src/main/scala/MakeSuSiE.scala
index cf55bf92..1dacf3e6 100644
--- a/susie/src/main/scala/MakeSuSiE.scala
+++ b/susie/src/main/scala/MakeSuSiE.scala
@@ -15,7 +15,8 @@ class MakeSuSiE(implicit context: Context) extends Stage {
 
   /** Map inputs to their outputs. */
   override val rules: PartialFunction[Input, Outputs] = {
-    case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
+    case ancestrySpecific(phenotype) => Outputs.Named(s"$phenotype/EU")
+    // case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
     // case mixedDatasets(_, _, phenotype) => Outputs.Named(s"$phenotype/Mixed")
   }
 

From eb34f385412b449d267e074f2029e857edc3f668 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Thu, 16 May 2024 16:52:05 -0400
Subject: [PATCH 07/13] clean up code - ancestry specific

---
 susie/src/main/resources/makeSuSiE.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/susie/src/main/resources/makeSuSiE.py b/susie/src/main/resources/makeSuSiE.py
index b1229df6..dd1817f8 100644
--- a/susie/src/main/resources/makeSuSiE.py
+++ b/susie/src/main/resources/makeSuSiE.py
@@ -21,7 +21,7 @@ def main():
 	parser.add_option("", "--phenotype", default=None)
 	parser.add_option("", "--ancestry", default=None)
 
-	args = parser.parse_args()
+	(args,_) = parser.parse_args()
 
 	clump_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-clumped/{args.phenotype}/ancestry={args.ancestry}'
 	var2rs_path = '/mnt/var/susie/snps.csv'

From b42dd19d2fbf74998eb3769c2b0ba474907e80e9 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Fri, 17 May 2024 10:08:11 -0400
Subject: [PATCH 08/13] add safe_remove function '

---
 susie/src/main/resources/makeSuSiE.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/susie/src/main/resources/makeSuSiE.py b/susie/src/main/resources/makeSuSiE.py
index dd1817f8..788f8d7f 100644
--- a/susie/src/main/resources/makeSuSiE.py
+++ b/susie/src/main/resources/makeSuSiE.py
@@ -15,6 +15,17 @@ def make_json_files(directory):
 	subprocess.run('cat input/*.json > input.json', shell=True)
 	shutil.rmtree('input')
 
+def safe_remove(file_path):
+    """Best-effort delete of file_path: the outcome is printed and nothing is re-raised."""
+    try:
+        os.remove(file_path)
+        print(f"File {file_path} successfully removed.")
+    except (FileNotFoundError, PermissionError) as err:
+        missing = isinstance(err, FileNotFoundError)
+        print(f"File {file_path} does not exist." if missing else f"Permission denied: cannot remove {file_path}.")
+    except Exception as err:
+        print(f"An error occurred while trying to remove {file_path}: {err}")
+
 def main():
 	usage = "usage: %prog [options]"
 	parser = OptionParser(usage)
@@ -75,15 +86,15 @@ def main():
 		argument3_out  = out_directory
 		subprocess.call(['Rscript', '/mnt/var/susie/SuSiE.r', '--gwas', gwas_susie_file_name, 
 						'--ld', argument2_ld, '--out', argument3_out])
-		os.remove(argument1_gwas)
+		safe_remove(argument1_gwas)
 
-	os.remove(f'{out_directory}/snps.txt')
-	os.remove(f'{out_directory}/snps_ld.ld')
-	os.remove(f'{out_directory}/snps_ld.log')
-	os.remove(f'{out_directory}/snps_ld.nosex')
+	safe_remove(f'{out_directory}/snps.txt')
+	safe_remove(f'{out_directory}/snps_ld.ld')
+	safe_remove(f'{out_directory}/snps_ld.log')
+	safe_remove(f'{out_directory}/snps_ld.nosex')
 	subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])	
 	subprocess.check_call(['aws', 's3', 'cp', f'{out_directory}/', out_path, '--recursive'])
-	os.remove('input.json')
+	safe_remove('input.json')
 	shutil.rmtree(out_directory)
 
 if __name__ == '__main__':

From 33aa908b5f456582f72b134a55287fcd05b2eedc Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Wed, 18 Dec 2024 06:30:45 -0500
Subject: [PATCH 09/13] add Finemap into susie branch

---
 susie/.Rapp.history                         |  0
 susie/src/main/resources/install-finemap.sh | 72 +++++++++++++++++++++
 susie/src/main/resources/makeFinemap.py     | 67 +++++++++++++++++++
 susie/src/main/scala/Finemap.scala          | 27 ++++++++
 susie/src/main/scala/MakeFinemap.scala      | 54 ++++++++++++++++
 5 files changed, 220 insertions(+)
 create mode 100644 susie/.Rapp.history
 create mode 100644 susie/src/main/resources/install-finemap.sh
 create mode 100644 susie/src/main/resources/makeFinemap.py
 create mode 100644 susie/src/main/scala/Finemap.scala
 create mode 100644 susie/src/main/scala/MakeFinemap.scala

diff --git a/susie/.Rapp.history b/susie/.Rapp.history
new file mode 100644
index 00000000..e69de29b
diff --git a/susie/src/main/resources/install-finemap.sh b/susie/src/main/resources/install-finemap.sh
new file mode 100644
index 00000000..ea20dbcb
--- /dev/null
+++ b/susie/src/main/resources/install-finemap.sh
@@ -0,0 +1,72 @@
+#!/bin/bash -xe
+
+# susie method
+## Developed with python 3 and R
+
+finemap_ROOT=/mnt/var/cojo
+
+# install to the root directory
+sudo mkdir -p "$finemap_ROOT"
+cd "$finemap_ROOT"
+
+# install yum dependencies
+sudo yum install -y python3-devel
+
+
+# Install conda
+cd $finemap_ROOT
+wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+bash miniconda.sh -b -p $finemap_ROOT/miniconda
+echo export PATH="$finemap_ROOT/miniconda/bin:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install GCTA
+cd $finemap_ROOT
+mkdir -p ~/software/gcta
+cd ~/software/gcta
+# Note that this URL may change - old versions aren't accessible at the same URL
+wget https://cnsgenomics.com/software/gcta/bin/gcta_1.93.2beta.zip
+unzip gcta_1.93.2beta.zip
+cd gcta_1.93.2beta
+echo export PATH="$PWD:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install plink
+mkdir -p ~/software/plink
+cd ~/software/plink
+wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20201019.zip
+unzip plink_linux_x86_64_20201019.zip
+echo export PATH="$PWD:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install FINEMAP
+mkdir -p ~/software/finemap
+cd ~/software/finemap
+wget http://www.christianbenner.com/finemap_v1.4_x86_64.tgz
+tar -zxf finemap_v1.4_x86_64.tgz
+ln -s finemap_v1.4_x86_64/finemap_v1.4_x86_64 finemap
+sudo apt-get install libgomp1 # Not present by default it seems
+echo export PATH="$PWD:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install JRE
+sudo apt install -yf openjdk-8-jre-headless openjdk-8-jdk
+# sudo update-java-alternatives --list
+# sudo update-java-alternatives --set java-1.8.0-openjdk-amd64
+
+# Install parallel
+sudo apt install -yf parallel
+
+echo COMPLETE
+
+
+# pull down LD bfiles
+sudo mkdir -p ./bfiles
+sudo aws s3 cp s3://dig-analysis-bin/cojo/bfiles/ ./bfiles/ --recursive
+
+# pull down finemap dir
+sudo mkdir -p ./finemapping
+sudo aws s3 cp s3://dig-analysis-bin/cojo/finemapping/ ./finemapping/ --recursive
+
+# fetch snps for mapping
+sudo aws s3 cp "s3://dig-analysis-bin/snps/dbSNP_common_GRCh37.csv" ./snps.csv
diff --git a/susie/src/main/resources/makeFinemap.py b/susie/src/main/resources/makeFinemap.py
new file mode 100644
index 00000000..fa81d77b
--- /dev/null
+++ b/susie/src/main/resources/makeFinemap.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python3
+from optparse import OptionParser
+import pandas as pd
+import numpy as np
+import shutil
+import subprocess
+import os
+
+s3_in=os.environ['INPUT_PATH']
+s3_out=os.environ['OUTPUT_PATH']
+
+# def finds json files in the directory
+def make_json_files(directory):
+	subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
+	subprocess.run('zstdcat input/*.json.zst | jq -s '.' > input/input.json', shell=True)
+
+def safe_remove(file_path):
+    try:
+        os.remove(file_path)
+        print(f"File {file_path} successfully removed.")
+    except FileNotFoundError:
+        print(f"File {file_path} does not exist.")
+    except PermissionError:
+        print(f"Permission denied: cannot remove {file_path}.")
+    except Exception as e:
+        print(f"An error occurred while trying to remove {file_path}: {e}")
+
+def main():
+	usage = "usage: %prog [options]"
+	parser = OptionParser(usage)
+	parser.add_option("", "--phenotype", default=None)
+	parser.add_option("", "--ancestry", default=None)
+
+	(args,_) = parser.parse_args()
+
+	pheno_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-specific/{phenotype}/ancestry={ancestry}/'
+	var2rs_path = '/mnt/var/cojo/snps.csv'
+	bfiles = '/mnt/var/cojo/bfiles'
+	finemap_dir = '/mnt/var/cojo/finemapping'
+	config_file = '/mnt/var/cojo/finemapping/analysis.config.yaml'
+	out_path = f'{s3_out}/out/cojo/staging/{args.phenotype}/ancestry={args.ancestry}' 
+
+	# read all files in the clump path
+	make_json_files(pheno_path)
+
+	# create the tmp out directory
+	out_directory = 'data'
+	if not os.path.exists(out_directory):
+		os.makedirs(out_directory, exist_ok=True)
+
+	subprocess.call(['bash', '/mnt/var/cojo/finemapping/run_finemap_pipeline.sh', 
+					'--input','input'
+					'--bfiles', bfiles,
+					'--config_file',config_file,
+					'--dbsnp_file',var2rs_path,
+					'--output', out_directory,
+					'--finemap_dir',finemap_dir
+					])
+	
+	subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])	
+	subprocess.check_call(['aws', 's3', 'cp', f'{out_directory}/', out_path, '--recursive'])
+	safe_remove('input/input.json')
+	shutil.rmtree('input')
+	shutil.rmtree(out_directory)
+
+if __name__ == '__main__':
+	main()
diff --git a/susie/src/main/scala/Finemap.scala b/susie/src/main/scala/Finemap.scala
new file mode 100644
index 00000000..ee8f5dd1
--- /dev/null
+++ b/susie/src/main/scala/Finemap.scala
@@ -0,0 +1,27 @@
+package org.broadinstitute.dig.aggregator.methods.susie
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws._
+import org.broadinstitute.dig.aws.emr._
+
+/** This is your aggregator method.
+  *
+  * All that needs to be done here is to implement the initStages function,
+  * which adds stages to the method in the order they should be executed.
+  *
+  * When you are ready to run it, use SBT from the CLI:
+  *
+  *   sbt run [args]
+  *
+  * See the README of the dig-aggregator-core project for a complete list of
+  * CLI arguments available.
+  */
+object Susie extends Method {
+
+  /** Add all stages used in this method here. Stages must be added in the
+    * order they should be serially executed.
+    */
+  override def initStages(implicit context: Context) = {
+    addStage(new MakeFinemap)
+  }
+}
diff --git a/susie/src/main/scala/MakeFinemap.scala b/susie/src/main/scala/MakeFinemap.scala
new file mode 100644
index 00000000..f2d68e6a
--- /dev/null
+++ b/susie/src/main/scala/MakeFinemap.scala
@@ -0,0 +1,54 @@
+package org.broadinstitute.dig.aggregator.methods.susie
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws.emr._
+import org.broadinstitute.dig.aws.Ec2.Strategy
+import org.broadinstitute.dig.aws.MemorySize
+
+class MakeSuSiE(implicit context: Context) extends Stage {
+  import MemorySize.Implicits._
+
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
+
+  /** Source inputs. */
+  override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
+
+  /** Map inputs to their outputs. */
+  override val rules: PartialFunction[Input, Outputs] = {
+    case ancestrySpecific(phenotype) => Outputs.Named(s"$phenotype/EU")
+    // case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
+    // case mixedDatasets(_, _, phenotype) => Outputs.Named(s"$phenotype/Mixed")
+  }
+
+  /** Just need a single machine with no applications, but a good drive. */
+  override def cluster: ClusterDef = super.cluster.copy(
+    instances = 1,
+    applications = Seq.empty,
+    masterVolumeSizeInGB = 100,
+    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-finemap.sh")))
+  )
+
+  override def make(output: String): Job = {
+    val input = MakeSuSiEInput.fromString(output)
+    new Job(Job.Script(resourceUri("makeFinemap.py"), input.flags:_*))
+  }
+
+}
+  
+
+case class MakeSuSiEInput(
+  phenotype: String,
+  ancestry: String
+) {
+
+  def flags: Seq[String] = Seq(s"--phenotype=$phenotype", s"--ancestry=$ancestry")
+}
+
+object MakeSuSiEInput {
+  def fromString(output: String): MakeSuSiEInput = {
+    output.split("/").toSeq match {
+      case Seq(phenotype, ancestry) => MakeSuSiEInput(phenotype, ancestry)
+    }
+  }
+}
+

From 47c8996aab753e5fd02391a7f82ac8282fdaece0 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Wed, 18 Dec 2024 07:06:28 -0500
Subject: [PATCH 10/13] change input in MakeFinemap.scala

---
 susie/src/main/scala/MakeFinemap.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/susie/src/main/scala/MakeFinemap.scala b/susie/src/main/scala/MakeFinemap.scala
index f2d68e6a..35a3d2e5 100644
--- a/susie/src/main/scala/MakeFinemap.scala
+++ b/susie/src/main/scala/MakeFinemap.scala
@@ -8,7 +8,7 @@ import org.broadinstitute.dig.aws.MemorySize
 class MakeSuSiE(implicit context: Context) extends Stage {
   import MemorySize.Implicits._
 
-  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-specific/*/ancestry=EU/")
 
   /** Source inputs. */
   override val sources: Seq[Input.Source] = Seq(ancestrySpecific)

From 313e6809f16b6b8d01c9373ca1e0d6fe259e522c Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Wed, 18 Dec 2024 10:16:18 -0500
Subject: [PATCH 11/13] finemap

---
 finemap/.editorconfig                         | 14 ++++
 finemap/.scalafmt.conf                        |  4 +
 finemap/LICENSE.txt                           | 29 +++++++
 finemap/README.md                             | 14 ++++
 finemap/built.sbt                             | 70 +++++++++++++++++
 finemap/project/build.properties              |  1 +
 finemap/project/plugins.sbt                   |  1 +
 finemap/src/main/resources/install-finemap.sh | 76 +++++++++++++++++++
 finemap/src/main/resources/makeFinemap.py     | 67 ++++++++++++++++
 finemap/src/main/scala/Finemap.scala          | 27 +++++++
 finemap/src/main/scala/MakeFinemap.scala      | 54 +++++++++++++
 finemap/version.sbt                           |  1 +
 12 files changed, 358 insertions(+)
 create mode 100644 finemap/.editorconfig
 create mode 100644 finemap/.scalafmt.conf
 create mode 100644 finemap/LICENSE.txt
 create mode 100644 finemap/README.md
 create mode 100644 finemap/built.sbt
 create mode 100644 finemap/project/build.properties
 create mode 100644 finemap/project/plugins.sbt
 create mode 100644 finemap/src/main/resources/install-finemap.sh
 create mode 100644 finemap/src/main/resources/makeFinemap.py
 create mode 100644 finemap/src/main/scala/Finemap.scala
 create mode 100644 finemap/src/main/scala/MakeFinemap.scala
 create mode 100644 finemap/version.sbt

diff --git a/finemap/.editorconfig b/finemap/.editorconfig
new file mode 100644
index 00000000..587935a6
--- /dev/null
+++ b/finemap/.editorconfig
@@ -0,0 +1,14 @@
+root = true
+
+[*]
+insert_final_newline = true
+
+[*.java]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+
+[*.{scala,sbt}]
+indent_style = space
+indent_size = 2
+trim_trailing_whitespace = true
diff --git a/finemap/.scalafmt.conf b/finemap/.scalafmt.conf
new file mode 100644
index 00000000..ca683bf8
--- /dev/null
+++ b/finemap/.scalafmt.conf
@@ -0,0 +1,4 @@
+version = "2.4.2"
+align=more
+docstrings=ScalaDoc
+maxColumn=120
diff --git a/finemap/LICENSE.txt b/finemap/LICENSE.txt
new file mode 100644
index 00000000..0d0952ea
--- /dev/null
+++ b/finemap/LICENSE.txt
@@ -0,0 +1,29 @@
+Copyright 2020 <COPYRIGHT HOLDER>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
diff --git a/finemap/README.md b/finemap/README.md
new file mode 100644
index 00000000..35b131c6
--- /dev/null
+++ b/finemap/README.md
@@ -0,0 +1,14 @@
+# finemap
+
+This is the documentation about the method.
+
+Reads `out/metaanalysis/bottom-line/ancestry-specific/<phenotype>/ancestry=EU/` under `$INPUT_PATH`
+and writes its results to `out/cojo/staging/<phenotype>/ancestry=EU` under `$OUTPUT_PATH`.
+
+## Stages
+
+These are the stages of finemap.
+
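+### MakeFinemap (`MakeFinemap.scala`, `makeFinemap.py`)
+
+Bootstraps the node with `install-finemap.sh`, then runs `run_finemap_pipeline.sh` for each phenotype and uploads the output.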
diff --git a/finemap/built.sbt b/finemap/built.sbt
new file mode 100644
index 00000000..c018bd9e
--- /dev/null
+++ b/finemap/built.sbt
@@ -0,0 +1,70 @@
+val Versions = new {
+  val Aggregator = "0.3.1-SNAPSHOT"
+  val Scala = "2.13.2"
+}
+
+// set the version of scala to compile with
+scalaVersion := Versions.Scala
+
+// add scala compile flags
+scalacOptions ++= Seq(
+  "-feature",
+  "-deprecation",
+  "-unchecked",
+  "-Ywarn-value-discard"
+)
+
+// add required libraries
+libraryDependencies ++= Seq(
+  "org.broadinstitute.dig" %% "dig-aggregator-core" % Versions.Aggregator
+)
+
+// set the organization this method belongs to
+organization := "org.broadinstitute.dig"
+
+// entry point when running this method
+mainClass := Some("org.broadinstitute.dig.aggregator.methods.finemap.Finemap")
+
+// enables buildInfo, which bakes git version info into the jar
+enablePlugins(GitVersioning)
+
+// get the buildInfo task
+val buildInfoTask = taskKey[Seq[File]]("buildInfo")
+
+// define execution code for task: writes git/build metadata to version.properties and returns that file
+buildInfoTask := {
+  val file = (resourceManaged in Compile).value / "version.properties"
+
+  // log where the properties will be written to
+  streams.value.log.info(s"Writing version info to $file...")
+
+  // collect git versioning information
+  val branch = git.gitCurrentBranch.value
+  val lastCommit = git.gitHeadCommit.value
+  val describedVersion = git.gitDescribedVersion.value // NOTE(review): read here but never written to the properties below
+  val anyUncommittedChanges = git.gitUncommittedChanges.value
+  val remoteUrl = (scmInfo in ThisBuild).value.map(_.browseUrl.toString)
+  val buildDate = java.time.Instant.now
+
+  // map properties (a missing commit or remote URL becomes an empty string)
+  val properties = Map[String, String](
+    "branch" -> branch,
+    "lastCommit" -> lastCommit.getOrElse(""),
+    "remoteUrl" -> remoteUrl.getOrElse(""),
+    "uncommittedChanges" -> anyUncommittedChanges.toString,
+    "buildDate" -> buildDate.toString
+  )
+
+  // build properties content: one key=value line per non-empty value, an empty line for each empty value
+  val contents = properties.toList.map {
+    case (key, value) if value.length > 0 => s"$key=$value"
+    case _                                => ""
+  }
+
+  // output the version information from git to version.properties
+  IO.write(file, contents.mkString("\n"))
+  Seq(file)
+}
+
+// add the build info task output to resources
+(resourceGenerators in Compile) += buildInfoTask.taskValue
diff --git a/finemap/project/build.properties b/finemap/project/build.properties
new file mode 100644
index 00000000..e67343ae
--- /dev/null
+++ b/finemap/project/build.properties
@@ -0,0 +1 @@
+sbt.version=1.5.0
diff --git a/finemap/project/plugins.sbt b/finemap/project/plugins.sbt
new file mode 100644
index 00000000..23d5057a
--- /dev/null
+++ b/finemap/project/plugins.sbt
@@ -0,0 +1 @@
+addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
diff --git a/finemap/src/main/resources/install-finemap.sh b/finemap/src/main/resources/install-finemap.sh
new file mode 100644
index 00000000..21efc858
--- /dev/null
+++ b/finemap/src/main/resources/install-finemap.sh
@@ -0,0 +1,76 @@
+#!/bin/bash -xe
+
+# finemap method: bootstrap script for the node that runs makeFinemap.py
+## Installs conda, GCTA, plink, FINEMAP, Java 8 and parallel, then pulls the reference data from S3
+
+finemap_ROOT=/mnt/var/cojo
+
+# install to the root directory
+sudo mkdir -p "$finemap_ROOT"
+cd "$finemap_ROOT"
+
+# install yum dependencies (this host is yum-based, so every package below is installed with yum as well)
+sudo yum install -y python3-devel
+
+
+# Install conda
+cd "$finemap_ROOT"
+sudo wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+sudo bash miniconda.sh -b -p "$finemap_ROOT/miniconda"
+echo export PATH="$finemap_ROOT/miniconda/bin:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install GCTA
+cd "$finemap_ROOT"
+sudo mkdir -p ~/software/gcta
+cd ~/software/gcta
+# Note that this URL may change - old versions aren't accessible at the same URL
+sudo wget https://cnsgenomics.com/software/gcta/bin/gcta_1.93.2beta.zip
+sudo unzip gcta_1.93.2beta.zip
+cd gcta_1.93.2beta
+echo export PATH="$PWD:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install plink
+sudo mkdir -p ~/software/plink
+cd ~/software/plink
+sudo wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20201019.zip
+sudo unzip plink_linux_x86_64_20201019.zip
+echo export PATH="$PWD:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install FINEMAP
+sudo mkdir -p ~/software/finemap
+cd ~/software/finemap
+sudo wget http://www.christianbenner.com/finemap_v1.4_x86_64.tgz
+sudo tar -zxf finemap_v1.4_x86_64.tgz
+sudo ln -sf finemap_v1.4_x86_64/finemap_v1.4_x86_64 finemap
+sudo yum install -y libgomp # GNU OpenMP runtime; not present by default it seems (-y: bootstrap is non-interactive)
+echo export PATH="$PWD:\$PATH" >> ~/.profile
+. ~/.profile
+
+# Install Java 8 (runtime + JDK)
+sudo yum install -y java-1.8.0-openjdk-headless java-1.8.0-openjdk-devel
+# NOTE(review): if another Java is the default on the image, select 1.8 explicitly:
+# sudo alternatives --config java
+
+# Install parallel (NOTE(review): may need EPEL enabled on Amazon Linux - confirm on the target image)
+sudo yum install -y parallel
+
+# return to the install root: makeFinemap.py reads bfiles/, finemapping/ and snps.csv from $finemap_ROOT
+cd "$finemap_ROOT"
+
+# pull down LD bfiles
+sudo mkdir -p ./bfiles
+sudo aws s3 cp s3://dig-analysis-bin/cojo/bfiles/ ./bfiles/ --recursive
+
+# pull down finemap dir
+sudo mkdir -p ./finemapping
+sudo aws s3 cp s3://dig-analysis-bin/cojo/finemapping/ ./finemapping/ --recursive
+
+sudo chmod 755 ./finemapping/combine_results.sh
+sudo chmod 755 ./finemapping/run_finemap_pipeline.sh
+
+# fetch snps for mapping
+sudo aws s3 cp "s3://dig-analysis-bin/snps/dbSNP_common_GRCh37.csv" ./snps.csv
+echo COMPLETE
diff --git a/finemap/src/main/resources/makeFinemap.py b/finemap/src/main/resources/makeFinemap.py
new file mode 100644
index 00000000..fa81d77b
--- /dev/null
+++ b/finemap/src/main/resources/makeFinemap.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python3
+from optparse import OptionParser
+import pandas as pd
+import numpy as np
+import shutil
+import subprocess
+import os
+
+s3_in=os.environ['INPUT_PATH']  # S3 prefix the input files are copied from; raises KeyError if the variable is unset
+s3_out=os.environ['OUTPUT_PATH']  # S3 prefix the results are uploaded under; raises KeyError if the variable is unset
+
+# Copies everything under the S3 prefix `directory` into ./input, then merges the *.json.zst parts into input/input.json
+def make_json_files(directory):
+	subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
+	subprocess.run("zstdcat input/*.json.zst | jq -s '.' > input/input.json", shell=True, check=True)  # double quotes outside so jq's '.' survives; check=True surfaces a failed merge
+
+def safe_remove(file_path):
+    """Best-effort delete of file_path: the outcome is printed and nothing is re-raised."""
+    try:
+        os.remove(file_path)
+        print(f"File {file_path} successfully removed.")
+    except (FileNotFoundError, PermissionError) as err:
+        missing = isinstance(err, FileNotFoundError)
+        print(f"File {file_path} does not exist." if missing else f"Permission denied: cannot remove {file_path}.")
+    except Exception as err:
+        print(f"An error occurred while trying to remove {file_path}: {err}")
+
+def main():
+	# Stage entry point: fetch one phenotype/ancestry from S3, run the finemap pipeline on it, upload the results.
+	parser = OptionParser("usage: %prog [options]")
+	parser.add_option("", "--phenotype", default=None)
+	parser.add_option("", "--ancestry", default=None)
+
+	(args,_) = parser.parse_args()
+
+	pheno_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-specific/{args.phenotype}/ancestry={args.ancestry}/'
+	var2rs_path = '/mnt/var/cojo/snps.csv'
+	bfiles = '/mnt/var/cojo/bfiles'
+	finemap_dir = '/mnt/var/cojo/finemapping'
+	config_file = '/mnt/var/cojo/finemapping/analysis.config.yaml'
+	out_path = f'{s3_out}/out/cojo/staging/{args.phenotype}/ancestry={args.ancestry}'
+
+	# download this phenotype's input files into ./input
+	make_json_files(pheno_path)
+
+	# create the tmp out directory
+	out_directory = 'data'
+	# exist_ok=True already tolerates an existing directory, so no exists() pre-check is needed
+	os.makedirs(out_directory, exist_ok=True)
+	# check_call rather than call: a failed pipeline must stop here, before _SUCCESS is written and uploaded
+	subprocess.check_call(['bash', '/mnt/var/cojo/finemapping/run_finemap_pipeline.sh',
+					'--input','input',
+					'--bfiles', bfiles,
+					'--config_file',config_file,
+					'--dbsnp_file',var2rs_path,
+					'--output', out_directory,
+					'--finemap_dir',finemap_dir
+					])
+	# mark the run complete, upload everything, then clean up the local working directories
+	subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])
+	subprocess.check_call(['aws', 's3', 'cp', f'{out_directory}/', out_path, '--recursive'])
+	safe_remove('input/input.json')
+	shutil.rmtree('input')
+	shutil.rmtree(out_directory)
+
+if __name__ == '__main__':  # run the stage only when executed as a script, not when imported
+	main()
diff --git a/finemap/src/main/scala/Finemap.scala b/finemap/src/main/scala/Finemap.scala
new file mode 100644
index 00000000..ee8f5dd1
--- /dev/null
+++ b/finemap/src/main/scala/Finemap.scala
@@ -0,0 +1,27 @@
+package org.broadinstitute.dig.aggregator.methods.finemap
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws._
+import org.broadinstitute.dig.aws.emr._
+
+/** The finemap aggregator method.
+  *
+  * All that needs to be done here is to implement the initStages function,
+  * which adds stages to the method in the order they should be executed.
+  *
+  * When you are ready to run it, use SBT from the CLI:
+  *
+  *   sbt run [args]
+  *
+  * See the README of the dig-aggregator-core project for a complete list of
+  * CLI arguments available.
+  */
+object Finemap extends Method {
+
+  /** Add all stages used in this method here. Stages must be added in the
+    * order they should be serially executed.
+    */
+  override def initStages(implicit context: Context) = {
+    addStage(new MakeFinemap)
+  }
+}
diff --git a/finemap/src/main/scala/MakeFinemap.scala b/finemap/src/main/scala/MakeFinemap.scala
new file mode 100644
index 00000000..35a3d2e5
--- /dev/null
+++ b/finemap/src/main/scala/MakeFinemap.scala
@@ -0,0 +1,54 @@
+package org.broadinstitute.dig.aggregator.methods.finemap
+
+import org.broadinstitute.dig.aggregator.core._
+import org.broadinstitute.dig.aws.emr._
+import org.broadinstitute.dig.aws.Ec2.Strategy
+import org.broadinstitute.dig.aws.MemorySize
+
+class MakeFinemap(implicit context: Context) extends Stage {
+  import MemorySize.Implicits._
+
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-specific/*/ancestry=EU/")
+
+  /** Source inputs. */
+  override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
+
+  /** Map inputs to their outputs. */
+  override val rules: PartialFunction[Input, Outputs] = {
+    // the source pattern pins ancestry=EU, so the ancestry half of the output name is the literal "EU"
+    case ancestrySpecific(phenotype) => Outputs.Named(s"$phenotype/EU")
+    // case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
+  }
+
+  /** Just need a single machine with no applications, but a good drive. */
+  override def cluster: ClusterDef = super.cluster.copy(
+    instances = 1,
+    applications = Seq.empty,
+    masterVolumeSizeInGB = 100,
+    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-finemap.sh")))
+  )
+
+  /** Builds the makeFinemap.py job for one output name of the form "phenotype/ancestry". */
+  override def make(output: String): Job = {
+    val input = MakeFinemapInput.fromString(output)
+    new Job(Job.Script(resourceUri("makeFinemap.py"), input.flags:_*))
+  }
+}
+
+/** The phenotype/ancestry pair encoded in an output name, and its command-line form. */
+case class MakeFinemapInput(
+  phenotype: String,
+  ancestry: String
+) {
+  def flags: Seq[String] = Seq(s"--phenotype=$phenotype", s"--ancestry=$ancestry")
+}
+
+object MakeFinemapInput {
+  def fromString(output: String): MakeFinemapInput = {
+    output.split("/").toSeq match {
+      case Seq(phenotype, ancestry) => MakeFinemapInput(phenotype, ancestry)
+      case _ => throw new IllegalArgumentException(s"expected 'phenotype/ancestry' but got: $output")
+    }
+  }
+}
+
diff --git a/finemap/version.sbt b/finemap/version.sbt
new file mode 100644
index 00000000..e7654440
--- /dev/null
+++ b/finemap/version.sbt
@@ -0,0 +1 @@
+version in ThisBuild := "0.1.0"

From 8997070bfc635bc29eb6f8049540d46585920c1c Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Wed, 18 Dec 2024 13:40:33 -0500
Subject: [PATCH 12/13] update susie for finemap

---
 susie/src/main/scala/Finemap.scala     | 27 -------------
 susie/src/main/scala/MakeFinemap.scala | 54 --------------------------
 susie/src/main/scala/MakeSuSiE.scala   |  9 +++--
 3 files changed, 6 insertions(+), 84 deletions(-)
 delete mode 100644 susie/src/main/scala/Finemap.scala
 delete mode 100644 susie/src/main/scala/MakeFinemap.scala

diff --git a/susie/src/main/scala/Finemap.scala b/susie/src/main/scala/Finemap.scala
deleted file mode 100644
index ee8f5dd1..00000000
--- a/susie/src/main/scala/Finemap.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.broadinstitute.dig.aggregator.methods.susie
-
-import org.broadinstitute.dig.aggregator.core._
-import org.broadinstitute.dig.aws._
-import org.broadinstitute.dig.aws.emr._
-
-/** This is your aggregator method.
-  *
-  * All that needs to be done here is to implement the initStages function,
-  * which adds stages to the method in the order they should be executed.
-  *
-  * When you are ready to run it, use SBT from the CLI:
-  *
-  *   sbt run [args]
-  *
-  * See the README of the dig-aggregator-core project for a complete list of
-  * CLI arguments available.
-  */
-object Susie extends Method {
-
-  /** Add all stages used in this method here. Stages must be added in the
-    * order they should be serially executed.
-    */
-  override def initStages(implicit context: Context) = {
-    addStage(new MakeFinemap)
-  }
-}
diff --git a/susie/src/main/scala/MakeFinemap.scala b/susie/src/main/scala/MakeFinemap.scala
deleted file mode 100644
index 35a3d2e5..00000000
--- a/susie/src/main/scala/MakeFinemap.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-package org.broadinstitute.dig.aggregator.methods.susie
-
-import org.broadinstitute.dig.aggregator.core._
-import org.broadinstitute.dig.aws.emr._
-import org.broadinstitute.dig.aws.Ec2.Strategy
-import org.broadinstitute.dig.aws.MemorySize
-
-class MakeSuSiE(implicit context: Context) extends Stage {
-  import MemorySize.Implicits._
-
-  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-specific/*/ancestry=EU/")
-
-  /** Source inputs. */
-  override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
-
-  /** Map inputs to their outputs. */
-  override val rules: PartialFunction[Input, Outputs] = {
-    case ancestrySpecific(phenotype) => Outputs.Named(s"$phenotype/EU")
-    // case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
-    // case mixedDatasets(_, _, phenotype) => Outputs.Named(s"$phenotype/Mixed")
-  }
-
-  /** Just need a single machine with no applications, but a good drive. */
-  override def cluster: ClusterDef = super.cluster.copy(
-    instances = 1,
-    applications = Seq.empty,
-    masterVolumeSizeInGB = 100,
-    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-finemap.sh")))
-  )
-
-  override def make(output: String): Job = {
-    val input = MakeSuSiEInput.fromString(output)
-    new Job(Job.Script(resourceUri("makeFinemap.py"), input.flags:_*))
-  }
-
-}
-  
-
-case class MakeSuSiEInput(
-  phenotype: String,
-  ancestry: String
-) {
-
-  def flags: Seq[String] = Seq(s"--phenotype=$phenotype", s"--ancestry=$ancestry")
-}
-
-object MakeSuSiEInput {
-  def fromString(output: String): MakeSuSiEInput = {
-    output.split("/").toSeq match {
-      case Seq(phenotype, ancestry) => MakeSuSiEInput(phenotype, ancestry)
-    }
-  }
-}
-
diff --git a/susie/src/main/scala/MakeSuSiE.scala b/susie/src/main/scala/MakeSuSiE.scala
index 1dacf3e6..27e7e236 100644
--- a/susie/src/main/scala/MakeSuSiE.scala
+++ b/susie/src/main/scala/MakeSuSiE.scala
@@ -8,7 +8,8 @@ import org.broadinstitute.dig.aws.MemorySize
 class MakeSuSiE(implicit context: Context) extends Stage {
   import MemorySize.Implicits._
 
-  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
+  // val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-specific/*/ancestry=EU/")
 
   /** Source inputs. */
   override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
@@ -25,12 +26,14 @@ class MakeSuSiE(implicit context: Context) extends Stage {
     instances = 1,
     applications = Seq.empty,
     masterVolumeSizeInGB = 100,
-    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-susie.sh")))
+    // bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-susie.sh")))
+    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-finemap.sh")))
   )
 
   override def make(output: String): Job = {
     val input = MakeSuSiEInput.fromString(output)
-    new Job(Job.Script(resourceUri("makeSuSiE.py"), input.flags:_*))
+    // new Job(Job.Script(resourceUri("makeSuSiE.py"), input.flags:_*))
+    new Job(Job.Script(resourceUri("makeFinemap.py"), input.flags:_*))
   }
 
 }

From 7d5b241f93162ef5da3104ef94e7bf6a3addee20 Mon Sep 17 00:00:00 2001
From: szandavi <sm.zandavi@gmail.com>
Date: Wed, 18 Dec 2024 15:54:32 -0500
Subject: [PATCH 13/13] back susie to normal

---
 susie/src/main/resources/install-finemap.sh | 72 ---------------------
 susie/src/main/resources/makeFinemap.py     | 67 -------------------
 susie/src/main/scala/MakeSuSiE.scala        |  9 +--
 3 files changed, 3 insertions(+), 145 deletions(-)
 delete mode 100644 susie/src/main/resources/install-finemap.sh
 delete mode 100644 susie/src/main/resources/makeFinemap.py

diff --git a/susie/src/main/resources/install-finemap.sh b/susie/src/main/resources/install-finemap.sh
deleted file mode 100644
index ea20dbcb..00000000
--- a/susie/src/main/resources/install-finemap.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash -xe
-
-# susie method
-## Developed with python 3 and R
-
-finemap_ROOT=/mnt/var/cojo
-
-# install to the root directory
-sudo mkdir -p "$finemap_ROOT"
-cd "$finemap_ROOT"
-
-# install yum dependencies
-sudo yum install -y python3-devel
-
-
-# Install conda
-cd $finemap_ROOT
-wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-bash miniconda.sh -b -p $finemap_ROOT/miniconda
-echo export PATH="$finemap_ROOT/miniconda/bin:\$PATH" >> ~/.profile
-. ~/.profile
-
-# Install GCTA
-cd $finemap_ROOT
-mkdir -p ~/software/gcta
-cd ~/software/gcta
-# Note that this URL may change - old versions aren't accessible at the same URL
-wget https://cnsgenomics.com/software/gcta/bin/gcta_1.93.2beta.zip
-unzip gcta_1.93.2beta.zip
-cd gcta_1.93.2beta
-echo export PATH="$PWD:\$PATH" >> ~/.profile
-. ~/.profile
-
-# Install plink
-mkdir -p ~/software/plink
-cd ~/software/plink
-wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20201019.zip
-unzip plink_linux_x86_64_20201019.zip
-echo export PATH="$PWD:\$PATH" >> ~/.profile
-. ~/.profile
-
-# Install FINEMAP
-mkdir -p ~/software/finemap
-cd ~/software/finemap
-wget http://www.christianbenner.com/finemap_v1.4_x86_64.tgz
-tar -zxf finemap_v1.4_x86_64.tgz
-ln -s finemap_v1.4_x86_64/finemap_v1.4_x86_64 finemap
-sudo apt-get install libgomp1 # Not present by default it seems
-echo export PATH="$PWD:\$PATH" >> ~/.profile
-. ~/.profile
-
-# Install JRE
-sudo apt install -yf openjdk-8-jre-headless openjdk-8-jdk
-# sudo update-java-alternatives --list
-# sudo update-java-alternatives --set java-1.8.0-openjdk-amd64
-
-# Install parallel
-sudo apt install -yf parallel
-
-echo COMPLETE
-
-
-# pull down LD bfiles
-sudo mkdir -p ./bfiles
-sudo aws s3 cp s3://dig-analysis-bin/cojo/bfiles/ ./bfiles/ --recursive
-
-# pull down finemap dir
-sudo mkdir -p ./finemapping
-sudo aws s3 cp s3://dig-analysis-bin/cojo/finemapping/ ./finemapping/ --recursive
-
-# fetch snps for mapping
-sudo aws s3 cp "s3://dig-analysis-bin/snps/dbSNP_common_GRCh37.csv" ./snps.csv
diff --git a/susie/src/main/resources/makeFinemap.py b/susie/src/main/resources/makeFinemap.py
deleted file mode 100644
index fa81d77b..00000000
--- a/susie/src/main/resources/makeFinemap.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/python3
-from optparse import OptionParser
-import pandas as pd
-import numpy as np
-import shutil
-import subprocess
-import os
-
-s3_in=os.environ['INPUT_PATH']
-s3_out=os.environ['OUTPUT_PATH']
-
-# def finds json files in the directory
-def make_json_files(directory):
-	subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
-	subprocess.run('zstdcat input/*.json.zst | jq -s '.' > input/input.json', shell=True)
-
-def safe_remove(file_path):
-    try:
-        os.remove(file_path)
-        print(f"File {file_path} successfully removed.")
-    except FileNotFoundError:
-        print(f"File {file_path} does not exist.")
-    except PermissionError:
-        print(f"Permission denied: cannot remove {file_path}.")
-    except Exception as e:
-        print(f"An error occurred while trying to remove {file_path}: {e}")
-
-def main():
-	usage = "usage: %prog [options]"
-	parser = OptionParser(usage)
-	parser.add_option("", "--phenotype", default=None)
-	parser.add_option("", "--ancestry", default=None)
-
-	(args,_) = parser.parse_args()
-
-	pheno_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-specific/{phenotype}/ancestry={ancestry}/'
-	var2rs_path = '/mnt/var/cojo/snps.csv'
-	bfiles = '/mnt/var/cojo/bfiles'
-	finemap_dir = '/mnt/var/cojo/finemapping'
-	config_file = '/mnt/var/cojo/finemapping/analysis.config.yaml'
-	out_path = f'{s3_out}/out/cojo/staging/{args.phenotype}/ancestry={args.ancestry}' 
-
-	# read all files in the clump path
-	make_json_files(pheno_path)
-
-	# create the tmp out directory
-	out_directory = 'data'
-	if not os.path.exists(out_directory):
-		os.makedirs(out_directory, exist_ok=True)
-
-	subprocess.call(['bash', '/mnt/var/cojo/finemapping/run_finemap_pipeline.sh', 
-					'--input','input'
-					'--bfiles', bfiles,
-					'--config_file',config_file,
-					'--dbsnp_file',var2rs_path,
-					'--output', out_directory,
-					'--finemap_dir',finemap_dir
-					])
-	
-	subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])	
-	subprocess.check_call(['aws', 's3', 'cp', f'{out_directory}/', out_path, '--recursive'])
-	safe_remove('input/input.json')
-	shutil.rmtree('input')
-	shutil.rmtree(out_directory)
-
-if __name__ == '__main__':
-	main()
diff --git a/susie/src/main/scala/MakeSuSiE.scala b/susie/src/main/scala/MakeSuSiE.scala
index 27e7e236..1dacf3e6 100644
--- a/susie/src/main/scala/MakeSuSiE.scala
+++ b/susie/src/main/scala/MakeSuSiE.scala
@@ -8,8 +8,7 @@ import org.broadinstitute.dig.aws.MemorySize
 class MakeSuSiE(implicit context: Context) extends Stage {
   import MemorySize.Implicits._
 
-  // val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
-  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-specific/*/ancestry=EU/")
+  val ancestrySpecific: Input.Source = Input.Source.Success("out/metaanalysis/bottom-line/ancestry-clumped/*/ancestry=EU/")
 
   /** Source inputs. */
   override val sources: Seq[Input.Source] = Seq(ancestrySpecific)
@@ -26,14 +25,12 @@ class MakeSuSiE(implicit context: Context) extends Stage {
     instances = 1,
     applications = Seq.empty,
     masterVolumeSizeInGB = 100,
-    // bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-susie.sh")))
-    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-finemap.sh")))
+    bootstrapScripts = Seq(new BootstrapScript(resourceUri("install-susie.sh")))
   )
 
   override def make(output: String): Job = {
     val input = MakeSuSiEInput.fromString(output)
-    // new Job(Job.Script(resourceUri("makeSuSiE.py"), input.flags:_*))
-    new Job(Job.Script(resourceUri("makeFinemap.py"), input.flags:_*))
+    new Job(Job.Script(resourceUri("makeSuSiE.py"), input.flags:_*))
   }
 
 }