Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions finemap/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
root = true

[*]
insert_final_newline = true

[*.java]
indent_style = space
indent_size = 4
trim_trailing_whitespace = true

[*.{scala,sbt}]
indent_style = space
indent_size = 2
trim_trailing_whitespace = true
4 changes: 4 additions & 0 deletions finemap/.scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
version = "2.4.2"
align=more
docstrings=ScalaDoc
maxColumn=120
29 changes: 29 additions & 0 deletions finemap/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Copyright 2020 <COPYRIGHT HOLDER>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
14 changes: 14 additions & 0 deletions finemap/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# finemap

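`finemap` is a dig-aggregator method that runs a FINEMAP-based fine-mapping
pipeline on ancestry-specific bottom-line meta-analysis results.

For each phenotype it reads
`out/metaanalysis/bottom-line/ancestry-specific/<phenotype>/ancestry=EU/` under
the S3 location given by the `INPUT_PATH` environment variable, and writes its
results to `out/cojo/staging/<phenotype>/ancestry=EU` under the S3 location
given by the `OUTPUT_PATH` environment variable.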

## Stages

These are the stages of finemap.

### FinemapStage

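Implemented by the `MakeSuSiE` class in `MakeFinemap.scala`. It starts a
single-instance cluster bootstrapped with `install-finemap.sh` and runs
`makeFinemap.py` once per phenotype, passing `--phenotype` and `--ancestry`.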
70 changes: 70 additions & 0 deletions finemap/built.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// versions of the toolchain and of the aggregator library this method builds against
val Versions = new {
  val Aggregator = "0.3.1-SNAPSHOT"
  val Scala      = "2.13.2"
}

// set the version of scala to compile with
scalaVersion := Versions.Scala

// add scala compile flags
scalacOptions ++= Seq(
  "-feature",
  "-deprecation",
  "-unchecked",
  "-Ywarn-value-discard"
)

// add required libraries
libraryDependencies ++= Seq(
  "org.broadinstitute.dig" %% "dig-aggregator-core" % Versions.Aggregator
)

// set the organization this method belongs to
organization := "org.broadinstitute.dig"

// entry point when running this method; this must name an object that actually
// exists in src/main/scala (currently `Susie` in the `...methods.susie` package)
mainClass := Some("org.broadinstitute.dig.aggregator.methods.susie.Susie")

// enables buildInfo, which bakes git version info into the jar
enablePlugins(GitVersioning)

// task that generates a version.properties resource describing the build
val buildInfoTask = taskKey[Seq[File]]("buildInfo")

// define execution code for task
buildInfoTask := {
  val file = (Compile / resourceManaged).value / "version.properties"

  // log where the properties will be written to
  streams.value.log.info(s"Writing version info to $file...")

  // collect git versioning information
  val branch                = git.gitCurrentBranch.value
  val lastCommit            = git.gitHeadCommit.value
  val anyUncommittedChanges = git.gitUncommittedChanges.value
  val remoteUrl             = (ThisBuild / scmInfo).value.map(_.browseUrl.toString)
  val buildDate             = java.time.Instant.now

  // key/value pairs in a fixed order, so the generated file is stable between builds
  val properties = Seq[(String, String)](
    "branch"             -> branch,
    "lastCommit"         -> lastCommit.getOrElse(""),
    "remoteUrl"          -> remoteUrl.getOrElse(""),
    "uncommittedChanges" -> anyUncommittedChanges.toString,
    "buildDate"          -> buildDate.toString
  )

  // only emit properties that have a value (no blank lines for missing ones)
  val contents = properties.collect {
    case (key, value) if value.nonEmpty => s"$key=$value"
  }

  // output the version information from git to version.properties
  IO.write(file, contents.mkString("\n"))
  Seq(file)
}

// add the build info task output to resources
Compile / resourceGenerators += buildInfoTask.taskValue
1 change: 1 addition & 0 deletions finemap/project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sbt.version=1.5.0
1 change: 1 addition & 0 deletions finemap/project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
76 changes: 76 additions & 0 deletions finemap/src/main/resources/install-finemap.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash -xe

# finemap method bootstrap: installs the command-line tools the stage needs.
## Developed with python 3 and R
#
# Runs with -x (trace) and -e (abort on the first failing command), so every
# command below must be able to run unattended.
#
# NOTE(review): this script mixes `yum` and `apt`/`apt-get`; only one of the two
# package managers is normally present on a given image - confirm the target OS.

finemap_ROOT=/mnt/var/cojo

# install to the root directory
sudo mkdir -p "$finemap_ROOT"
cd "$finemap_ROOT"

# install yum dependencies
sudo yum install -y python3-devel


# Install conda
cd $finemap_ROOT
sudo wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
sudo bash miniconda.sh -b -p $finemap_ROOT/miniconda
# persist the PATH change in ~/.profile and apply it to this shell as well
echo export PATH="$finemap_ROOT/miniconda/bin:\$PATH" >> ~/.profile
. ~/.profile

# Install GCTA
cd $finemap_ROOT
sudo mkdir -p ~/software/gcta
cd ~/software/gcta
# Note that this URL may change - old versions aren't accessible at the same URL
sudo wget https://cnsgenomics.com/software/gcta/bin/gcta_1.93.2beta.zip
sudo unzip gcta_1.93.2beta.zip
cd gcta_1.93.2beta
echo export PATH="$PWD:\$PATH" >> ~/.profile
. ~/.profile

# Install plink
sudo mkdir -p ~/software/plink
cd ~/software/plink
sudo wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20201019.zip
sudo unzip plink_linux_x86_64_20201019.zip
echo export PATH="$PWD:\$PATH" >> ~/.profile
. ~/.profile

# Install FINEMAP
sudo mkdir -p ~/software/finemap
cd ~/software/finemap
sudo wget http://www.christianbenner.com/finemap_v1.4_x86_64.tgz
sudo tar -zxf finemap_v1.4_x86_64.tgz
# expose the versioned binary under the plain name `finemap`
sudo ln -s finemap_v1.4_x86_64/finemap_v1.4_x86_64 finemap
# -y: never wait for a confirmation prompt in an unattended bootstrap
sudo apt-get install -y libgomp1 # Not present by default it seems
echo export PATH="$PWD:\$PATH" >> ~/.profile
. ~/.profile

# Install JRE
sudo apt install -yf openjdk-8-jre-headless openjdk-8-jdk
# sudo update-java-alternatives --list
# sudo update-java-alternatives --set java-1.8.0-openjdk-amd64

# Install parallel
sudo apt install -yf parallel

echo COMPLETE


# Reference data is read by makeFinemap.py from absolute paths under
# $finemap_ROOT (bfiles/, finemapping/, snps.csv), so download it there rather
# than into whatever directory the previous install step left us in.
cd "$finemap_ROOT"

# pull down LD bfiles
sudo mkdir -p ./bfiles
sudo aws s3 cp s3://dig-analysis-bin/cojo/bfiles/ ./bfiles/ --recursive

# pull down finemap dir
sudo mkdir -p ./finemapping
sudo aws s3 cp s3://dig-analysis-bin/cojo/finemapping/ ./finemapping/ --recursive

# make the pipeline scripts executable
sudo chmod 777 ./finemapping/combine_results.sh
sudo chmod 777 ./finemapping/run_finemap_pipeline.sh


# fetch snps for mapping
sudo aws s3 cp "s3://dig-analysis-bin/snps/dbSNP_common_GRCh37.csv" ./snps.csv
67 changes: 67 additions & 0 deletions finemap/src/main/resources/makeFinemap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/python3
from optparse import OptionParser
import pandas as pd
import numpy as np
import shutil
import subprocess
import os

# S3 locations used as the prefix for the stage's input and output paths.
# Both environment variables are required: a missing one raises KeyError here.
s3_in=os.environ['INPUT_PATH']
s3_out=os.environ['OUTPUT_PATH']

def make_json_files(directory):
    """Download the stage input from S3 and merge it into a single JSON array.

    Copies everything under the S3 prefix ``directory`` into the local
    ``input/`` directory, then decompresses every ``input/*.json.zst`` file and
    slurps the records with ``jq -s`` into ``input/input.json``.

    Raises:
        subprocess.CalledProcessError: if the S3 copy fails or the merge
            pipeline exits with a non-zero status.
    """
    subprocess.check_call(['aws', 's3', 'cp', directory, 'input/', '--recursive'])
    # Double-quote the Python string so the jq filter '.' stays single-quoted
    # for the shell; check=True surfaces a failing pipeline instead of hiding it.
    subprocess.run("zstdcat input/*.json.zst | jq -s '.' > input/input.json", shell=True, check=True)

def safe_remove(file_path):
    """Delete ``file_path`` and print the outcome instead of raising.

    A missing file, a permission problem, or any other error is reported on
    stdout; the function always returns ``None``.
    """
    try:
        os.remove(file_path)
    except FileNotFoundError:
        report = f"File {file_path} does not exist."
    except PermissionError:
        report = f"Permission denied: cannot remove {file_path}."
    except Exception as err:
        report = f"An error occurred while trying to remove {file_path}: {err}"
    else:
        report = f"File {file_path} successfully removed."
    print(report)

def main():
    """Run the finemap pipeline for one phenotype/ancestry pair.

    Command-line options:
        --phenotype  phenotype name used in the input and output S3 paths
        --ancestry   ancestry code used in the input and output S3 paths

    Steps: download and merge the input JSON, run
    ``run_finemap_pipeline.sh`` into a local ``data`` directory, mark it with
    ``_SUCCESS``, upload it to S3, then remove the local working directories.

    Raises:
        subprocess.CalledProcessError: if the download, the pipeline, or the
            upload exits with a non-zero status.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("--phenotype", default=None)
    parser.add_option("--ancestry", default=None)

    (args, _) = parser.parse_args()

    # Both values are interpolated into S3 paths; refuse to build paths
    # containing the literal text "None".
    if not args.phenotype or not args.ancestry:
        parser.error("--phenotype and --ancestry are both required")

    pheno_path = f'{s3_in}/out/metaanalysis/bottom-line/ancestry-specific/{args.phenotype}/ancestry={args.ancestry}/'
    var2rs_path = '/mnt/var/cojo/snps.csv'
    bfiles = '/mnt/var/cojo/bfiles'
    finemap_dir = '/mnt/var/cojo/finemapping'
    config_file = '/mnt/var/cojo/finemapping/analysis.config.yaml'
    out_path = f'{s3_out}/out/cojo/staging/{args.phenotype}/ancestry={args.ancestry}'

    # download the bottom-line results and merge them into input/input.json
    make_json_files(pheno_path)

    # create the tmp out directory
    out_directory = 'data'
    os.makedirs(out_directory, exist_ok=True)

    # check_call: a failed pipeline must not be followed by a _SUCCESS marker
    subprocess.check_call(['bash', '/mnt/var/cojo/finemapping/run_finemap_pipeline.sh',
                           '--input', 'input',
                           '--bfiles', bfiles,
                           '--config_file', config_file,
                           '--dbsnp_file', var2rs_path,
                           '--output', out_directory,
                           '--finemap_dir', finemap_dir
                           ])

    subprocess.check_call(['touch', f'{out_directory}/_SUCCESS'])
    subprocess.check_call(['aws', 's3', 'cp', f'{out_directory}/', out_path, '--recursive'])

    # clean up the local working files
    safe_remove('input/input.json')
    shutil.rmtree('input')
    shutil.rmtree(out_directory)


if __name__ == '__main__':
    main()
27 changes: 27 additions & 0 deletions finemap/src/main/scala/Finemap.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.broadinstitute.dig.aggregator.methods.susie

import org.broadinstitute.dig.aggregator.core._
import org.broadinstitute.dig.aws._
import org.broadinstitute.dig.aws.emr._

/** Entry point of the finemap aggregator method.
  *
  * The only thing implemented here is `initStages`, which registers the
  * stages of the method in the order they should be executed.
  *
  * To run it, use SBT from the CLI:
  *
  *   sbt run [args]
  *
  * See the README of the dig-aggregator-core project for a complete list of
  * CLI arguments available.
  *
  * NOTE(review): the object and package are still named after the susie
  * method; renaming them must be done together with `mainClass` in the build.
  */
object Susie extends Method {

  /** Add all stages used in this method here. Stages must be added in the
    * order they should be serially executed.
    */
  override def initStages(implicit context: Context) = {
    // MakeSuSiE is the stage class defined in MakeFinemap.scala
    addStage(new MakeSuSiE)
  }
}
54 changes: 54 additions & 0 deletions finemap/src/main/scala/MakeFinemap.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.broadinstitute.dig.aggregator.methods.susie

import org.broadinstitute.dig.aggregator.core._
import org.broadinstitute.dig.aws.emr._
import org.broadinstitute.dig.aws.Ec2.Strategy
import org.broadinstitute.dig.aws.MemorySize

/** Stage that runs `makeFinemap.py` once per phenotype found under the
  * ancestry-specific bottom-line results (EU ancestry only).
  */
class MakeSuSiE(implicit context: Context) extends Stage {
  import MemorySize.Implicits._

  /** Input pattern; the `*` wildcard is the phenotype captured in `rules`. */
  val ancestrySpecific: Input.Source =
    Input.Source.Success("out/metaanalysis/bottom-line/ancestry-specific/*/ancestry=EU/")

  /** Source inputs. */
  override val sources: Seq[Input.Source] = Seq(ancestrySpecific)

  /** Map inputs to their outputs: each output is named `<phenotype>/EU`. */
  override val rules: PartialFunction[Input, Outputs] = {
    case ancestrySpecific(phenotype) => Outputs.Named(s"$phenotype/EU")
    // case ancestrySpecific(phenotype, ancestry) => Outputs.Named(s"$phenotype/${ancestry.split('=').last}")
    // case mixedDatasets(_, _, phenotype) => Outputs.Named(s"$phenotype/Mixed")
  }

  /** Just need a single machine with no applications, but a good drive. */
  override def cluster: ClusterDef = {
    val installer = new BootstrapScript(resourceUri("install-finemap.sh"))

    super.cluster.copy(
      instances = 1,
      applications = Seq.empty,
      masterVolumeSizeInGB = 100,
      bootstrapScripts = Seq(installer)
    )
  }

  /** Build the job for one output: parse `<phenotype>/<ancestry>` and pass
    * both parts to the script as command-line flags.
    */
  override def make(output: String): Job = {
    val scriptFlags = MakeSuSiEInput.fromString(output).flags
    new Job(Job.Script(resourceUri("makeFinemap.py"), scriptFlags: _*))
  }

}


/** The phenotype/ancestry pair a single job is run for.
  *
  * @param phenotype phenotype name, passed to the script as `--phenotype`
  * @param ancestry  ancestry code, passed to the script as `--ancestry`
  */
case class MakeSuSiEInput(
    phenotype: String,
    ancestry: String
) {

  /** Command-line flags for the job script, in `--name=value` form. */
  def flags: Seq[String] = Seq(s"--phenotype=$phenotype", s"--ancestry=$ancestry")
}

object MakeSuSiEInput {

  /** Parse an output name of the form `<phenotype>/<ancestry>`.
    *
    * @param output the output name to parse
    * @return the parsed phenotype/ancestry pair
    * @throws IllegalArgumentException if `output` does not consist of exactly
    *                                  two `/`-separated parts
    */
  def fromString(output: String): MakeSuSiEInput = {
    output.split("/").toSeq match {
      case Seq(phenotype, ancestry) => MakeSuSiEInput(phenotype, ancestry)
      // fail with a descriptive message instead of a bare MatchError
      case _ =>
        throw new IllegalArgumentException(
          s"Expected output of the form '<phenotype>/<ancestry>', got: '$output'"
        )
    }
  }
}

1 change: 1 addition & 0 deletions finemap/version.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// build-wide version of this method (slash syntax; `in` is deprecated as of sbt 1.5)
ThisBuild / version := "0.1.0"
Loading
Loading