Skip to content

Commit c806366

Browse files
authored
Merge pull request sa501428#27 from sa501428/b7
Update for anchors
2 parents e3b61dd + e83b09a commit c806366

File tree

7 files changed

+434
-25
lines changed

7 files changed

+434
-25
lines changed

src/cli/Main.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
public class Main {
1414

15-
public static final String VERSION_NUM = "0.98.0";
15+
public static final String VERSION_NUM = "0.102.0";
1616
public static boolean printVerboseComments = false;
1717

1818
public static void printGeneralUsageAndExit(int exitCode, String cUsage) {
@@ -27,7 +27,7 @@ public static void printGeneralUsageAndExit(int exitCode, String cUsage) {
2727
GenerateBedpe.usage, Split.usage, IntersectBedpe.usage, FilterBedpe.usage,
2828
Pinpoint.usage, Sieve.usage, SimplePeak.usage, SimpleMax.usage, UnWrap.usage,
2929
Flags.usage, Sift.usage, NormHack.usage, Recap.usage, HotSpot.usage,
30-
AnchorAPA.usage, Expand.usage, Clique.usage}) {
30+
AnchorAPA.usage, Expand.usage, Clique.usage, AnchorFix.usage}) {
3131
System.out.println("\t" + usage + "\n\n");
3232
}
3333
} else {
@@ -61,6 +61,8 @@ public static void main(String[] argv) throws CmdLineParser.UnknownOptionExcepti
6161
Enhance.run(args, parser);
6262
} else if (command.equals("pinpoint")) {
6363
Pinpoint.run(args, parser);
64+
} else if (command.startsWith("anchor-fix")) {
65+
AnchorFix.run(args, parser, command);
6466
} else if (command.startsWith("clique")) {
6567
Clique.run(args, parser, command);
6668
} else if (command.startsWith("clean")) {

src/cli/clt/bedpe/AnchorFix.java

+211
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
package cli.clt.bedpe;
2+
3+
import cli.Main;
4+
import cli.clt.CommandLineParser;
5+
import cli.utils.clique.Node95;
6+
import cli.utils.clique.SimpleClustering;
7+
import cli.utils.peaks.Point1D;
8+
import javastraw.feature2D.Feature2D;
9+
import javastraw.feature2D.Feature2DList;
10+
import javastraw.feature2D.Feature2DParser;
11+
import javastraw.reader.basics.Chromosome;
12+
import javastraw.reader.basics.ChromosomeHandler;
13+
import javastraw.reader.basics.ChromosomeTools;
14+
import org.apache.commons.math3.ml.clustering.Cluster;
15+
import org.apache.commons.math3.ml.clustering.DBSCANClusterer;
16+
17+
import java.io.File;
18+
import java.util.*;
19+
20+
public class AnchorFix {
21+
22+
private static final int widthToConnect = 2;
23+
public static String usage = "anchor-fix[-clean] [-r resolution] <genomeID> <input.bedpe> <output.stem>\n" +
24+
"\t\tdefault behavior will fix the hi-res shared anchors for loops\n" +
25+
"\t\tclean avoids saving old attributes";
26+
private static int resolution = 100;
27+
public static int MAX_DIST = 250;
28+
public static int CLUSTER_DIST = 100;
29+
30+
public static void run(String[] args, CommandLineParser parser, String command) {
31+
if (args.length != 4) {
32+
Main.printGeneralUsageAndExit(57, usage);
33+
}
34+
resolution = parser.getResolutionOption(resolution);
35+
String genomeID = args[1];
36+
String inFile = args[2];
37+
String outStem = args[3];
38+
fixAnchors(inFile, genomeID, outStem, command.contains("clean"));
39+
}
40+
41+
42+
private static void fixAnchors(String inputBedpe, String genomeID, String outStem, boolean noAttributes) {
43+
Feature2DList output = new Feature2DList();
44+
ChromosomeHandler handler = ChromosomeTools.loadChromosomes(genomeID);
45+
Feature2DList loopList = Feature2DParser.loadFeatures(inputBedpe, handler, !noAttributes,
46+
null, false);
47+
for (Chromosome chrom : handler.getChromosomeArrayWithoutAllByAll()) {
48+
if (Main.printVerboseComments) System.out.println("Processing " + chrom.getName());
49+
List<Feature2D> loops = loopList.get(chrom.getIndex(), chrom.getIndex());
50+
if (loops.size() > 0) {
51+
if (true) System.out.println("Processing " + chrom.getName());
52+
List<Feature2D> newLoops = recoverLoops(loops);
53+
output.addByKey(Feature2DList.getKey(chrom, chrom), newLoops);
54+
}
55+
}
56+
57+
output.exportFeatureList(new File(outStem + ".fixed.anchors.bedpe"), false, Feature2DList.ListFormat.NA);
58+
}
59+
60+
private static List<Feature2D> recoverLoops(List<Feature2D> loops) {
61+
List<Long> upStreamAnchorBins = getAllOfFeature(loops, "localX");
62+
List<Long> downStreamAnchorBins = getAllOfFeature(loops, "localY");
63+
List<Long> allAnchorBins = new ArrayList<>(2 * loops.size());
64+
allAnchorBins.addAll(upStreamAnchorBins);
65+
allAnchorBins.addAll(downStreamAnchorBins);
66+
67+
List<Node95> upStreamNodes = getNodes(upStreamAnchorBins);
68+
List<Node95> downStreamNodes = getNodes(downStreamAnchorBins);
69+
List<Node95> allNodes = getNodes(allAnchorBins);
70+
71+
return fixedList(loops, upStreamNodes, downStreamNodes, allNodes);
72+
}
73+
74+
private static List<Feature2D> fixedList(List<Feature2D> loops, List<Node95> upStreamNodes,
75+
List<Node95> downStreamNodes, List<Node95> allNodes) {
76+
Map<Long, Node95> upStreamBinToNode = buildPositionToNodeMapping(upStreamNodes);
77+
Map<Long, Node95> downStreamBinToNode = buildPositionToNodeMapping(downStreamNodes);
78+
Map<Long, Node95> anyBinToNode = buildPositionToNodeMapping(allNodes);
79+
80+
for (Feature2D loop : loops) {
81+
long x = Long.parseLong(loop.getAttribute("localX"));
82+
long y = Long.parseLong(loop.getAttribute("localY"));
83+
Node95 xNodeUp = upStreamBinToNode.get(x);
84+
Node95 yNodeDown = downStreamBinToNode.get(y);
85+
Node95 xNodeAny = anyBinToNode.get(x);
86+
Node95 yNodeAny = anyBinToNode.get(y);
87+
if (xNodeUp == null || yNodeDown == null || xNodeAny == null || yNodeAny == null) {
88+
System.err.println("Error: null node : " + x + " " + y + "\n " + xNodeUp + "\n" + yNodeDown + "\n" + xNodeAny + "\n" + yNodeAny);
89+
}
90+
setAttributes(loop, xNodeUp, yNodeDown, xNodeAny, yNodeAny);
91+
}
92+
return loops;
93+
}
94+
95+
private static void setAttributes(Feature2D loop, Node95 xNodeUp, Node95 yNodeDown,
96+
Node95 xNodeAny, Node95 yNodeAny) {
97+
if (xNodeUp == null || yNodeDown == null || xNodeAny == null || yNodeAny == null) {
98+
//System.err.println("Error: null node\n "+xNodeUp+"\n"+yNodeDown+"\n"+xNodeAny+"\n"+yNodeAny);
99+
} else {
100+
setAttributes(loop, xNodeUp, "upstream_start_1", "upstream_end_1");
101+
setAttributes(loop, yNodeDown, "downstream_start_2", "downstream_end_2");
102+
setAttributes(loop, xNodeAny, "highRes_start_1", "highRes_end_1");
103+
setAttributes(loop, yNodeAny, "highRes_start_2", "highRes_end_2");
104+
}
105+
}
106+
107+
private static void setAttributes(Feature2D loop, Node95 node, String startName, String endName) {
108+
if (node == null) {
109+
System.err.println("Node is null _ " + startName + " - " + endName);
110+
}
111+
long[] bounds = node.getBounds95();
112+
loop.setAttribute(startName, String.valueOf(bounds[0]));
113+
loop.setAttribute(endName, String.valueOf(bounds[1]));
114+
}
115+
116+
117+
private static List<Long> getAllOfFeature(List<Feature2D> loops, String attribute) {
118+
List<Long> positions = new ArrayList<>(loops.size());
119+
for (Feature2D feature : loops) {
120+
positions.add(Long.parseLong(feature.getAttribute(attribute)));
121+
}
122+
return positions;
123+
}
124+
125+
126+
public static List<Node95> getNodes(List<Long> genomePositions) {
127+
List<List<Long>> clusters = SimpleClustering.cluster(genomePositions, 100);
128+
List<Long> pointsToReassign = new ArrayList<>(clusters.size() / 2);
129+
List<List<Long>> clustersToKeep = new ArrayList<>(clusters.size() / 2);
130+
for (List<Long> cluster : clusters) {
131+
if (cluster.size() > 1) {
132+
clustersToKeep.add(cluster);
133+
} else {
134+
pointsToReassign.add(cluster.get(0));
135+
}
136+
}
137+
138+
List<Node95> cleanedUpClusters = assignToNearestClusterOrMakeSingleton(clustersToKeep, pointsToReassign);
139+
clustersToKeep.clear();
140+
pointsToReassign.clear();
141+
return cleanedUpClusters;
142+
}
143+
144+
private static List<Node95> assignToNearestClusterOrMakeSingleton(List<List<Long>> clustersToKeep, List<Long> pointsToReassign) {
145+
List<Node95> nodes = Node95.convert(clustersToKeep);
146+
for (Node95 node : nodes) {
147+
node.calculate95();
148+
}
149+
for (long point : pointsToReassign) {
150+
Node95 nearest = getNearestNode(point, nodes, MAX_DIST);
151+
if (nearest == null) {
152+
nodes.add(new Node95(point, true));
153+
} else {
154+
nearest.addWeak(point);
155+
}
156+
}
157+
return nodes;
158+
}
159+
160+
private static Node95 getNearestNode(long point, List<Node95> nodes, int maxDist) {
161+
double currDist = Double.MAX_VALUE;
162+
Node95 nearest = null;
163+
for (Node95 node : nodes) {
164+
double dist = Math.abs(node.getMu() - point);
165+
if (dist < currDist) {
166+
currDist = dist;
167+
nearest = node;
168+
}
169+
}
170+
if (currDist < maxDist) {
171+
return nearest;
172+
}
173+
return null;
174+
}
175+
176+
private static List<Cluster<Point1D>> dbscan1D(List<Long> points) {
177+
DBSCANClusterer<Point1D> dbscan = new DBSCANClusterer<>(CLUSTER_DIST, 1);
178+
return dbscan.cluster(convert(points));
179+
}
180+
181+
private static Collection<Point1D> convert(List<Long> points) {
182+
List<Point1D> converted = new ArrayList<>(points.size());
183+
for (Long point : points) {
184+
converted.add(new Point1D(point));
185+
}
186+
return converted;
187+
}
188+
189+
private static int[] getCounts(Map<Integer, List<Long>> counts, int maxBin) {
190+
int[] countsTrack = new int[maxBin];
191+
for (int i = 0; i < maxBin; i++) {
192+
if (counts.containsKey(i)) {
193+
countsTrack[i] = counts.get(i).size();
194+
}
195+
}
196+
return countsTrack;
197+
}
198+
199+
public static Map<Long, Node95> buildPositionToNodeMapping(List<Node95> nodes) {
200+
Map<Long, Node95> mapping = new HashMap<>();
201+
for (Node95 node : nodes) {
202+
for (long val : node.getPositions()) {
203+
mapping.put(val, node);
204+
}
205+
for (long val : node.getWeakPositions()) {
206+
mapping.put(val, node);
207+
}
208+
}
209+
return mapping;
210+
}
211+
}

src/cli/clt/bedpe/Clique.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
public class Clique {
2424

2525
// [-rescue]
26-
public static String usage = "clique[-clean][-rescue] [-r resolution] <genomeID> <input.bedpe> <output.stem>\n" +
27-
"\t\tdefault behavior finds the cliques using midpoints of the anchors at the resolution specified\n" +
26+
public static String usage = "clique[-rescue][-split][-clean] [-r resolution] <genomeID> <input.bedpe> <output.stem>\n" +
27+
"\t\tsplit will partition the loops into cliques using midpoints of the anchors at the resolution specified\n" +
2828
"\t\trescue will predict loops that were potentially missed\n" +
2929
"\t\tclean avoids saving old attributes";
3030
private static int resolution = 200;
@@ -40,7 +40,7 @@ public static void run(String[] args, CommandLineParser parser, String command)
4040
if (command.contains("rescue")) {
4141
rescueLoops(inFile, genomeID, outStem, command.contains("clean"));
4242
System.out.println("clique rescue complete");
43-
} else {
43+
} else if (command.contains("split")) {
4444
splitFilesIntoCliques(inFile, genomeID, outStem, command.contains("clean"));
4545
}
4646
}

src/cli/clt/bedpe/UnWrap.java

+61-20
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
public class UnWrap {
2020

21-
public static String usage = "unwrap[-filter][-legacy] [-r resolution] <genomeID> <loops.bedpe> <output_stem_>\n" +
21+
public static String usage = "unwrap[2][-filter][-legacy] [-r resolution] <genomeID> <loops.bedpe> <output_stem_>\n" +
2222
"\t\tunwrap localizer output to proper hi-res inverted bounds list\n" +
2323
"\t\tlegacy mode will use the old method of unwrapping the combined anchors\n";
2424
private static final int numLists = 6;
@@ -38,23 +38,52 @@ public static void run(String[] args, CommandLineParser parser, String command)
3838
boolean doFilter = command.contains("filter");
3939
boolean useLegacy = command.contains("legacy");
4040

41-
Feature2DList[] invertedLists = unwrap(loopList, doFilter, useLegacy);
42-
invertedLists[0].exportFeatureList(new File(outFile + "unwrapped.anchors.bedpe"), false, Feature2DList.ListFormat.NA);
43-
invertedLists[1].exportFeatureList(new File(outFile + "unwrapped.best.anchors.bedpe"), false, Feature2DList.ListFormat.NA);
44-
invertedLists[2].exportFeatureList(new File(outFile + "unwrapped.local.bedpe"), false, Feature2DList.ListFormat.NA);
41+
if (command.contains("2")) {
42+
Feature2DList invertedList = unwrap2(loopList);
43+
invertedList.exportFeatureList(new File(outFile + "unwrapped2.local.bedpe"), false, Feature2DList.ListFormat.NA);
44+
} else {
45+
Feature2DList[] invertedLists = unwrap(loopList, doFilter, useLegacy);
46+
invertedLists[0].exportFeatureList(new File(outFile + "unwrapped.anchors.bedpe"), false, Feature2DList.ListFormat.NA);
47+
invertedLists[1].exportFeatureList(new File(outFile + "unwrapped.best.anchors.bedpe"), false, Feature2DList.ListFormat.NA);
48+
invertedLists[2].exportFeatureList(new File(outFile + "unwrapped.local.bedpe"), false, Feature2DList.ListFormat.NA);
4549

46-
if (doFilter) {
47-
invertedLists[3].exportFeatureList(new File(outFile + "optimal.bedpe"), false, Feature2DList.ListFormat.NA);
48-
invertedLists[4].exportFeatureList(new File(outFile + "suboptimal.anchor.bedpe"), false, Feature2DList.ListFormat.NA);
49-
invertedLists[5].exportFeatureList(new File(outFile + "suboptimal.local.bedpe"), false, Feature2DList.ListFormat.NA);
50+
if (doFilter) {
51+
invertedLists[3].exportFeatureList(new File(outFile + "optimal.bedpe"), false, Feature2DList.ListFormat.NA);
52+
invertedLists[4].exportFeatureList(new File(outFile + "suboptimal.anchor.bedpe"), false, Feature2DList.ListFormat.NA);
53+
invertedLists[5].exportFeatureList(new File(outFile + "suboptimal.local.bedpe"), false, Feature2DList.ListFormat.NA);
54+
}
5055
}
5156
}
5257

53-
private static Feature2DList[] unwrap(Feature2DList loopList, boolean doFilter, boolean useLegacy) {
54-
Feature2DList[] unwrapped = new Feature2DList[numLists];
55-
for (int k = 0; k < unwrapped.length; k++) {
56-
unwrapped[k] = new Feature2DList();
58+
private static Feature2DList unwrap2(Feature2DList loopList) {
59+
Feature2DList unwrapped = new Feature2DList();
60+
61+
loopList.processLists((s, list) -> {
62+
List<Feature2D> invLocalList = new ArrayList<>(list.size() / 2);
63+
64+
for (Feature2D feature2D : list) {
65+
Feature2D inv = unwrapLocal(feature2D);
66+
if (inv != null) {
67+
if (localXYNotNearDiagonal(inv, 5000)) {
68+
invLocalList.add(inv);
69+
}
70+
}
71+
}
72+
unwrapped.addByKey(s, invLocalList);
73+
});
74+
return unwrapped;
75+
}
76+
77+
private static Feature2DList[] initializeF2DArray(int n) {
78+
Feature2DList[] array = new Feature2DList[n];
79+
for (int k = 0; k < array.length; k++) {
80+
array[k] = new Feature2DList();
5781
}
82+
return array;
83+
}
84+
85+
private static Feature2DList[] unwrap(Feature2DList loopList, boolean doFilter, boolean useLegacy) {
86+
Feature2DList[] unwrapped = initializeF2DArray(numLists);
5887

5988
loopList.processLists((s, list) -> {
6089
List<Feature2D> invAnchorsList = new ArrayList<>(list.size() / 2);
@@ -107,14 +136,26 @@ private static boolean containsLocalAndNotOnDiagonal(Feature2D feature2D) {
107136
return false;
108137
}
109138

139+
private static boolean localXYNotNearDiagonal(Feature2D loop, int minDist) {
140+
long x = Long.parseLong(loop.getAttribute("localX"));
141+
long y = Long.parseLong(loop.getAttribute("localY"));
142+
int dist = (int) Math.min(Math.abs(x - y),
143+
Math.abs(loop.getMidPt1() - loop.getMidPt2()));
144+
return dist > minDist;
145+
}
146+
110147
private static Feature2D unwrapLocal(Feature2D feature2D) {
111-
long start1 = Long.parseLong(feature2D.getAttribute("localX"));
112-
long start2 = Long.parseLong(feature2D.getAttribute("localY"));
113-
long end1 = start1 + resolution;
114-
long end2 = start2 + resolution;
115-
Map<String, String> attrs = new HashMap<>(feature2D.getAttributes());
116-
return new Feature2D(Feature2D.FeatureType.PEAK, feature2D.getChr1(), start1, end1,
117-
feature2D.getChr2(), start2, end2, Color.BLUE, attrs);
148+
try {
149+
long start1 = Long.parseLong(feature2D.getAttribute("localX"));
150+
long start2 = Long.parseLong(feature2D.getAttribute("localY"));
151+
long end1 = start1 + resolution;
152+
long end2 = start2 + resolution;
153+
Map<String, String> attrs = new HashMap<>(feature2D.getAttributes());
154+
return new Feature2D(Feature2D.FeatureType.PEAK, feature2D.getChr1(), start1, end1,
155+
feature2D.getChr2(), start2, end2, Color.BLUE, attrs);
156+
} catch (Exception ignored) {
157+
}
158+
return null;
118159
}
119160

120161
private static int getDistanceBetweenAnchorsAndLocal(Feature2D loop) {

0 commit comments

Comments
 (0)