Skip to content

[WIP] Implement scoring for independent mutations. #173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,22 @@ public void execute(GraphDatabaseService service) {
// It is automatically closed after the try block, which frees the allocated memory.
PrimitiveLongSet processed = Primitive.offHeapLongSet(INIT_CAP)
) {
System.out.println("executing");
long start = System.nanoTime();
for (Node n : topologicalOrder(service, processed)) {
rankDest(n);
scoreIndependentMutation(service, n);
}
System.out.println((System.nanoTime() - start) * 1e-6 + " ms");
scoreDRMutations(service);
tx.success();
}
}

/**
 * Runs a {@link MutationFinderCommand} for the given node on the given service.
 * @param service the database service the command is executed on.
 * @param n the node that is handed to the command.
 */
private void scoreIndependentMutation(GraphDatabaseService service, Node n) {
    MutationFinderCommand finder = new MutationFinderCommand(n);
    finder.execute(service);
}

/**
* Rank the destination nodes of the outgoing edges of the given node.
* @param n the source node of the destination nodes to be ranked.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package nl.tudelft.dnainator.graph.impl.command;

import nl.tudelft.dnainator.graph.impl.RelTypes;
import nl.tudelft.dnainator.graph.interestingness.Scores;

import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Path;
import org.neo4j.graphdb.Transaction;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Command that traverses the graph starting at a single node, counts the
 * distinct end nodes of the paths accepted by a {@link PhyloEvaluator} and
 * stores that count as a property on the start node.
 */
public class MutationFinderCommand implements Command {
    private Map<Node, Set<Node>> sources;
    private Node mutation;

    /**
     * Creates a command for the given node.
     * @param mutation the node the traversal starts at and the count is stored on.
     */
    public MutationFinderCommand(Node mutation) {
        this.sources = new HashMap<>();
        this.mutation = mutation;
    }

    /**
     * Walks breadth-first from the node over outgoing {@code SOURCE} and incoming
     * {@code ANCESTOR_OF} relationships. The number of distinct end nodes of the
     * returned paths is written to the node under the key
     * {@code Scores.INDEP_MUT.name()}, inside a transaction of its own.
     * @param service the database service to traverse.
     */
    @Override
    public void execute(GraphDatabaseService service) {
        try (Transaction tx = service.beginTx()) {
            Iterable<Path> included = service.traversalDescription()
                    .breadthFirst()
                    .relationships(RelTypes.SOURCE, Direction.OUTGOING)
                    .relationships(RelTypes.ANCESTOR_OF, Direction.INCOMING)
                    .evaluator(new PhyloEvaluator())
                    .traverse(mutation);

            // A set, so that an end node reached via several paths counts once.
            Set<Node> endNodes = new HashSet<>();
            for (Path path : included) {
                endNodes.add(path.endNode());
            }

            mutation.setProperty(Scores.INDEP_MUT.name(), endNodes.size());
            tx.success();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package nl.tudelft.dnainator.graph.impl.command;

import nl.tudelft.dnainator.graph.impl.NodeLabels;
import nl.tudelft.dnainator.graph.impl.RelTypes;

import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Path;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.traversal.Evaluation;
import org.neo4j.graphdb.traversal.Evaluator;

import java.util.HashSet;
import java.util.Set;

/**
 * Stateful traversal evaluator that remembers the end nodes it has already
 * accepted as "clustered" and decides per path whether its end node is
 * included in the traversal result.
 */
public class PhyloEvaluator implements Evaluator {
    private Set<Node> clusters = new HashSet<>();

    /**
     * Evaluates the end node of the given path.
     * <ul>
     * <li>End nodes labelled {@code NODE} are excluded; traversal continues.</li>
     * <li>End nodes labelled {@code SOURCE} are remembered and excluded;
     * traversal continues.</li>
     * <li>Any other end node with an outgoing {@code ANCESTOR_OF} relationship
     * to a node that is not remembered yet is included, and the branch is
     * pruned.</li>
     * <li>Otherwise the end node is remembered and excluded; traversal
     * continues.</li>
     * </ul>
     * @param path the path whose end node is evaluated.
     * @return the evaluation for this path.
     */
    @Override
    public Evaluation evaluate(Path path) {
        Node end = path.endNode();
        if (end.hasLabel(NodeLabels.NODE)) {
            return Evaluation.EXCLUDE_AND_CONTINUE;
        }
        // SOURCE nodes are remembered without looking at their relationships.
        if (!end.hasLabel(NodeLabels.SOURCE) && hasUnclusteredChild(end)) {
            return Evaluation.INCLUDE_AND_PRUNE;
        }
        clusters.add(end);
        return Evaluation.EXCLUDE_AND_CONTINUE;
    }

    /**
     * Checks the outgoing {@code ANCESTOR_OF} relationships of the given node.
     * @param parent the node whose outgoing relationships are inspected.
     * @return true when at least one relationship ends at a node that has not
     * been remembered yet.
     */
    private boolean hasUnclusteredChild(Node parent) {
        for (Relationship rel : parent.getRelationships(Direction.OUTGOING,
                RelTypes.ANCESTOR_OF)) {
            if (!clusters.contains(rel.getEndNode())) {
                return true;
            }
        }
        return false;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ public int applyImportanceModifier(int rawScore) {
}
return multipliers[rawScore];
}
},
/**
 * Score constant constructed with the string "independentMutation".
 */
INDEP_MUT("independentMutation") {
/**
 * Scales the raw score by a fixed factor of 100.
 * @param rawScore the unmodified score.
 * @return the raw score multiplied by 100.
 */
@Override
public int applyImportanceModifier(int rawScore) {
return rawScore * 100;
}
};

private String name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
import java.util.stream.Collectors;

import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.ID;

import static org.hamcrest.Matchers.lessThan;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
Expand Down Expand Up @@ -111,9 +110,10 @@ private static File getTreeFile() throws URISyntaxException {
@Test
public void testNodeLookup() {
// CHECKSTYLE.OFF: MagicNumber
SequenceNode node1 = new SequenceNodeImpl("2", Arrays.asList("ASDF", "ASD"), 1, 5, "TATA");
SequenceNode node2 = new SequenceNodeImpl("3", Arrays.asList("ASDF"), 5, 9, "TATA");
SequenceNode node3 = new SequenceNodeImpl("5", Arrays.asList("ASDF"), 4, 8, "TATA");
SequenceNode node1 = new SequenceNodeImpl("2", Arrays.asList("TKK_001",
"TKK_002"), 1, 5, "TATA");
SequenceNode node2 = new SequenceNodeImpl("3", Arrays.asList("TKK_001"), 5, 9, "TATA");
SequenceNode node3 = new SequenceNodeImpl("5", Arrays.asList("TKK_001"), 4, 8, "TATA");
assertEquals(node1, db.getNode("2"));
assertEquals(node2, db.getNode("3"));
assertEquals(node3, db.getNode("5"));
Expand All @@ -126,7 +126,7 @@ public void testNodeLookup() {
@Test
public void testRootLookup() {
// CHECKSTYLE.OFF: MagicNumber
SequenceNode root = new SequenceNodeImpl("5", Arrays.asList("ASDF"), 4, 8, "TATA");
SequenceNode root = new SequenceNodeImpl("5", Arrays.asList("TKK_001"), 4, 8, "TATA");
assertEquals(root, db.getRootNode());
// CHECKSTYLE.ON: MagicNumber
}
Expand Down Expand Up @@ -239,13 +239,13 @@ public void testQueryFilter() {
@Test
public void testQuerySources() {
GraphQueryDescription qd = new GraphQueryDescription()
.containsSource("ASDF");
.containsSource("TKK_001");
Set<String> expect = new HashSet<>();
Collections.addAll(expect, "2", "5", "3", "7", "8", "11");
assertUnorderedIDEquals(expect, db.queryNodes(qd));

// Also test for multiple sources (reusing the old one)
qd = qd.containsSource("ASD");
qd = qd.containsSource("TKK_002");
Collections.addAll(expect, "9", "10");
assertUnorderedIDEquals(expect, db.queryNodes(qd));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package nl.tudelft.dnainator.graph.impl.command;

import nl.tudelft.dnainator.annotation.impl.AnnotationCollectionImpl;
import nl.tudelft.dnainator.annotation.impl.AnnotationImpl;
import nl.tudelft.dnainator.core.EnrichedSequenceNode;
import nl.tudelft.dnainator.core.impl.SequenceNodeFactoryImpl;
import nl.tudelft.dnainator.graph.impl.Neo4jBatchBuilder;
import nl.tudelft.dnainator.graph.impl.Neo4jGraph;
import nl.tudelft.dnainator.graph.impl.NodeLabels;
import nl.tudelft.dnainator.graph.impl.command.MutationFinderCommand;
import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties;
import nl.tudelft.dnainator.parser.EdgeParser;
import nl.tudelft.dnainator.parser.NodeParser;
import nl.tudelft.dnainator.parser.TreeParser;
import nl.tudelft.dnainator.parser.exceptions.ParseException;
import nl.tudelft.dnainator.parser.impl.EdgeParserImpl;
import nl.tudelft.dnainator.parser.impl.NodeParserImpl;
import nl.tudelft.dnainator.tree.TreeNode;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.neo4j.graphdb.Node;
import org.neo4j.io.fs.FileUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.stream.Collectors;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

/**
 * Runs {@link MutationFinderCommand} against a graph that is built from the
 * {@code advancedtopo} test resources.
 */
public class MutationFinderCommandTest {
    private static final String DB_PATH = "target/neo4j-tree-junit";
    private static Neo4jGraph db;
    private static InputStream nodeFile;
    private static InputStream edgeFile;
    private static AnnotationImpl first;
    private static AnnotationImpl middle;
    private static AnnotationImpl last;

    /**
     * Setup the database and construct the graph.
     * @throws URISyntaxException when the URL of the tree resource cannot be
     * converted to a URI.
     */
    @BeforeClass
    public static void setUp() throws URISyntaxException {
        try {
            FileUtils.deleteRecursively(new File(DB_PATH));
            nodeFile = getNodeFile();
            edgeFile = getEdgeFile();
            NodeParser np = new NodeParserImpl(new SequenceNodeFactoryImpl(),
                    new BufferedReader(new InputStreamReader(nodeFile, "UTF-8")));
            EdgeParser ep = new EdgeParserImpl(new BufferedReader(
                    new InputStreamReader(edgeFile, "UTF-8")));
            TreeNode phylo = new TreeParser(getTreeFile()).parse();
            db = (Neo4jGraph) new Neo4jBatchBuilder(DB_PATH, new AnnotationCollectionImpl(), phylo)
                    .constructGraph(np, ep)
                    .build();
        } catch (IOException e) {
            // Report the underlying reason, like the parse failure below does.
            fail("Couldn't initialize DB: " + e.getMessage());
        } catch (ParseException e) {
            fail("Couldn't parse file: " + e.getMessage());
        }
        //CHECKSTYLE.OFF: MagicNumber
        first = new AnnotationImpl("first", 0, 10, true);
        middle = new AnnotationImpl("middle", 5, 25, true);
        last = new AnnotationImpl("last", 20, 30, true);
        //CHECKSTYLE.ON: MagicNumber
        db.addAnnotation(first);
        db.addAnnotation(middle);
        db.addAnnotation(last);
    }

    private static InputStream getNodeFile() {
        return MutationFinderCommandTest.class
                .getResourceAsStream("/strains/advancedtopo.node.graph");
    }

    private static InputStream getEdgeFile() {
        return MutationFinderCommandTest.class
                .getResourceAsStream("/strains/advancedtopo.edge.graph");
    }

    private static File getTreeFile() throws URISyntaxException {
        return new File(MutationFinderCommandTest.class.getResource("/strains/advancedtopo.nwk")
                .toURI());
    }

    /**
     * Looks up the node with ID "6" and executes a {@link MutationFinderCommand}
     * on it.
     * NOTE(review): this test asserts nothing yet; it only checks that the
     * command runs without throwing. Add an assertion on the stored score once
     * the expected value for the advancedtopo fixture is known.
     */
    @Test
    public void testIndependentMutations() {
        db.execute(e -> {
            Node node = e.findNode(NodeLabels.NODE, SequenceProperties.ID.name(), "6");
            new MutationFinderCommand(node).execute(e);
        });
    }

    /**
     * Clean up after ourselves: shut down the database and close the resource
     * streams that were opened during setup.
     * @throws IOException when one of the resource streams could not be closed
     */
    @AfterClass
    public static void cleanUp() throws IOException {
        try {
            // db is still null when setUp failed before the graph was built;
            // guarding here keeps the original setup failure visible.
            if (db != null) {
                db.shutdown();
            }
        } finally {
            try {
                close(nodeFile);
            } finally {
                close(edgeFile);
            }
        }
    }

    /**
     * Closes the given stream when it was opened at all.
     * @param stream the stream to close, may be null.
     * @throws IOException when closing fails.
     */
    private static void close(InputStream stream) throws IOException {
        if (stream != null) {
            stream.close();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
1 2
2 3
3 4
5 6
6 7
7 8
16 changes: 16 additions & 0 deletions dnainator-core/src/test/resources/strains/advancedtopo.node.graph
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
> 1 | TKK_001 | 1 | 5
TATA
> 2 | TKK_001,TKK_002 | 2 | 6
TATA
> 3 | TKK_001,TKK_002,TKK_003 | 3 | 7
TATA
> 4 | TKK_003,TKK_004,TKK_005,TKK_006 | 4 | 8
TATA
> 5 | TKK_006 | 5 | 9
TATA
> 6 | TKK_001,TKK_003,TKK_005 | 6 | 10
TATA
> 7 | TKK_004,TKK_002,TKK_003 | 7 | 11
TATA
> 8 | TKK_001,TKK_002,TKK_003,TKK_004,TKK_005,TKK_006 | 8 | 12
TATA
1 change: 1 addition & 0 deletions dnainator-core/src/test/resources/strains/advancedtopo.nwk
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
((TKK_001:0.1,TKK_002:0.2),(TKK_003:0.3,((TKK_004:0.4,TKK_005:0.5),TKK_006:0.6)))
16 changes: 8 additions & 8 deletions dnainator-core/src/test/resources/strains/topo.node.graph
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
> 2 | ASDF,ASD | 1 | 5
> 2 | TKK_001,TKK_002 | 1 | 5
TATA
> 9 | ASD | 2 | 6
> 9 | TKK_002 | 2 | 6
TATA
> 10 | ASD | 3 | 7
> 10 | TKK_002 | 3 | 7
TATA
> 5 | ASDF | 4 | 8
> 5 | TKK_001 | 4 | 8
TATA
> 3 | ASDF | 5 | 9
> 3 | TKK_001 | 5 | 9
TATA
> 7 | ASDF | 6 | 10
> 7 | TKK_001,TKK_004 | 6 | 10
TATA
> 11 | ASD,FDSA,ASDF | 7 | 11
> 11 | TKK_001,TKK_002,TKK_003 | 7 | 11
TATA
> 8 | ASDF | 8 | 12
> 8 | TKK_001,TKK_002 | 8 | 12
TATA
2 changes: 1 addition & 1 deletion dnainator-core/src/test/resources/strains/topo.nwk
Original file line number Diff line number Diff line change
@@ -1 +1 @@
(FDSA:0.1,(ASDF:0.2,ASD:0.3))
((TKK_001:0.1,TKK_002:0.2),(TKK_003:0.3,TKK_004:0.4))