Skip to content

Commit 9dd8a14

Browse files
committed
Updated the sequence reader
1 parent c0a4a11 commit 9dd8a14

File tree

5 files changed

+29
-30
lines changed

5 files changed

+29
-30
lines changed

nbproject/project.properties

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,4 @@ run.test.classpath=\
9191
source.encoding=UTF-8
9292
src.dir=src
9393
test.src.dir=test
94-
project.license=gpl20
94+
project.license=gpl30

src/edu/msu/cme/rdp/classifier/Classifier.java

+10-6
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public ClassificationResult classify(Sequence seq) throws IOException {
6363
return classify(new ClassifierSequence(seq));
6464
}
6565

66-
public ClassificationResult classify(ClassifierSequence seq) throws IOException {
66+
public ClassificationResult classify(ClassifierSequence seq) {
6767
return classify(seq, MIN_BOOTSTRSP_WORDS );
6868
}
6969

@@ -74,15 +74,16 @@ public ClassificationResult classify(ClassifierSequence seq) throws IOException
7474
* the number of bootstrap trials was used as an estimate of confidence in the assignment to that genus.
7575
* @throws ShortSequenceException if the sequence length is less than the minimum sequence length.
7676
*/
77-
public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_words) throws IOException {
77+
public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_words) {
7878
GenusWordConditionalProb gProb = null;
7979
int nodeListSize = trainingInfo.getGenusNodeListSize();
8080
boolean reversed = false;
8181

82-
int [] wordIndexArr = seq.createWordIndexArr();
82+
try {
83+
int [] wordIndexArr = seq.getWordIndexArr();
8384
if (trainingInfo.isSeqReversed(wordIndexArr, seq.getGoodWordCount())) {
8485
seq = seq.getReversedSeq();
85-
wordIndexArr = seq.createWordIndexArr();
86+
wordIndexArr = seq.getWordIndexArr();
8687
reversed = true;
8788
}
8889

@@ -99,8 +100,7 @@ public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_w
99100
}
100101

101102
if (goodWordCount > MAX_NUM_OF_WORDS) {
102-
querySeq_wordProbArr = new float[goodWordCount][nodeListSize];
103-
System.err.println("increase the array size to " + goodWordCount);
103+
querySeq_wordProbArr = new float[goodWordCount][nodeListSize];
104104
}
105105

106106
int NUM_OF_SELECTIONS = Math.max( goodWordCount / GoodWordIterator.getWordsize(), min_bootstrap_words);
@@ -199,6 +199,10 @@ public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_w
199199
ClassificationResult finalResult = new ClassificationResult(seq, reversed, finalAssigns, trainingInfo.getHierarchyInfo());
200200

201201
return finalResult;
202+
} catch (IOException ex){
203+
throw new ShortSequenceException(seq.getSeqName(), "ShortSequenceException: The length of sequence with recordID="
204+
+ seq.getSeqName() + " is less than " + MIN_SEQ_LEN);
205+
}
202206
}
203207

204208
/**

src/edu/msu/cme/rdp/classifier/TrainingInfo.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ private void createGenusNodeList(HierarchyTree root) {
258258
* less than zero, the query sequence is in reverse orientation.
259259
*/
260260
public boolean isSeqReversed(ClassifierSequence seq) throws IOException {
261-
int[] wordIndexArr = seq.createWordIndexArr();
261+
int[] wordIndexArr = seq.getWordIndexArr();
262262
boolean reverse = false;
263263
float priorDiff = 0;
264264
for (int offset = 0; offset < wordIndexArr.length; offset++) {

src/edu/msu/cme/rdp/classifier/utils/ClassifierSequence.java

+15-20
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,23 @@
2222
public class ClassifierSequence extends Sequence {
2323
private boolean reverse = false;
2424
private Integer goodWordCount = null; // the number of words with only valid bases
25-
25+
private int [] wordIndexArr = null;
2626
/**
2727
* Creates a new ClassifierSequence.
2828
*/
29-
public ClassifierSequence(Sequence seq) {
29+
public ClassifierSequence(Sequence seq) throws IOException{
3030
this(seq.getSeqName(), seq.getDesc(), seq.getSeqString());
3131
}
3232

33-
public ClassifierSequence(String seqName, String desc, String seqString) {
33+
public ClassifierSequence(String seqName, String desc, String seqString) throws IOException {
3434
super(seqName, desc, SeqUtils.getUnalignedSeqString(seqString));
35+
/**
36+
* Fetches every overlapping word from the sequence string, changes each
37+
* word to integer format and saves in an array.
38+
*/
39+
GoodWordIterator iterator = new GoodWordIterator(this.getSeqString());
40+
this.wordIndexArr = iterator.getWordArr();
41+
this.goodWordCount = wordIndexArr.length;
3542
}
3643

3744
/**
@@ -41,6 +48,9 @@ protected void setSeqString(String s) {
4148
seqString = s;
4249
}
4350

51+
public int[] getWordIndexArr(){
52+
return this.wordIndexArr;
53+
}
4454
/**
4555
* Returns true if the sequence string is a minus strand.
4656
*/
@@ -52,31 +62,16 @@ public boolean isReverse() {
5262
* Returns a Sequence object whose sequence string is the reverse complement
5363
* of the current rRNA sequence string.
5464
*/
55-
public ClassifierSequence getReversedSeq() {
65+
public ClassifierSequence getReversedSeq() throws IOException {
5666
ClassifierSequence retval = new ClassifierSequence(seqName, desc, IUBUtilities.reverseComplement(seqString));
5767
retval.reverse = true;
5868
return retval;
5969
}
6070

61-
/**
62-
* Fetches every overlapping word from the sequence string, changes each
63-
* word to integer format and saves in an array.
64-
*/
65-
public int[] createWordIndexArr() throws IOException {
66-
GoodWordIterator iterator = new GoodWordIterator(this.getSeqString());
67-
int [] wordIndexArr = iterator.getWordArr();
68-
69-
this.goodWordCount = wordIndexArr.length;
70-
return wordIndexArr;
71-
}
72-
7371
/**
7472
* Returns the number of words with valid bases.
7573
*/
76-
public int getGoodWordCount() throws IOException {
77-
if (goodWordCount == null) {
78-
this.createWordIndexArr();
79-
}
74+
public int getGoodWordCount() {
8075
return goodWordCount;
8176
}
8277
}

test/edu/msu/cme/rdp/classifier/rrnaclassifier/ParsedSequenceTest.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public static Test suite() {
3838
* Test of getReversedSeq method, of class
3939
* edu.msu.cme.rdp.classifier.readseqwrapper.ParsedSequence.
4040
*/
41-
public void testGetReversedSeq() {
41+
public void testGetReversedSeq() throws IOException {
4242
System.out.println("testGetReversedSeq");
4343

4444
// "AAAAAAAAAG-CCCCCCCCUGAGGGUUACnAA";
@@ -107,7 +107,7 @@ public void testCreateWordIndexArr() throws IOException {
107107
String seqString = "AAAAAAAAAG-CCCCCCCCUGAGGGUUACnAA";
108108
ClassifierSequence aSeq = new ClassifierSequence("test", "", seqString);
109109

110-
int[] wordIndexArr = aSeq.createWordIndexArr();
110+
int[] wordIndexArr = aSeq.getWordIndexArr();
111111

112112
assertEquals(0, wordIndexArr[0]); //AAAAAAAA
113113

0 commit comments

Comments
 (0)