Skip to content

Commit 3091016

Browse files
committed
Improve property handling in GFFRec
1 parent a51e505 commit 3091016

File tree

6 files changed

+81
-8
lines changed

6 files changed

+81
-8
lines changed

copy-test-data-to-hadoop.sh

100644100755
File mode changed.

src/main/java/com/lifetech/hadoop/alignment/GFFParser.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
public class GFFParser {
1515

1616
public MapWritable readProperties(byte[] buff) {
17+
System.out.println("|" + (new String(buff)) + "|");
1718
MapWritable map = new MapWritable();
18-
for(byte[] x: ByteArray.splitIterable(buff, (byte) ';', 0, buff.length)) {
19-
byte[][] pair = ByteArray.split2(x, (byte) ' ');
20-
map.put(new Text(pair[0]), new Text(pair[1]));
19+
for(byte[] x: ByteArray.splitIterable(buff, (byte) ';', 0, buff.length)) {
20+
byte[][] pair = ByteArray.split2(ByteArray.trim(x), (byte) ' ');
21+
System.out.println(new String(pair[0]) + ":" + new String(pair[1]));
22+
map.put(new Text(pair[0]), new Text(ByteArray.unquote(pair[1])));
2123
}
2224

2325
return map;
@@ -45,7 +47,7 @@ public GFFRec parse(byte [] buff,int buffStart,int buffLen) {
4547

4648
ByteWritable strand = new ByteWritable(it.next()[0]);
4749
ByteWritable frame = new ByteWritable(it.next()[0]);
48-
MapWritable properties = readProperties(it.next());
50+
MapWritable properties = readProperties(ByteArray.trim(it.next()));
4951

5052
return new GFFRec(seqname,source, feature,start,
5153
end, score, strand,

src/main/java/com/lifetech/hadoop/alignment/GFFRec.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ public String toString() {
8585

8686
for( Entry<Writable,Writable> pair: properties.entrySet()) {
8787
b.append(pair.getKey().toString());
88-
b.append(' ');
88+
b.append(" \"");
8989
b.append(pair.getValue().toString());
90-
b.append("; ");
90+
b.append("\"; ");
9191
}
9292
return b.toString();
9393
}
@@ -127,4 +127,8 @@ public ByteWritable getFrame() {
127127
public MapWritable getProperties() {
128128
return properties;
129129
}
130+
131+
public Text getProperty(Text name) {
132+
return (Text)properties.get(name);
133+
}
130134
}

src/main/java/com/lifetech/utils/ByteArray.java

+37-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
public class ByteArray {
88

9+
/*
10+
* Split Iterator and Iterable
11+
*/
12+
913
static class SplitIterator implements Iterator<byte[]> {
1014
private byte[] b;
1115
private byte c;
@@ -90,7 +94,11 @@ public static Iterator<byte[]> splitIterator(byte [] b,byte c,int start,int len)
9094
return new SplitIterator(b,c,start,len);
9195
}
9296

93-
public static void printBytes(byte[] x) {
97+
/*
98+
* =======================================================================================
99+
*/
100+
101+
static public void printBytes(byte[] x) {
94102
ByteArray.printBytes(x,x.length);
95103
}
96104

@@ -99,4 +107,32 @@ static public void printBytes(byte[] x,int size) {
99107
System.out.printf("\\x%x",x[i]);
100108
}
101109
}
110+
111+
/*
112+
* =======================================================================================
113+
*/
114+
115+
static public byte[] trim(byte[] b) {
116+
if(b.length == 0) return b;
117+
118+
int i=0;
119+
int j=b.length-1;
120+
for(;i<b.length & (b[i] == (byte)' ');i++) {}
121+
for(;j>i & (b[j] == (byte)' ');j--) { };
122+
return Arrays.copyOfRange(b, i, j+1);
123+
}
124+
125+
static public byte[] unquote(byte[] b) {
126+
if (b.length < 2) return b;
127+
if (b[0] == (byte) '"' || b[0] == (byte) '\'') {
128+
if (b[0] == b[b.length-1]) {
129+
return Arrays.copyOfRange(b, 1, b.length-1);
130+
} else {
131+
return b;
132+
}
133+
} else {
134+
return b;
135+
}
136+
137+
}
102138
}

src/test/java/com/lifetech/hadoop/alignment/TestGFFRec.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,13 @@ public void testGFFParser() {
4242
assertEquals('0',(char)gff.getFrame().get());
4343
out.println((char)gff.getFrame().get());
4444

45+
out.println(gff.getProperty(new Text("gene_id")));
46+
assertEquals(new Text("001"),gff.getProperty(new Text("gene_id")));
47+
48+
out.println(gff.getProperty(new Text("transcript_id")));
49+
assertEquals(new Text("001"),gff.getProperty(new Text("transcript_id")));
50+
4551
out.println(gff.toString());
46-
assertEquals("AB000381\tTwinscan\tCDS\t380\t401\t.\t+\t0\t transcript_id \"001.1\"; gene_id \"001\"; ",gff.toString());
52+
assertEquals("AB000381\tTwinscan\tCDS\t380\t401\t.\t+\t0\tgene_id \"001\"; transcript_id \"001.1\"; ",gff.toString());
4753
}
4854
}

src/test/java/com/lifetech/utils/TestByteArray.java

+25
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,29 @@ public void testSplitIterable() {
3737
testSplitIterable0("1024","[1024]");
3838
testSplitIterable0("1024 13","[1024, 13]");
3939
}
40+
41+
private void testTrim0(String target,String expected) {
42+
byte[] b = target.getBytes();
43+
byte[] r = ByteArray.trim(b);
44+
assertEquals(expected,new String(r));
45+
}
46+
47+
@Test
48+
public void testTrim() {
49+
testTrim0(" ABC","ABC");
50+
testTrim0(" ABC ","ABC");
51+
}
52+
53+
private void testUnquote0(String target,String expected) {
54+
byte[] b = target.getBytes();
55+
byte[] r = ByteArray.unquote(b);
56+
assertEquals(expected,new String(r));
57+
}
58+
59+
@Test
60+
public void testUnquote() {
61+
testUnquote0("\"abc\"","abc");
62+
testUnquote0("\"\"","");
63+
testUnquote0("\"abc","\"abc");
64+
}
4065
}

0 commit comments

Comments
 (0)