Skip to content

Commit 594d811

Browse files
committed
Additional problems solved.
1 parent 22120cc commit 594d811

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

playtime/rosalind/CONS.scala

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
2+
3+
/**
 * Rosalind "CONS": prints a consensus string and the profile matrix for a
 * collection of DNA strings given in FASTA format.
 *
 * The summary structure passed between the helpers is position-major: one
 * `Vector(a, c, g, t)` of counts per sequence position.
 */
object CONS extends App {
  val input = """>Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
>Rosalind_3
ATGGATCT
>Rosalind_4
AAGCAACC
>Rosalind_5
TTGGAACT
>Rosalind_6
ATGCCATT
>Rosalind_7
ATGGCACT
"""
  // Outputs:
  // ATGCAACT
  // A: 5 1 0 0 5 5 0 0
  // C: 0 0 1 4 2 0 6 1
  // G: 1 1 6 3 0 1 0 0
  // T: 1 5 0 0 0 1 1 6

  outputConsensusAndProfile(input)

  /**
   * Prints the consensus string on the first line, followed by one
   * `"<symbol>: <counts>"` line for each of A, C, G and T.
   *
   * @param input FASTA-formatted text with `>Rosalind_<n>` headers
   */
  def outputConsensusAndProfile(input: String): Unit = {
    val summary: IndexedSeq[Vector[Int]] = summarize(input)
    println(getConsensus(summary).mkString(""))
    Vector("A", "C", "G", "T").zip(getProfile(summary)).foreach { case (symbol, counts) =>
      println(s"$symbol: ${counts.mkString(" ")}")
    }
  }

  /**
   * Picks the most frequent nucleotide at every position.
   *
   * @param input per-position counts in A, C, G, T order
   * @return one single-letter string per position; on a tie the symbol that
   *         comes first in A, C, G, T order wins (`maxBy` keeps the first maximum)
   */
  def getConsensus(input: IndexedSeq[Vector[Int]]): IndexedSeq[String] =
    input.map { counts =>
      counts.zipWithIndex.maxBy(_._1)._2 match {
        case 0 => "A"
        case 1 => "C"
        case 2 => "G"
        case 3 => "T"
      }
    }

  /**
   * Transposes the per-position counts into four rows, one per nucleotide.
   *
   * @param input per-position counts in A, C, G, T order
   * @return four rows (A, C, G, T), each holding that symbol's count at every position
   */
  def getProfile(input: IndexedSeq[Vector[Int]]): IndexedSeq[IndexedSeq[Int]] =
    Range(0, 4).map(symbolIndex => input.map(counts => counts(symbolIndex)))

  /**
   * Adds up the one-hot encodings of all DNA strings found in `input`.
   *
   * Strings are combined with `zip`, so if their lengths differ the result is
   * truncated to the shortest one. When `input` contains no record at all an
   * empty summary is returned instead of failing on an empty `reduce`.
   *
   * @param input FASTA-formatted text
   * @return per-position counts in A, C, G, T order
   */
  def summarize(input: String): IndexedSeq[Vector[Int]] =
    extractDnaStrings(input)
      .map(getNucleotideCountTuple)
      .reduceOption { (left, right) =>
        left.zip(right).map { case (a, b) => a.zip(b).map { case (m, n) => m + n } }
      }
      .getOrElse(Vector.empty[Vector[Int]])

  /**
   * One-hot encodes every base of a DNA string.
   *
   * @param dna string made only of the upper-case characters A, C, G, T;
   *            any other character raises a `MatchError`
   * @return one `Vector(a, c, g, t)` per character, with a single 1 marking the base
   */
  def getNucleotideCountTuple(dna: String): IndexedSeq[Vector[Int]] =
    dna.map(base =>
      base match {
        case 'A' => Vector(1, 0, 0, 0)
        case 'C' => Vector(0, 1, 0, 0)
        case 'G' => Vector(0, 0, 1, 0)
        case 'T' => Vector(0, 0, 0, 1)
      }
    )

  /**
   * Extracts the DNA strings of all `>Rosalind_<n>` records, in input order.
   *
   * A sequence may be wrapped over several lines; all whitespace inside it is
   * removed. The record must be terminated by a newline or by the end of the
   * input, so a final record without a trailing newline is no longer dropped.
   *
   * @param input FASTA-formatted text
   * @return a single-pass iterator over the whitespace-free sequences
   */
  def extractDnaStrings(input: String): Iterator[String] = {
    val record = """>(Rosalind_\d+)\s*([TACG\s]+)(?:\n|\z)""".r
    // Group 1 is the record name (unused here); group 2 is the raw sequence text.
    record.findAllMatchIn(input).map(_.group(2).replaceAll("""\s+""", ""))
  }
}

playtime/rosalind/MRNA.scala

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
2+
/**
 * Rosalind "MRNA": counts, modulo 1,000,000, how many RNA strings could have
 * been translated into a given protein string.
 */
object MRNA extends App {
  val example1 = "MA"
  val example2 = "MQAVNGKGFSRPWSIWYPKQLTYGLNVQAMCWGYNAITGHPQHDEKFCVKVTTVYVNTSRKQSYRMWSMSQCQFAPEFHLGGEKYEHGYFPRDCWFAKKKQMLSNNIEVGRGKHSQRWKGFGETIVTVKFTTFHWCREDFEYPFAHDDMRHNTMVQHIYRHTYYPNDRMQWLTFFCFLHEYMWDRQGFNTHDCCDKPDMINNDAHQYAPIQLTNWWGKYTHGLIPHTLRMSTFTHFNTMLRQKRCVVVDHMWQVNSPCGAHCYTIMDVWFMWKDAHQPVYYRLGWLEAVDWRSHMIARMKVWSVCYFTWGWPHLYAGCQRYDTALDMFGAHTMPRGGGCHIKEYQLFTAHQVKTSIGRCLNWGHHGADTGAYLCLADNVESSFIFAFIYAVNMGDRQKFLCELEHMIMIAQCTDTRGYMKRDVVHSKHFNLPIVWQSFKVRTWNGLYPERAYHIINSDSPNSPVHFCWGSWATEGVYLVCKQLMAWTALAQLPQRRCGGRLEQVITDRMNKYILTLKCYFYPCEVTLIKMNFMTYFWCDPRHPRFWDAPPRAIRDHVHEWQSFLMYNTYRVPRFDYWDSNRHWICYAWLYYNQQEPTFEQDYLLTEAMSCCPHIDEQVYFTDAWMIYAENMNGEHDIVWNSCVDVDMMRSGQNCPSYMMHFFLYTNVAYASFYMDRYREDTCMLMLSKLWAPGFATFVWPLFCACVKDGCGACDTINREHMKRKNIAWQHTFPEKLDRFEIFEDYWHPCKVMTYWVAMVFAKSNSQGYRGVHSDFIGNSVNWPFMHSPCQTGHVMPEVCAGNQMMHPNLQQVKNYPWRGRFGKTLMSKMMHVRTDELGQVCTMTYNEIEHMLDILSIYPIMVFTWYDGLNWSRGVEYWANAQQFTQLWLSFEWYSECTWIIQCNVRHDRAINIHGQNMKECKRESDFYRLRAGLYQWSQGKEPKYVTGTCKCVCACMGLMEQFCRHMNPSDNSTPQVNENAMNVASDSCKLMHIAHIFFA"
  val example3 = "MISLGGQRSSSKWNPSNPYYEHFVSDINNALVPEIDWDGGMQAIVFGDIGAILLGELKYMKPFSCCKHKQTKRHTESIWVVDGEEHHYKMFLPEPFNVKEQEEPQWHHKTAAHIYNCLPMAEGMPMLDQWYMFIAAIANTHCWAPRRGTPIHQGELNKEYQDLGMINEHMYICKLCTAIYEGWWTFCTREDNLEELHMTYPKHSFRHRFYFCSAEPTINGIDPVPDRYQWAKFIPYRFVDDSHNNCKHIRNKTSNCCHEWFRAVTWPFMDNTYHRMAHLWGLIYFSNHCANMMIPARMGSPKCTFVKHTQQGGNDSDVPLPVVRNLVWNFYIEDVTKKDRLDGIWPCHMPDDDNCTNFGAMHLHCEGFAEALKVCDSDGTYSAITLFHAWSAKKDDIKGPSDPSTRPRWYMLGQGTFRAMKVTDPSIGQRINNWDPGHVQGHMKAEMHNFWIGSGWDTERMRQHRKEGDMFDDAGVALNGWIWNGYPAGAFRLNMMWQCDGTDAKTQTVHCTWDDPRLCMIKAANSMLRKMWRSFKSPPCHMKDMLQDTLGFICNCYINAVVKHKIAPGKIAWETPQFQLWVTKQMLIFICPVFFIIWIRTNSYCLCIEWYIIPAGVTQFDLSIHVKLKYQQEIFVYLPAGQRKCWLANDQFELNYLSSLRQDQIDSWRWTVHMHGGDHMSPTRDDEARNVLSSDGEWAFYFDDAGSVQMCVEENNQDWVCNKQGGFMRRLTNDVVRKCFKNAHIPCTFEEAHWMEYYVKGMRMLQSCKCGWWVVAGFDYSTCENPGKCSPIINNSSLKFLNPIVDPMWFMWCFMHEKGWWMITLRKTYNNRPFMKKEYVAADECCWLSTTQPVPCDLQDCPFYSDRIYWIHGTSGMCWISPYCQDSPTDCFAYCHIGTREQSIQFGSSQHCFFYNFKHDLKYFSFKASGYFAVVESWLACMDQDWHWYVPCLQPMGEDCLWVTYHCNTHEAEMFWSEITMPLEWLGMAGWIIEASYWIH"

  val proteinString = example3
  println(countPossibleRNASequences(proteinString))

  /**
   * Multiplies the number of codons available for every residue of `protein`,
   * plus the stop codons, keeping the running product modulo 1,000,000.
   *
   * @param protein amino-acid string in one-letter codes
   * @return the number of candidate RNA strings, modulo 1,000,000
   */
  def countPossibleRNASequences(protein: String) = {
    // A trailing blank stands for the stop codon that terminates translation.
    val withStop = protein + " "
    withStop.foldLeft(1L) { (product, residue) =>
      (product * proteinToMRna(residue).length) % 1000000
    }
  }

  /**
   * Lists the RNA codons that encode a single amino acid.
   *
   * @param protein one-letter amino-acid code, or `' '` for the stop signal
   * @return every codon translating to that residue
   * @throws RuntimeException if the character is not a known code
   */
  def proteinToMRna(protein: Char) : List[String] =
    protein match {
      case 'A' => List("GCU", "GCA", "GCC", "GCG")
      case 'C' => List("UGU", "UGC")
      case 'D' => List("GAU", "GAC")
      case 'E' => List("GAA", "GAG")
      case 'F' => List("UUU", "UUC")
      case 'G' => List("GGU", "GGC", "GGA", "GGG")
      case 'H' => List("CAU", "CAC")
      case 'I' => List("AUU", "AUC", "AUA")
      case 'K' => List("AAA", "AAG")
      case 'L' => List("CUU", "CUC", "UUA", "UUG", "CUG", "CUA")
      case 'M' => List("AUG")
      case 'N' => List("AAC", "AAU")
      case 'P' => List("CCU", "CCC", "CCA", "CCG")
      case 'Q' => List("CAA", "CAG")
      case 'R' => List("CGU", "CGA", "CGC", "AGA", "CGG", "AGG")
      case 'S' => List("AGU", "AGC", "UCU", "UCC", "UCG", "UCA")
      case 'T' => List("ACU", "ACC", "ACA", "ACG")
      case 'V' => List("GUU", "GUC", "GUA", "GUG")
      case 'W' => List("UGG")
      case 'Y' => List("UAU", "UAC")
      case ' ' => List("UAA", "UAG", "UGA") // stop codons
      case _   => throw new RuntimeException("Bad protein")
    }
}

0 commit comments

Comments
 (0)