@@ -249,28 +249,44 @@ scoreArrayRolling <- function(seqs, pwm){
249
249
# ' @param na_rm Remove NA scores?
250
250
# ' @param ignore_cent If TRUE, central residue is ignore from scoring.
251
251
# ' @param kinase.domain Whether the domain to be trained is a kinase domain.
252
+ # ' @param residues_groups a vector of regular expressions used to group kinases by central residue they target;
253
+ # ' if a sequence does not have a central residue matching a group chosen from modified.residues by the algorithm
254
+ # ' (based on PWM), the sequence will be discarded.
252
255
# '
253
256
# ' @keywords pwm mss match tfbs
254
257
# '
255
258
# ' @keywords internal
256
259
# ' @examples
257
260
# ' # No Examples
258
- mss <- function (seqs , pwm , na_rm = F , ignore_cent = T , kinase.domain = T ){
261
+ mss <- function (seqs , pwm , na_rm = F , ignore_cent = T , kinase.domain = T , residues_groups = c( ' S|T ' , ' Y ' ) ){
259
262
# If not kinase domain, use non-central MSS instead
260
263
if (! kinase.domain ) {
261
264
return (.mssNonCentral(seqs , pwm ))
262
265
}
263
266
264
- cent_ind = ceiling(ncol(pwm )/ 2 )
267
+ # Central residue index
268
+ central_index = ceiling(ncol(pwm )/ 2 )
269
+
265
270
# Only score sequences which have a central residue S/T or Y depending on the PWM
266
- kinase_type = names(which.max(pwm [,cent_ind ]))
267
- kinase_type = ifelse(grepl(' S|T' , kinase_type ), ' S|T' , ' Y' )
271
+
272
+ # Take the aminoacid with the heaviest weight at the central position
273
+ heaviest_central_residue = names(which.max(pwm [,central_index ]))
274
+
275
+ # Kinases are considered to be of the same type
276
+ # if they modify the same group of residues.
277
+
278
+ # Knowing the most frequently modified residue (from PWM),
279
+ # choose the kinase type (=residues group) that matches the residue.
280
+ for (residue_group in residues_groups ) {
281
+ if (grepl(residue_group , heaviest_central_residue )) {
282
+ kinase_type = residue_group
283
+ }
284
+ }
285
+
268
286
central_res = kinase_type
269
287
270
- # Central residue index
271
- central_ind = NA
272
- if (ignore_cent )
273
- central_ind = ceiling(ncol(pwm )/ 2 )
288
+ if (! ignore_cent )
289
+ central_index = NA
274
290
275
291
# Info content
276
292
ic = attr(pwm , ' match.ic' )
@@ -283,7 +299,7 @@ mss <- function(seqs, pwm, na_rm=F, ignore_cent=T, kinase.domain = T){
283
299
ignore_cent = ignore_cent )
284
300
285
301
# Get array of scores
286
- keep_scores = grepl(central_res , substr(seqs , central_ind , central_ind ))
302
+ keep_scores = grepl(central_res , substr(seqs , central_index , central_index ))
287
303
288
304
# Score only ones we're keeping
289
305
scores = rep(NA , length(seqs ))
0 commit comments