############################### ## Formal R script for SAFE: ## ## Bill Barry, Dan Gatti ## ## 1-12-08 ## ############################### `error.FDR.BH` <- function(P.matrix){ num.cat<-dim(P.matrix)[[2]] order <- order(order(P.matrix[1,])) BH <- P.matrix[1,order(P.matrix[1,])] * num.cat/(1:num.cat) for(i in (num.cat-1):1) BH[i]<-min(BH[i],BH[i+1]) return(BH[order]) } `error.FDR.YB` <- function(P.matrix){ order <- order(order(P.matrix[1,])) P.matrix <- P.matrix[,order(P.matrix[1,])] num.perms<-dim(P.matrix)[[1]] num.cat<-dim(P.matrix)[[2]] s.hat<-rep(NA,num.cat) YB<-rep(NA,num.cat) YB.funct<-function(k.vec,p,s){ v.k<-sum(k.vec<=p) if ((v.k+s)>0) return(v.k/(v.k+s)) else return(0) } for(i in 1:num.cat){ s.hat[i]<-max(0,sum(P.matrix[1,]<=P.matrix[1,i])- sum(P.matrix[-1,]<=P.matrix[1,i])/(num.perms-1)) YB[i]<-mean(apply(P.matrix[-1,],1,YB.funct,P.matrix[1,i],s.hat[i])) } for(i in (num.cat-1):1) YB[i]<-min(YB[i],YB[i+1]) return(YB[order]) } `error.FWER.Bonf` <- function(P.matrix){ cap <- (P.matrix[1,]*dim(P.matrix)[[2]]) <= 1 return((P.matrix[1,]*dim(P.matrix)[[2]])^cap) } `error.FWER.Holm` <- function(P.matrix){ num.cat<-dim(P.matrix)[[2]] order <- order(order(P.matrix[1,])) H <- P.matrix[1,order(P.matrix[1,])] * (num.cat - 0:(num.cat-1)) for(i in 2:num.cat) H[i]<-max(H[i],H[i-1]) return((H^(H <= 1))[order]) } `error.FWER.WY` <- function(P.matrix){ order <- order(order(P.matrix[1,])) P.matrix <- P.matrix[,order(P.matrix[1,])] num.cat<-dim(P.matrix)[[2]] num.perms<-dim(P.matrix)[[1]] WY<-rep(NA,num.cat) for(i in 1:(num.cat-1)){ WY[i]<-sum(apply(P.matrix,1,min)<=P.matrix[1,1])/num.perms P.matrix<-P.matrix[,-1] } WY[num.cat]<-sum(P.matrix<=P.matrix[1])/num.perms for(i in 2:num.cat) WY[i]<-max(WY[i],WY[i-1]) return(WY[order]) } `gene.results` <- function(object = NULL, cat.name = NULL, error = "none"){ if(!error %in% c("none","FWER.Bonf","FWER.Holm","FDR.BH")){ cat(paste("WARNING: error = \"",error,"\" not available for local statistics\n",sep="")) error <- "none" } pretty <- function(t,d) as.character(round(t,d)^(t>10^(-d))*signif(t,1)^(t<=10^(-d))) C.names <- names(object@global.stat) if(is.null(cat.name)) cat.name <- C.names[order(object@global.pval)][1] if(error!="none") error.p<-get(paste("error",error,sep="."))(t(object@local.pval)) cat("Category gene-specific results:\n") cat(paste(" Local:",object@local,"\n")) cat(paste(" Method:",object@method,"\n")) cat("\n") keep <- as.matrix(object@C.mat[,C.names == cat.name])[,1] cat(paste(cat.name,"consists of",sum(keep), "genes\n")) cat("\n") if(min(object@local.stat)<0){ cat("Upregulated Genes\n") cat("-----------------\n") } if(sum(is.na(object@local.pval))>0) { table <- cbind(Local.Stat=round(object@local.stat,3))[keep==1,,drop=FALSE] table <- table[order(-table[,1]*sign(table[,1])),,drop=FALSE] } else if(error=="none"){ table <- data.frame(Local.Stat = round(object@local.stat,3), Emp.pvalue = pretty(object@local.pval,4), Temp = object@local.pval)[keep==1,,drop=FALSE] table <- table[order(table[,3], partial = -table[,1]*sign(table[,1])),,drop=FALSE][,1:2,drop=FALSE] } else { table <- data.frame(Local.Stat = round(object@local.stat,3), Emp.pvalue = pretty(object@local.pval,4), Adj.pvalue = pretty(error.p,4), Temp = object@local.pval)[keep==1,,drop=FALSE] table <- table[order(table[,4], partial = -table[,1]*sign(table[,1])),,drop=FALSE][,1:3,drop=FALSE] } if(max(table[,1])>0) print(table[table[,1]>0,,drop=FALSE]) else cat("None in category\n") if(min(object@local.stat)<0){ cat("\n") cat("Downregulated Genes\n") cat("-------------------\n") if(min(table[,1])<=0) print(table[table[,1]<=0,,drop=FALSE]) else cat("None in category\n") } } `getAnnotateOptions` <- function() { return(c("GO.CC", "GO.BP", "GO.MF", "KEGG", "PFAM")) } `getCmatrix` <- function(keyword.list = NULL, gene.list = NULL, vector = NULL, file = NULL, delimiter = ",", present.genes = NULL, GO.ont = NULL, min.size = 0, max.size = Inf, as.matrix = FALSE,...){ require(SparseM) if(is.null(keyword.list)){ if(is.null(gene.list)) { if(is.null(vector)) { vector<-as.character(read.table(file,...)[,1]) } gene.list<-strsplit(vector,delimiter) } C.names <- sort(unique(unlist(gene.list))) row.names <- names(gene.list) if (!is.null(present.genes)) { if(prod(present.genes %in% row.names) == 0) { cat("WARNING: Some present.genes do not appear in gene.list\n")} if(sum(present.genes %in% row.names) == 0) { stop("No present.genes matched the names of gene.list",call. = FALSE)} } else present.genes <- row.names genes.match = match(present.genes, row.names) present.list <- gene.list[genes.match] names(present.list) = present.genes # ra is the list of values in the csr matrix (all 1's in our case) # ja is the column position in the matrix where each value goes for a given row. # ia is the position in ra and ja where the next row starts. ia has a size of # number of rows + 1 ra = 1 ja = 1 ia = 1 raja.index = 1 cat(" Categories completed:") num <- length(present.list) for(g in 1:num){ if (g %in% floor(seq(num/5,num,num/5))) cat(paste(ceiling(g/num*100),"% ",sep="")) gene.cats = present.list[[g]] if(!is.null(gene.cats)){ if(all(is.na(gene.cats))) gene.cats = NA else { gene.cats <- unique(gene.cats[!is.na(gene.cats)]) } num.cats = length(gene.cats) if(!all(is.na(gene.cats))){ cat.matches = match(gene.cats, C.names) last.index = raja.index + num.cats - 1 ra[raja.index:last.index] = rep(1, num.cats) ja[raja.index:last.index] = cat.matches ia[g] = raja.index raja.index = raja.index + num.cats } else ia[g] = raja.index } else ia[g] = raja.index } ia[g+1] = raja.index dimension = c(num, length(C.names)) C.matrix.csr = new("matrix.csr", ra = as.integer(ra), ja = as.integer(ja), ia = as.integer(ia), dimension = as.integer(dimension)) } else { if(!is.null(GO.ont)) {require(GO) ; require(annotate)} keep <- rep(FALSE,length(keyword.list)) if(is.null(present.genes)) present.genes <- sort(unique(unlist(keyword.list))) for (i in 1:length(keyword.list)) { if(names(keyword.list)[[i]]!="all"){ if(is.null(GO.ont)) { keep[i] <- (sum(present.genes %in% keyword.list[[i]]) >= min.size) & (sum(present.genes %in% keyword.list[[i]]) <= max.size) } else { keep[i] <- (sum(present.genes %in% keyword.list[[i]]) >= min.size) & (sum(present.genes %in% keyword.list[[i]]) <= max.size) & (Ontology(mget(names(keyword.list)[[i]], GOTERM)[[1]]) %in% GO.ont) }} } keyword.list <- keyword.list[keep] C.names <- names(keyword.list) # ra is the list of values in the csr matrix (all 1's in our case) # ja is the column position in the matrix where each value goes for a given row. # ia is the position in ra and ja where the next row starts. ia has a size of # number of rows + 1 ra = 1 ja = 1 ia = 1 raja.index = 1 cat(" Categories completed:") num <- length(keyword.list) for(g in 1:num){ if (g %in% floor(seq(num/5,num,num/5))) cat(paste(ceiling(g/num*100),"% ",sep="")) gene.cats = unique(keyword.list[[g]][keyword.list[[g]] %in% present.genes]) num.cats = length(gene.cats) if(!is.null(gene.cats)){ if(!is.na(gene.cats[1])){ cat.matches = match(gene.cats, present.genes) last.index = raja.index + num.cats - 1 ra[raja.index:last.index] = rep(1, num.cats) ja[raja.index:last.index] = cat.matches ia[g] = raja.index raja.index = raja.index + num.cats } else ia[g] = raja.index } else ia[g] = raja.index } ia[g+1] = raja.index dimension = c(length(C.names), length(present.genes)) C.matrix.csr = new("matrix.csr", ra = as.integer(ra), ja = as.integer(ja), ia = as.integer(ia), dimension = as.integer(dimension)) C.matrix.csr = t(C.matrix.csr) } # The SparseM matrix does not support the apply() function. So multiply # by a row vector of 1's to get the sum of genes in each category. rowvec = as.matrix.csr(matrix(rep(1, dim(C.matrix.csr)[1]), nrow = 1)) size = as.matrix(rowvec %*% C.matrix.csr) C.matrix.csr <- C.matrix.csr[, (size >= min.size) & (size <= max.size)] C.names = C.names[(size >= min.size) & (size <= max.size)] cat(paste("\n ",length(C.names),"catgories formed\n")) if(as.matrix) { C.mat <- as.matrix(C.matrix.csr) dimnames(C.mat) <- list(present.genes,C.names) return(C.mat) } else return(list(C.mat.csr=C.matrix.csr, row.names = present.genes, col.names = C.names)) } `getPImatrix` <- function(n = NULL, y.vec = NULL, block.vec = NULL , K, method="permutation"){ if (!is.null(y.vec)) n <- length(y.vec) if (!is.null(block.vec)) n <- length(block.vec) if (length(n)>1) { n <- length(n) cat(paste("Warning: n is given as a vector, and is assumed to be",n,"\n")) } Pi<-matrix(NA,K,n) Pi[1,]<-1:n if(substr(method,1,9)=="bootstrap"){ if(is.null(y.vec)&is.null(block.vec)) { for(i in 2:K) Pi[i,]<-sample(1:n,replace=TRUE) } else if(is.null(block.vec)){ unique<-unique(y.vec) for (i in 2:K){ for(j in 1:length(unique)){ Pi[i,y.vec==unique[j]]<-sample((1:n)[y.vec==unique[j]],replace=TRUE) } } } else { pairs <- unique(abs(block.vec)) for (i in 2:K){ for(j in 1:length(pairs)){ sample <- sample(pairs,1) Pi[i,match(c(pairs[j],-pairs[j]),block.vec)] <- (1:n)[match( c(sample,-sample),block.vec)] } } } return(Pi) } else { if(is.null(block.vec)) for (i in 2:K) Pi[i,]<-sample(1:n) else { if(is.numeric(block.vec)) block.vec <- abs(block.vec) unique<-unique(block.vec) for (i in 2:K){ for(j in 1:length(unique)){ Pi[i,block.vec==unique[j]]<-sample((1:n)[block.vec==unique[j]]) } } } return(Pi) } } `getResamplingOptions` <- function() { return(c("permutation", "bootstrap", "bootstrap.t", "bootstrap.q")) } `getSAFEResults` <- function(safe.obj, alpha=NULL){ if(!is.null(alpha)) safe.obj@alpha <- alpha if (safe.obj@error=="none"){ keep <- safe.obj@global.pval <= alpha } else keep <- safe.obj@global.error <= alpha return(list(local = safe.obj@local, # character-string describing local statistic gene.names = names(safe.obj@local.stat), # character-vector of gene-names gene.stats = safe.obj@local.stat, # numeric vector of gene-specific statistics gene.pvals = safe.obj@local.pval, # numeric vector of gene-specific p-values global = safe.obj@global, # character-string describing global statistic cat.names = names(safe.obj@global.stat)[keep], # character-vector of cat-names cat.stats = safe.obj@global.stat[keep], # numeric vector of cat-specific statistics cat.pvals = safe.obj@global.pval[keep], # numeric vector of cat-specific p-values cat.error = safe.obj@global.error[keep], # numeric vector of cat-specific adjusted p-values error = safe.obj@error, # character-string describing error adj. method method = safe.obj@method, # character-string describing resampling method alpha = safe.obj@alpha # numeric value giving cutoff for cat-significance ) ) } `global.AveDiff` <- function(C.mat, u, one.sided, args.global ){ m <- length(u) size <- (rep(1,m) %*% C.mat)[1,] if(!args.global$one.sided){ return(function(u, C.mat2 = C.mat, sizeB = size, mB = m){ m1 <- (t(C.mat2) %*% abs(u))/sizeB m2 <- (sum(abs(u)) - m1*sizeB)/(mB-sizeB) s1 <- (t(C.mat2) %*% abs(u)^2) - m1^2 * sizeB s2 <- (sum(abs(u)^2) - s1 - m1^2 * sizeB) - m2^2 * (mB-sizeB) diff <- (m1 - m2)/sqrt(s1/sizeB/(sizeB-1) + s2/(mB-sizeB)/(mB-sizeB-1)) return(as.numeric(diff))}) } else return(function(u, C.mat2 = C.mat, sizeB = size ,mB = m){ m1 <- (t(C.mat2) %*% u)/sizeB m2 <- (sum(u) - m1*sizeB)/(mB-sizeB) s1 <- (t(C.mat2) %*% u^2) - m1^2 * sizeB s2 <- (sum(u^2) - s1 - m1^2 * sizeB) - m2^2 * (mB-sizeB) diff <- (m1 - m2)/sqrt(s1/sizeB/(sizeB-1) + s2/(mB-sizeB)/(mB-sizeB-1)) return(as.numeric(diff))}) } `global.Fisher` <- function(C.mat, u, args.global){ if(is.null(args.global$genelist.cutoff)&is.null(args.global$genelist.length)) { stop("args.global$genelist.cutoff or args.global$genelist.length must be specified",call.=FALSE)} if(!is.null(args.global$genelist.cutoff) & !is.null(args.global$genelist.length)) { stop("Both args.global$genelist.cutoff and args.global$genelist.length cant be given",call.=FALSE)} m2 <- length(u) size2 <- (rep(1,m2) %*% C.mat)[1,] if(!args.global$one.sided){ if(!is.null(args.global$genelist.length)){ return(function(u, C.mat2= C.mat, n = args.global$genelist.length) { return(as.numeric(t(C.mat2) %*% as.numeric(rank(-abs(u))<= n)))}) } else { return(function(u, C.mat2 = C.mat, cut = args.global$genelist.cutoff, size=size2, m = m2) { count <- as.numeric(t(C.mat2) %*% as.numeric(abs(u) >= cut)) return(phyper(count-1,size,m-size,sum(abs(u) >= cut)))}) } } else { if(!is.null(args.global$genelist.length)){ return(function(u, C.mat2 = C.mat, n = args.global$genelist.length) { return(as.numeric(t(C.mat2) %*% as.numeric(rank(-u)<= n)))}) } else { return(function(u, C.mat2 = C.mat, cut = args.global$genelist.cutoff, size=size2, m = m2) { count <- as.numeric(t(C.mat) %*% as.numeric(abs(u) >= cut)) return(phyper(count-1,size,m-size,sum(abs(u) >= cut)))}) } } } `global.Kolmogorov` <- function(C.mat, u, args.global){ m2 <- length(u) size2 <- (rep(1,m2) %*% C.mat)[1,] if(!args.global$one.sided){ return(function(u, C.mat2 = as.matrix(C.mat), m = m2, g.vec = size2) { G <- rep(1,m) %*% t(g.vec) ranked.Cmatrix <- C.mat2[order(-abs(u)),] * sqrt((m-G)/G) - (1 - C.mat2[order(-abs(u)),]) * sqrt(G/(m-G)) return(apply(apply(ranked.Cmatrix,2,cumsum),2,max)) }) } else { return(function(u, C.mat2 = as.matrix(C.mat),m = m2, g.vec = size2) { G <- rep(1,m) %*% t(g.vec) ranked.Cmatrix <- C.mat2[order(-u),] * sqrt((m-G)/G) - (1 - C.mat2[order(-u),]) * sqrt(G/(m-G)) return(apply(apply(ranked.Cmatrix,2,cumsum),2,max)) }) } } `global.Pearson` <- function(C.mat,u, args.global){ if(is.null(args.global$genelist.cutoff)) { stop("args.global$genelist.cutoff must be specified",call.=FALSE)} m2 <- length(u) size2 <- (rep(1,m2) %*% C.mat)[1,] if(!args.global$one.sided){ return(function(u, C.mat2 = C.mat,cut = args.global$genelist.cutoff, size=size2, m = m2) { o1 <- (t(C.mat2) %*% as.numeric(abs(u) >= cut)) o2 <- sum(abs(u) >= cut) - o1 diff <- (o1/size - o2/(m-size))/sqrt((o1+o2)*(1-o1/m-o2/m)/size/(m-size)) return(as.numeric(diff))}) } else { return(function(u, C.mat2 = C.mat,cut = args.global$genelist.cutoff, size=size2, m = m2) { o1 <- (t(C.mat2) %*% as.numeric(u >= cut)) o2 <- sum(u >= cut) - o1 diff <- (o1/size - o2/(m-size))/sqrt((o1+o2)*(1-o1/m-o2/m)/size/(m-size)) return(as.numeric(diff))}) } } `global.Wilcoxon` <- function(C.mat, u, args.global){ if(! args.global$one.sided){ return(function(u,C.mat2=C.mat) return(as.numeric(t(C.mat2) %*% rank(abs(u))))) } else return(function(u,C.mat2=C.mat) return(as.numeric(t(C.mat2) %*% rank(u)))) } `local.f.ANOVA` <- function(X.mat,y.vec,...){ unique <- unique(y.vec) ANOVA.mat<-rep(1,length(y.vec)) for(i in 2:length(unique)) { ANOVA.mat<-cbind(ANOVA.mat,(y.vec==unique[i])*1) } ANOVA.inner <- solve(t(ANOVA.mat) %*% ANOVA.mat) ANOVA.contr <- cbind(rep(0,length(unique)-1),diag(length(unique)-1)) return(function(data = X.mat,a.x = as.matrix(ANOVA.mat),a.xpxi = ANOVA.inner, a.c = as.matrix(ANOVA.contr),...){ n<-dim(a.x)[[1]]; r<-dim(a.x)[[2]] a<-dim(a.c)[[1]]; m<-dim(data)[[1]] a.y<-t(data) mse<-t(rep(1,n))%*%(a.y*((diag(n)-a.x%*%a.xpxi%*%t(a.x))%*%a.y))/(n-r) thetahat<- a.c %*% a.xpxi %*% t(a.x) %*% a.y msa<-t(rep(1,a))%*%(thetahat*((solve(a.c%*%a.xpxi%*%t(a.c))%*%thetahat)))/a return(as.numeric(msa/mse)) }) } `local.t.LM` <- function(X.mat,y.vec,...){ n <- length(y.vec) return(function(data = X.mat,vector = y.vec,n2 = n,...){ m.x <- (data %*% rep(1,n2))/n2 s.xy <- data %*% vector v.x <- ((data^2 %*% rep(1,n2)) - n2*m.x^2)/(n2-1) r <- (s.xy - n2*m.x*mean(vector))/sqrt(v.x*var(vector))/(n2-1) return(as.numeric(r * sqrt((n-2)/(1-r^2))))}) } `local.t.paired` <- function (X.mat, y.vec,...){ if((sum(sort(abs(y.vec[y.vec<0]))!=sort(y.vec[y.vec>0]))>0) | length(unique(abs(y.vec)))!=length(y.vec)/2) { stop("y.vec is improperly defined for a paired t-test", call. = FALSE) } return(function(data, vector = y.vec,...) { x <- as.matrix(data[,y.vec>0][,order(y.vec[y.vec>0])]) y <- as.matrix(data[,y.vec<0][,order(-y.vec[y.vec<0])]) n <- dim(x)[[2]] x.m <- x %*% rep(1/n, n) y.m <- y %*% rep(1/n, n) x.res <- x - x.m %*% rep(1,n) y.res <- y - y.m %*% rep(1,n) t <- (x.m-y.m) * (sqrt(n*(n-1) / ((x.res-y.res)^2 %*% rep(1,n)))) return(as.numeric(t)) }) } `local.t.Student` <- function(X.mat,y.vec,...){ if (length(unique(y.vec))>2) { stop("local = \"t.Student\", but y.vec has more than 2 elements",call.=FALSE) } else if(sum(sort(unique(y.vec))==c(0,1))!=2) { cat(paste("Warning: y.vec is not (0,1), thus Group 1 ==",y.vec[1],"\n")) y.vec <- (y.vec == y.vec[1])*1 } return(function(data,vector = y.vec,...) { x<-as.matrix(data[,vector==1]) y<-as.matrix(data[,!vector==1]) n.x<-dim(x)[[2]]; n.y<-dim(y)[[2]] x.m<- x %*% rep(1/n.x,n.x); y.m<- y %*% rep(1/n.y,n.y) ssx<-(x^2%*%rep(1,n.x)-x.m^2*n.x) ssy<-(y^2%*%rep(1,n.y)-y.m^2*n.y) t <-(x.m-y.m) / sqrt((ssx+ssy)*(1/n.x+1/n.y)/(n.x+n.y-2)) return(as.numeric(t)) }) } `local.t.Welch` <- function(X.mat,y.vec,...){ if (length(unique(y.vec))>2) { stop("local = \"t.Welch\", but y.vec has more than 2 elements",call.=FALSE) } else if(sum(sort(unique(y.vec))==c(0,1))!=2) { cat(paste("Warning: y.vec is not (0,1), thus Group 1 ==",y.vec[1],"\n")) y.vec <- (y.vec == y.vec[1])*1 } return(function(data,vector = y.vec,...) { x<-as.matrix(data[,vector==1]) y<-as.matrix(data[,!vector==1]) n.x<-dim(x)[[2]]; n.y<-dim(y)[[2]] x.m<- x %*% rep(1/n.x,n.x); y.m<- y %*% rep(1/n.y,n.y) ssx<-(x^2%*%rep(1,n.x)-x.m^2*n.x) ssy<-(y^2%*%rep(1,n.y)-y.m^2*n.y) t <- (x.m-y.m)/ sqrt(ssx/n.x/(n.x-1) + ssy/n.y/(n.y-1)) return(as.numeric(t)) }) } `local.z.COXPH` <- function(X.mat,y.vec,args.local=NULL){ require(survival) if(is.null(args.local$censor)) stop("Censoring indicators required",call.=FALSE) n = length(y.vec) if(is.null(args.local$covariate)){ return(function(data = X.mat, times=y.vec, cens=args.local$censor, resample = 1:n, ...){ m <- dim(data)[[1]] z <- rep(0,m) y <- Surv(times[resample],cens[resample]) for(i in 1:m){ fit <- coxph(y ~ data[i,]) z[i] <- fit[[1]]/sqrt(fit[[2]]) } return(z) }) } else return(function(data = X.mat, times=y.vec, cens=args.local$censor, Cov = args.local$covariate,resample = 1:n, ...){ m <- dim(data)[[1]] z <- rep(0,m) y <- Surv(times[resample],cens[resample]) Cov <- data.frame(Cov[resample,]) xnam <- dimnames(Cov)[[2]] fmla <- as.formula(paste("y ~ data[i,] + ", paste(xnam, collapse= "+"))) for(i in 1:m){ fit <- coxph(fmla,data=Cov) z[i] <- fit[[1]][1]/sqrt(fit[[2]][1,1]) } return(z) }) } ###### SAFE Class and Methods ## ## Note: slots changed, show updated; BB011507 ## require(methods) require(SparseM) setClass("SAFE",representation(local="character",local.stat="numeric",local.pval="numeric", global="character",global.stat="numeric",global.pval="numeric",error="character", global.error="numeric",C.mat="matrix.csr",alpha="numeric",method="character")) setMethod("show","SAFE", function(object){ pretty <- function(t,d) as.character(round(t,d)^(t>10^(-d))*signif(t,1)^(t<=10^(-d))) cat("SAFE results:\n") cat(paste(" Local:",object@local,"\n")) cat(paste(" Global:",object@global,"\n")) cat(paste(" Method:",object@method,"\n")) size <- (rep(1,length(object@local.stat)) %*% object@C.mat)[1,] if(object@global=="Wilcoxon"){ stat.name <- "Mean.Rank" stat <- round(object@global.stat/size,1) } else if(object@global %in% c("Pearson","AveDiff")){ stat.name <- "Z.stat.unstd" stat <- round(object@global.stat,3) } else if(object@global=="Fisher"){ stat.name <- "Num.Reject" stat <- object@global.stat } else { stat.name <- "Global.Stat" stat <- object@global.stat } if (object@error=="none"){ cat("\n") if(sum(is.na(object@global.pval))>0) { table <- data.frame(size,stat) dimnames(table) <- list(names(object@global.stat), c("Size",stat.name)) print(table) } else { n <- sum(round(object@global.pval,4) <= object@alpha) if(n == 0) { cat(paste("No categories had p <=",round(object@alpha,4),"\n")) } else { table <- data.frame(size,stat, Emp.p=pretty(object@global.pval,4))[ order(object@global.pval),,drop=FALSE][1:n,,drop=FALSE] dimnames(table) <- list(names(object@global.stat[order(object@global.pval)][1:n]), c("Size",stat.name,"Emp.pvalue")) print(table) } } } else { cat(paste(" Error:",object@error,"\n\n")) n = sum(object@global.error <= object@alpha) if(n == 0) { cat(paste("No categories were significant at", object@error,"<",round(object@alpha,4),"\n")) } else { table <- data.frame(size,stat, Emp.p=pretty(object@global.pval,4), Adj.p=pretty(object@global.error,4))[ order(object@global.pval),,drop=FALSE][1:n,,drop=FALSE] dimnames(table) <- list(names(object@global.stat[order(object@global.pval)][1:n]), c("Size",stat.name,"Emp.pvalue","Adj.pvalue")) print(table) } } } ) setMethod("[", "SAFE", function(x, i, j,...,drop) { if (all(i %in% names(x@global.stat)) | all(i %in% 1:length(x@global.stat)) | (is.logical(i) & (length(i) == length(x@global.stat)))) { if (all(i %in% names(x@global.stat))) i <- match(i,names(x@global.stat)) x@global.stat <- x@global.stat[i] x@global.pval <- x@global.pval[i] x@global.error <- x@global.error[i] x@C.mat <- x@C.mat[,i] x@alpha <- 1 return(x) } else stop("unrecognized category(s)", call. = FALSE) } ) `safedag` <- function(object = NULL, ontology = NULL, top = NULL, file = NULL, color.cutoffs = c(0.1,0.01,0.001), filter = 0,max.GOnames = 200){ require("GOstats");require("Rgraphviz") if(is.null(ontology)) { ontology <- Ontology(mget(names(object@global.stat)[1],GOTERM)[[1]]) cat(paste("Ontology unspecified so assumed to be GO.",ontology,"\n",sep="")) } else if(!ontology %in% c("GO.CC","GO.BP","GO.MF")) { stop("ontology must be \"GO.CC\", \"GO.BP\", or \"GO.MF\"", call.=FALSE) ontology = substr(ontology,4,5) } if(!is.null(top)){ keep2 <- top stop <- 0 names <- names(as.list(get(paste("GO",ontology,"CHILDREN",sep="")))) while(stop==0){ parents <- keep2[keep2 %in% names] children <- unlist(mget(parents,get(paste("GO",ontology,"CHILDREN",sep="")))) stop <- all(children %in% keep2) keep2 <- unique(c(keep2,children)) } } else { top <- c("GO:0005575","GO:0008150","GO:0003674")[c("CC","BP","MF") %in% ontology] keep2 <- NULL } parents <- get(paste("GO",ontology,"PARENTS",sep="")) C.names <- names(object@global.stat) keep <- C.names %in% names(as.list(parents)) cat(paste(sum(keep)," of ",length(C.names)," categories in GO.", ontology,"\n",sep="")) if(!is.null(keep2)){ keep[!C.names %in% keep2] <- FALSE cat(paste(sum(keep),"of",length(C.names),"categories below",top,"\n")) } p <- object@global.pval cat(paste("\n",sum(p<=color.cutoffs[3]),"categories with p <=",color.cutoffs[3],"\n")) if(filter<3) cat(paste("",sum(p<=color.cutoffs[2]),"categories with p <=",color.cutoffs[2],"\n")) if(filter<2) cat(paste("",sum(p<=color.cutoffs[1]),"categories with p <=",color.cutoffs[1],"\n")) if(filter) keep[p>color.cutoffs[filter]] <- FALSE color <- 8 - (p<=color.cutoffs[1])*6 + (p<=color.cutoffs[2]) + (p<=color.cutoffs[3]) g <- GOGraph(C.names[keep], parents) nda <- makeNodeAttrs(g) nodes <- names(nda$fillcolor) if(is.null(keep2)) g <- subGraph(nodes[!nodes %in% c("all","top")],g) else g <- subGraph(nodes[nodes %in% keep2],g) nda <- makeNodeAttrs(g) nodes <- names(nda$fillcolor) match <- match(nodes,C.names[keep], nomatch=0) nda$fillcolor[match] <- color[keep] nda$fillcolor[!nodes %in% C.names] <- "white" if(length(nodes)>max.GOnames) nda$label[nda$label!=""] <- " " else { if(is.null(keep2)) nda$label[nodes==top] <- ontology } g <- agopen(g, nodeAttrs = nda,name="whatever") if(!is.null(file)) postscript(file) #else x11() par(mfrow=c(1,1)) plot(g) if(!is.null(file)) dev.off() } `safeplot` <- function(safe = NULL, cat.name = NULL, c.vec = NULL, local.stats = NULL, p.val = NULL, one.sided = NA, limits = c(-Inf,Inf), extreme = NA, italic = FALSE , x.label = "Ranked local statistic"){ if(!is.null(safe)){ local.stats <- safe@local.stat C.names <- names(safe@global.stat) if (is.na(one.sided)) if(substr(safe@local,1,1)=="f") one.sided<-TRUE else one.sided<-FALSE if(is.null(cat.name)) cat.name <- C.names[order(safe@global.pval)][1] if(one.sided & length(limits)==1) limits <- c(-Inf,limits) if (prod(limits == c(-Inf,Inf)) == 1 & prod(1-is.na(safe@local.pval))) { if(!one.sided) limits[1] <- min(local.stats[(safe@local.pval > 0.05)&(local.stats < 0)]) limits[2] <- max(local.stats[(safe@local.pval > 0.05)&(local.stats > 0)]) cat(paste("Shaded limits are the largest local statistics with p > 0.05\n", " Limits = (",round(limits[1],3),",",round(limits[2],3),")\n")) } c.vec <- as.matrix(safe@C.mat[,C.names == cat.name])[,1] if(is.null(p.val)) p.val <- safe@global.pval[C.names == cat.name] } if(!is.null(names(local.stats))) gene.names <- names(local.stats) else gene.names <- NULL if(is.na(extreme)) extreme <- (sum(c.vec) > 24) g <- sum(c.vec==1) m <- length(c.vec) local.sorted <- sort(local.stats) c.vec.sorted <- c.vec[order(local.stats)] if(!is.null(gene.names)) gene.names.sorted <- gene.names[order(local.stats)] cdf <- cumsum(c.vec.sorted)/ g nf <- layout(matrix(c(1,2),2,1,byrow=TRUE), c(4,4), c(1,3), TRUE) par(plt=c(.1,.9,0,1),adj=0) plot(1:m,rep(0.5,m),ylim=c(0.04,1),xaxs="i",axes=FALSE,ylab="",xlab= "",col=0) for(i in 1:m) if(c.vec.sorted[i]==1) points(rep(i,2),c(0.005,0.1),type="l",lwd=2) if(!is.null(gene.names)){ zero <- sum(local.stats<0) + 0.5 if (extreme) c.vec.sorted[(local.sorted>=limits[1])&(local.sorted<=limits[2])] <- 0 tick<-(1:m)[c.vec.sorted==1] gA <- sum(tickzero) tick.adj<-c(rep(max(tick[1],0.01*m),gA),rep(min(tick[gA+gB],0.99*m),gB)) i <- gA+gB; stopB <- 0 if (gB) lines(c(tick.adj[i],tick[i]), c(0.25, 0.1)) if (gB>1) while(stopB == 0){ i <- i - 1 tick.adj[i]<-min(tick.adj[i+1]-0.02*m,tick[i]) if(tick.adj[i]>m/2) lines(c(tick.adj[i],tick[i]), c(0.25, 0.1)) else { stopB <- i+1} if(i == gA+1) stopB <- gA+1 } else stopB <- 0 i <- 1; stopA <- 0 if (gA) lines(c(tick.adj[i],tick[i]), c(0.25, 0.1)) if (gA>1) while(stopA == 0){ i <- i + 1 tick.adj[i]<- max(tick.adj[i-1]+0.02*m,tick[i]) if(tick.adj[i]0):stopA,stopB:((gA+gB)*(gB>0)))]+0.0005*m,.26, gene.names.sorted[c.vec.sorted==1][c((gA>0):stopA,stopB:((gA+gB)*(gB>0)))], srt=90,cex=.5,font=1+2*italic) } par(adj=0.5,plt=c(.1,.9,.5,1)) plot(c(1,m),c(0,1),xlim=c(1,m),xaxs="i",yaxs="i",xlab=x.label,ylab="",main="",col=0) blocks <- c(sum(local.stats -Inf) polygon(c(m*0.0025,blocks[1],blocks[1],m*0.0025), c(0.004,0.004,0.996,0.996), border=FALSE,col=8) if(limits[2]< Inf) polygon(c(blocks[2],m*0.9975,m*0.9975,blocks[2]), c(0.004,0.004,0.996,0.996), border=FALSE,col=8) lines(c(1,m),c(0,1),lty=2, lwd=0.5) lines(1:m,cdf,type="s", lty=1,lwd=0.5) if (!is.null(p.val) & !is.null(cat.name)){ if (mean(cdf)<=0.5){ text(m/25,0.87,cat.name,cex=0.8,font=2,adj=0) text(m/25,0.80,paste("p =",round(p.val,4)),cex=0.8,font=2,adj=0) if(substr(cat.name,1,3)=="GO:"){ require("GO") require("annotate") text(m/25,0.75,mget(cat.name, GOTERM)[[1]]@Term,cex=0.6,font=2,adj=0) text(m/25,0.70,paste("Ont:",mget(cat.name, GOTERM)[[1]]@Ontology), cex=0.6,font=2,adj=0) } } else { if(substr(cat.name,1,3)=="GO:"){ require("GO") require("annotate") text(24*m/25,0.27,cat.name,cex=0.8,font=2,adj=1) text(24*m/25,0.2,paste("p =",round(p.val,4)),cex=0.8,font=2,adj=1) text(24*m/25,0.15,mget(cat.name, GOTERM)[[1]]@Term,cex=0.6,font=2,adj=1) text(24*m/25,0.10,paste("Ont:",mget(cat.name, GOTERM)[[1]]@Ontology), cex=0.6,font=2,adj=1) } else { text(24*m/25,0.2,cat.name,cex=0.8,font=2,adj=1) text(24*m/25,0.13,paste("p =",round(p.val,4)),cex=0.8,font=2,adj=1) } } } } `safe` <- function(X.mat, y.vec, C.mat = NULL, platform = NULL, annotate = NULL, Pi.mat = NULL, local="default", global = "Wilcoxon", args.local = NULL, args.global = list(one.sided=FALSE), error = "none", alpha = NA, method = "permutation", min.size = 2, max.size = Inf, ...){ if(is(X.mat,"exprSet")) { require("Biobase") pData <- pData(X.mat) X.mat <- exprs(X.mat) if (length(y.vec) == 1){ if(is.character(y.vec)){ if(y.vec %in% names(pData)) y.vec <- pData[[y.vec]] else { stop(paste("y.vec = '", y.vec, "' is not a column in pData(X.mat)",sep=""),call.=FALSE) } } else { if(y.vec <= dim(pData)[[2]]) y.vec <- pData[[y.vec]] else { stop(paste("y.vec =",y.vec, " is not a column # in pData(X.mat)"),call.=FALSE) } } } } if (length(y.vec)!=dim(X.mat)[[2]]) { stop("Dimensions of X.mat and y.vec do not conform",call.=FALSE)} X.mat <- as.matrix(X.mat) if(is.null(C.mat)){ if(is.null(platform) | is.null(annotate)){ stop("C.mat or platform&annotate must be specified",call.=FALSE) } else { require(platform,character.only=TRUE) names <- names(as.list(get(paste(platform,"ACCNUM",sep="")))) if (is.null(dimnames(X.mat)[[1]]) | (sum(dimnames(X.mat)[[1]] %in% names)==0) ) { stop(paste("row.names of X.mat do not conform with the '",platform,"' platform",sep=""),call.=FALSE)} if(substr(annotate[1],1,3)=="GO."){ cat(paste("Building ",annotate," categories from ",platform,"GO2ALLPROBES\n",sep="")) C.mat <- getCmatrix(keyword.list = as.list(get(paste(platform,"GO2ALLPROBES",sep=""))), present.genes = dimnames(X.mat)[[1]], min.size = min.size, max.size = max.size, GO.ont = substr(annotate,4,5)) C.names <- C.mat$col.names C.mat <- C.mat$C.mat.csr } else if(annotate=="KEGG"){ cat(paste("Building ",annotate," categories from ",platform,"PATH\n",sep="")) C.mat <- getCmatrix(gene.list = as.list(get(paste(platform,"PATH",sep=""))), present.genes = dimnames(X.mat)[[1]], min.size = min.size, max.size = max.size) C.names <- paste("KEGG:",C.mat$col.names,sep="") C.mat <- C.mat$C.mat.csr } else if(annotate=="PFAM"){ cat(paste("Building ",annotate," categories from ",platform,"PFAM\n",sep="")) C.mat <- getCmatrix(gene.list = as.list(get(paste(platform,"PFAM",sep=""))), present.genes = dimnames(X.mat)[[1]], min.size = min.size, max.size = max.size) C.names <- paste("PFAM:",substr(C.mat$col.names,3,100),sep="") C.mat <- C.mat$C.mat.csr } else stop(paste("Annotate = '",annotate,"' not recognized",sep=""),call.=FALSE) } } else if(class(C.mat)=="matrix"){ require(SparseM) C.names <- dimnames(C.mat)[[2]] C.mat <- as.matrix.csr(C.mat) if (sum(dimnames(X.mat)[[1]]!=dimnames(C.mat)[[1]])>0) { cat("Warning: gene labels do not match between X.mat and C.mat")} } else { C.names <- C.mat$col.names C.mat <- C.mat$C.mat.csr } if (dim(C.mat)[[1]]!=dim(X.mat)[[1]]) { stop("Dimensions of X.mat and C.mat do not conform",call.=FALSE)} num.cats <- dim(C.mat)[[2]] num.genes <- dim(X.mat)[[1]] num.arrays<- dim(X.mat)[[2]] if (local=="default") { if (length(unique(y.vec)) == 2){ local <- "t.Student" } else if(class(y.vec)=="character") { local <- "f.ANOVA" } else local <- "t.LM" } if(is.null(Pi.mat)) { if(method %in% c("bootstrap","bootstrap.t")) Pi.mat <- 200 else Pi.mat <- 1000 } if(length(unlist(Pi.mat))==1) { if(Pi.mat > 1){ n <- length(y.vec) num.perms <- Pi.mat if(local %in% c("t.paired")){ if(method!="permutation") count <- factorial(n/2) else count <- 2^(n/2-1) Pi.mat <- getPImatrix(block.vec = y.vec, K = Pi.mat, method = method) } else if(local %in% c("t.LM","f.GLM","z.COXPH")) { if(method!="permutation") count<-sum(choose(n,k=2:n)*choose(n-1,k=1:(n-1))) else { count <- factorial(n) } Pi.mat <- getPImatrix(n = length(y.vec), K = Pi.mat, method = method) } else { n2 <- table(y.vec) n3 <- 1 for(i in 1:length(n2)) n3<-n3*sum(choose(n2[i],k=2:n2[i])*choose(n2[i]-1,k=1:(n2[i]-1))) if(method!="permutation") count<-n3 else count<-exp(lgamma(n+1) - sum(lgamma(n2+1))) Pi.mat <- getPImatrix(y.vec = y.vec, K = Pi.mat, method = method) } if(count= abs(u.obs)) / num.perms v <- global.stat(u) if(error == "none") emp.p <- emp.p + (v>=v.obs)/num.perms else V.mat[i,] <- v if (trunc(i/100)==i/100) cat(paste(i,"permutations completed\n")) } if((global=="Fisher") & (!is.null(args.global$genelist.cutoff))){ size <- (rep(1,length(u.obs)) %*% C.mat)[1,] if(args.global$one.sided) L <- sum(u.obs >= args.global$genelist.cutoff) else { L <- sum(abs(u.obs) >= args.global$genelist.cutoff)} v.obs <- 1+ qhyper(v.obs,size,num.genes-size,L) } if (error != "none"){ P.mat <- (num.perms + 1 - apply(V.mat,2,rank,ties.method="min")) / num.perms dimnames(P.mat)[[2]] <- dimnames(C.mat)[[2]] error.p<-get(paste("error",error,sep="."))(P.mat) names(error.p) <- C.names if(is.na(alpha)) alpha <- 0.1 return(new("SAFE",local=local, local.stat=u.obs, local.pval=u.pvalue, global=global, global.stat=v.obs, global.pval=P.mat[1,,drop=TRUE], error=error, alpha=alpha, global.error=error.p, C.mat=C.mat, method=method)) } else { if(is.na(alpha)) alpha <- 0.05 return(new("SAFE",local=local, local.stat=u.obs, local.pval=u.pvalue, global=global, global.stat=v.obs, global.pval=emp.p, error=error, alpha=alpha, global.error=as.numeric(rep(NA,num.cats)), C.mat=C.mat, method=method)) } } else if(method=="bootstrap" | method=="bootstrap.t" | method=="bootstrap.q"){ if(local %in% c("f.GLM")){ stop(paste("local = \"", local,"\" can not be used in the bootstrap"),call.=FALSE)} if(!global %in% c("Wilcoxon","Pearson","AveDiff")){ stop(paste("global = \"", global,"\" cant be used in the bootstrap"),call.=FALSE)} if(!error %in% c("none","FWER.Bonf","FWER.Holm","FDR.BH")){ cat(paste("WARNING: error = \"",error,"\" not available in bootstrap\n",sep="")) error="none" } if(is.null(args.local$boot.test)){ u.pvalue <- rep(NA, num.genes) } else if(args.local$boot.test=="q"){ u.pvalue <- rep(1/num.perms, num.genes) } else if(args.local$boot.test=="t"){ u.pvalue <- rep(NA, num.genes) u.sum <- u.obs u2.sum <- u.obs^2 null.local <- 0 } else { cat(paste("WARNING: args.local$boot.test = \"",args.local$boot.test, "\" not recognized\n",sep="")) u.pvalue <- rep(NA, num.genes) args.local$boot.test <- NULL } names(u.pvalue) <- dimnames(X.mat)[[1]] emp.p <- rep(1/num.perms,num.cats) names(emp.p) <- C.names if(global=="Wilcoxon"){ C.size <- (rep(1,num.genes) %*% C.mat)[1,] null.global <- (num.genes + 1) * C.size / 2 } else null.global = 0 v.sum <- v.obs v2.sum <- v.obs^2 for(i in 2:num.perms){ u <- local.stat(data = X.mat[,Pi.mat[i,]], vector = y.vec[Pi.mat[i,]], resample <- Pi.mat[i,]) if(!is.null(args.local$boot.test)){ if(args.local$boot.test=="q"){ u.pvalue <- u.pvalue + (u*sign(u.obs) <= 0)/num.perms } else {u.sum <- u + u.sum ; u2.sum <- u^2 + u2.sum} } v <- global.stat(u) v.sum <- v + v.sum v2.sum <- v^2 + v2.sum emp.p <- emp.p + (v <= null.global)/num.perms if (trunc(i/100)==i/100) cat(paste(i,"bootstrap resamples completed\n")) } if(method=="bootstrap" | method=="bootstrap.t"){ emp.p <- 1 - pt(((v.sum / num.perms) - null.global) / sqrt((v2.sum - v.sum^2 / num.perms) / (num.perms - 1)), df = num.arrays - 1) } if(!is.null(args.local$boot.test)) if(args.local$boot.test=="t") { u.pvalue <- 1- pt(abs(u.sum / num.perms) / sqrt((u2.sum - u.sum^2 / num.perms) / (num.perms - 1)) ,df = num.arrays - 1) } if (error == "none"){ if(is.na(alpha)) alpha <- 0.05 return(new("SAFE",local=local, local.stat=u.obs, local.pval=as.numeric(u.pvalue), global=global, global.stat=v.obs, global.pval=emp.p, error=error, alpha=alpha, global.error=as.numeric(rep(NA,num.cats)), C.mat=C.mat, method=method)) } else { error.p<-get(paste("error",error,sep="."))(t(emp.p)) names(error.p) <- C.names if(is.na(alpha)) alpha <- 0.1 return(new("SAFE",local=local, local.stat=u.obs, local.pval=as.numeric(u.pvalue), global=global, global.stat=v.obs, global.pval=emp.p, error=error, alpha=alpha, global.error=error.p, C.mat=C.mat, method=method)) } } }