#################################################################################### ## ## Rcgh ## ## version 5.3 ## ################################################################################## ################################################################################# ## ## this version appears to work fine (not all functions tested) on R 2.6 ## ################################################################################# ## ## chris.jones@icr.ac.uk ## ################################################################################ ## ## much of this is a clumsy rip-off from and is dependent on ## BioConductor "aCGH" package ## written by Jane Fridlyand and Peter Dimitrov ## ################################################################################## ## ## last update 19th February 2008 ## ################################################################################# ## ## STILL NEED HELP FILES ## AND DEDICATED INTRANET SITE ## ################################################################################## ## ## Version 3 incorporates a bunch of correlative functions ## such as matrix.Rcgh and correlations.Rcgh ## ################################################################################## ## ## NB this version uses hg18 as default ## ################################################################################## ## ## NB general defaults are log2.ratios, threshold = 0.263, cutplot=0, p=0.05 ## ################################################################################## ################################################################################## ## ## process.Rcgh ## ## ## super-filter one-stop shopping ## read, norm, strip, collate, filter, impute, map, create ## ## process.Rcgh <- function (datadir, norm.method="printTipLoess", cy.dye="Cy3",#dye.file, cloneProp=0.5, stdev=0.2, cloneThres=0.7, sampleThres=0.7, map.info=5) #map, phen=NULL ) { ## get dye, map and pheno files from directory too!? dye.name <- dir(path=datadir,pattern=paste("*", "dye", sep="\.")) read.table(paste(datadir,dye.name,sep=""))->dye.tab map.name <- dir(path=datadir,pattern=paste("*", "map", sep="\.")) read.table(paste(datadir,map.name,sep=""),sep="\t",header=T)->map.tab pheno.name <- dir(path=datadir,pattern=paste("*", "pheno", sep="\.")) read.table(paste(datadir,pheno.name,sep=""),sep="\t",header=T,row.names=1)->pheno.tab readNorm.Rcgh(datadir=datadir, norm.method=norm.method, dye.file=dye.tab, cy.dye=cy.dye) -> data.readnorm filter.Rcgh(data.readnorm, logged=TRUE, cloneProp=cloneProp, stdev=stdev, cloneThres=cloneThres, sampleThres=sampleThres) -> data.filter write.table(data.filter,paste(datadir,"data.filter.txt",sep=""),sep="\t",col.names=NA)## write table impute.Rcgh(data.filter)->data.impute write.table(data.impute,paste(datadir,"data.impute.txt",sep=""),sep="\t",col.names=NA)## write table map.Rcgh(data.impute,map=map.tab) -> data.map write.table(data.map,paste(datadir,"data.map.txt",sep=""),sep="\t",col.names=NA)## write table create.Rcgh (data.map, phen=pheno.tab,map.info=map.info) -> data.cgh return (data.cgh) } ################################################################################## ## ## process.fudge.Rcgh ## ## ## super-filter one-stop shopping ## read, norm, strip, collate, filter, impute, map, create ## ## process.fudge.Rcgh <- function (datadir, norm.method="printTipLoess", cy.dye="Cy3",#dye.file, cloneProp=0.5, stdev=0.2, cloneThres=0.7, sampleThres=0.7, map.info=5) #map, phen=NULL ) { ## get dye, map and pheno files from directory too!? dye.name <- dir(path=datadir,pattern=paste("*", "dye", sep="\.")) read.table(paste(datadir,dye.name,sep=""))->dye.tab map.name <- dir(path=datadir,pattern=paste("*", "map", sep="\.")) read.table(paste(datadir,map.name,sep=""),sep="\t",header=T)->map.tab pheno.name <- dir(path=datadir,pattern=paste("*", "pheno", sep="\.")) read.table(paste(datadir,pheno.name,sep=""),sep="\t",header=T,row.names=1)->pheno.tab ## get fudgekey and fudgefactor files from directory too!? fudgekey.name <- dir(path=datadir,pattern=paste("*", "fudgekey", sep="\.")) read.table(paste(datadir,fudgekey.name,sep=""),sep="\t",header=F,row.names=1)->fudgekey.tab fudgefactor.name <- dir(path=datadir,pattern=paste("*", "fudgefactor", sep="\.")) read.table(paste(datadir,fudgefactor.name,sep=""),sep="\t",header=T)->fudgefactor.tab fudgefactor.tab<-fudgefactor.tab[,-1] ##incorp fudging readNorm.rm.Rcgh(datadir=datadir, norm.method=norm.method, dye.file=dye.tab, cy.dye=cy.dye) -> data.readnorm ##write.table write.table(data.readnorm,paste(datadir,"data.norm.txt",sep=""),sep="\t",col.names=NA) rm(data.readnorm) ## numeric/character workaround read.table(paste(datadir,"data.norm.txt",sep=""),sep="\t",header=T,row.names=1,na.strings=c("NA","NaN"))->data.readnorm cat("\nApplying correction factor to normalised data\n\n") fudgeCollated.Rcgh(data.readnorm, fudge.key=fudgekey.tab, method="fudgefactor", fudge.factor=fudgefactor.tab, logged=TRUE)->data.fudged write.table(data.fudged,paste(datadir,"data.fudged.txt",sep=""),sep="\t",col.names=NA)## write table filter.Rcgh(data.fudged, logged=TRUE, cloneProp=cloneProp, stdev=stdev, cloneThres=cloneThres, sampleThres=sampleThres) -> data.filter write.table(data.filter,paste(datadir,"data.filter.txt",sep=""),sep="\t",col.names=NA)## write table impute.Rcgh(data.filter)->data.impute write.table(data.impute,paste(datadir,"data.impute.txt",sep=""),sep="\t",col.names=NA)## write table map.Rcgh(data.impute,map=map.tab) -> data.map write.table(data.map,paste(datadir,"data.map.txt",sep=""),sep="\t",col.names=NA)## write table create.Rcgh (data.map, phen=pheno.tab,map.info=map.info) -> data.cgh return (data.cgh) } #################################################################################### ## ## readNorm.Rcgh ## ## ## wrapper for read, norm, strip, flagtrash, dye collate ## incorporates a rm() when using the fudging ## ## readNorm.rm.Rcgh <- function (datadir, norm.method, dye.file, cy.dye="Cy3") { ## read in gpr files from specified directory readGpr.Rcgh (datadir=datadir) -> raw.data ## normalise by user-defined method normalise.Rcgh (raw.data, norm.method=norm.method) -> norm.data ## trash flagged values, output friendly table flagTrash.Rcgh (norm.data) -> flagtrash.data #cbind(raw.data@maGnames@maInfo$Name,flagtrash.data@maM) -> data.out ## strip out empties, nones, unknowns stripGpr.Rcgh (flagtrash.data) -> strip.data ## collate dye swaps as defined in .dye file dyeCollate.Rcgh (strip.data, dye.file=dye.file, cy.dye=cy.dye) -> dyecoll.data ##write out table? #write.table(dyecoll.data,paste(datadir,"data.norm.txt",sep=""),sep="\t",col.names=NA) ## numeric/character workaround #read.table(paste(datadir,"data.norm.txt",sep=""),sep="\t",header=T,row.names=1,na.strings=c("NA","NaN"))->Mtable ## output unfiltered, unaveraged M table (NB log2) return(dyecoll.data) #return (dyecoll.data) } #################################################################################### require (limma) require (marray) ################################################################################### ## ## BioConductor based reading, normalising ## ## alternative to perl RNormalise to link into filter.Rcgh etc ## #################################################################################### #################################################################################### ## ## readNorm.Rcgh ## ## ## wrapper for read, norm, strip, flagtrash, dye collate ## ## readNorm.Rcgh <- function (datadir, norm.method, dye.file, cy.dye="Cy3") { ## read in gpr files from specified directory readGpr.Rcgh (datadir=datadir) -> raw.data ## normalise by user-defined method normalise.Rcgh (raw.data, norm.method=norm.method) -> norm.data ## trash flagged values, output friendly table flagTrash.Rcgh (norm.data) -> flagtrash.data #cbind(raw.data@maGnames@maInfo$Name,flagtrash.data@maM) -> data.out ## strip out empties, nones, unknowns stripGpr.Rcgh (flagtrash.data) -> strip.data ## collate dye swaps as defined in .dye file dyeCollate.Rcgh (strip.data, dye.file=dye.file, cy.dye=cy.dye) -> dyecoll.data ##write out table? write.table(dyecoll.data,paste(datadir,"data.norm.txt",sep=""),sep="\t",col.names=NA) ## numeric/character workaround read.table(paste(datadir,"data.norm.txt",sep=""),sep="\t",header=T,row.names=1,na.strings=c("NA","NaN"))->Mtable ## output unfiltered, unaveraged M table (NB log2) return(Mtable) #return (dyecoll.data) } ############################################################################################# ## ## readGpr.Rcgh ## ## ## read in all gpr files from specified directory ## ## into marray Raw object ## ## readGpr.Rcgh <- function (datadir) { cat("\n") fnames <- dir(path=datadir,pattern=paste("*", "gpr", sep="\.")) read.GenePix(fnames, path=datadir) -> raw.data cat("\n") return (raw.data) } ############################################################################################### ## ## normalise.Rcgh ## ## ## use maNorm on Raw object, defined norm method ## ## output as marray Norm object ## ## normalise.Rcgh <- function (raw.data, norm.method) { maNorm.cj (raw.data, norm=norm.method, echo=T) -> norm.data #cat("\n.") return (norm.data) } ## ## ## internal function with loess row normalisation ## (and, potentially, others???) ## ## maNorm.cj <- function (mbatch, norm = c("printTipLoess", "none", "median", "loess", "twoD", "scalePrintTipMAD","rowLoess"), subset = TRUE, span = 0.4, Mloc = TRUE, Mscale = TRUE, echo = FALSE, ...) { opt <- list(...) norm.method <- match.arg(norm) if (echo) cat(paste("Normalization method: ", norm.method, ".\n", sep = "")) switch(norm.method, none = maNormMain(mbatch, f.loc = NULL, Mloc = Mloc, Mscale = Mscale, echo = echo), median = maNormMain(mbatch, f.loc = list(maNormMed(x = NULL, y = "maM", subset = subset)), Mloc = Mloc, Mscale = Mscale, echo = echo), loess = maNormMain(mbatch, f.loc = list(maNormLoess(x = "maA", y = "maM", z = NULL, w = NULL, subset = subset, span = span, ...)), Mloc = Mloc, Mscale = Mscale, echo = echo), twoD = maNormMain(mbatch, f.loc = list(maNorm2D(x = "maSpotRow", y = "maSpotCol", z = "maM", g = "maPrintTip", w = NULL, subset = subset, span = span, ...)), Mloc = Mloc, Mscale = Mscale, echo = echo), printTipLoess = maNormMain(mbatch, f.loc = list(maNormLoess(x = "maA", y = "maM", z = "maPrintTip", w = NULL, subset = subset, span = span, ...)), Mloc = Mloc, Mscale = Mscale, echo = echo), scalePrintTipMAD = maNormMain(mbatch, f.loc = list(maNormLoess(x = "maA", y = "maM", z = "maPrintTip", w = NULL, subset = subset, span = span, ...)), f.scale = list(maNormMAD(x = "maPrintTip", y = "maM", geo = TRUE, subset = subset)), Mloc = Mloc, Mscale = Mscale, echo = echo), rowLoess = maNormMain(mbatch, f.loc = list(maNormLoess(x = "maA", y = "maM", z = "maSpotRow", w = NULL, subset = subset, span = span, ...)), Mloc = Mloc, Mscale = Mscale, echo = echo)) } ################################################################################################ ## ## flagTrash.Rcgh ## ## ## trashes to NA any flagged clone from gpr file ## ## flagTrash.Rcgh <- function (norm.data) { as.matrix(norm.data@maM) -> Mdata as.matrix(norm.data@maW) -> Wdata for (j in 1:ncol(Mdata)) { cat("\nTrashing flagged values in array",j) for (k in 1:nrow(Mdata)) { if (Wdata[k,j] == 0) next else Mdata[k,j] <- NA } } cat("\n") cbind(norm.data@maGnames@maInfo$Name,Mdata) -> flag.data return (flag.data) } ########################################################################################## ## ## stripGpr.Rcgh ## ## from readCollated.Rcgh - strips empties, unknowns, nones ## ## NEEDS TABLE WITH NAMES, NOT MARRAY OBJECT ## ## stripGpr.Rcgh <- function (raw.gpr) { raw.gpr -> coll.tab5 ##4 ##was 5 ## 5.8K "Ctb-empties" #coll.tab4[,1]->clonenames4 #clonenames4<-as.matrix(clonenames4) #which(clonenames4=="CTb-empty")->ctbempties #if (length(ctbempties)>0){coll.tab5<-coll.tab4[-ctbempties,]} else coll.tab4->coll.tab5 coll.tab5[,1]->clonenames5 clonenames5<-as.matrix(clonenames5) which(clonenames5=="empty")->empties if (length(empties)>0){coll.tab6<-coll.tab5[-empties,]} else coll.tab5->coll.tab6 coll.tab6[,1]->clonenames6 clonenames6<-as.matrix(clonenames6) which(clonenames6=="none")->nones if (length(nones)>0){coll.tab7<-coll.tab6[-nones,]} else coll.tab6->coll.tab7 coll.tab7[,1]->clonenames7 clonenames7<-as.matrix(clonenames7) which(clonenames7=="UNKNOWN")->unknowns if (length(unknowns)>0){coll.tab8<-coll.tab7[-unknowns,]} else coll.tab7->coll.tab8 rownames(coll.tab8)<-c(1:nrow(coll.tab8)) as.character(coll.tab8[,1])->coll.tab8[,1] return (coll.tab8) } ################################################################################################ ## ## dyeCollate.Rcgh ## ## averages by dye swap from .dye file ## ## dyeCollate.Rcgh <- function (data.tab, dye.file, cy.dye="Cy3") { as.character(dye.file[,1])->array.names as.character(dye.file[,2])->sample.names as.character(dye.file[,3])->dye.names data.tab[,1] -> Name data.tab[,2:ncol(data.tab)]->data.work colnames(data.work[,1:ncol(data.work)])->file.names unique (sample.names) -> unique.samples dye.ave.out <- c(NULL) for (i in 1:length(unique.samples)) { cat("\nCollating dye swaps of sample",i) grep(unique.samples[i],sample.names) -> dye.swap.samples array.names[dye.swap.samples] -> dye.swap.arrays dye.table <- c(NULL) for (j in 1:length(dye.swap.arrays)) { grep(dye.swap.arrays[j],file.names) -> data.col.j ## dye swap flipping dye.swap.samples[j]->current.array as.numeric(data.work[,data.col.j]) -> current.data if (dye.names[current.array]==cy.dye) current.data <- -current.data dye.table <- cbind (dye.table,current.data) } dye.out <-c(NULL) for (l in 1:ncol(dye.table)) { dye.tmp<-as.numeric(dye.table[,l]) dye.out<-cbind(dye.out,dye.tmp) } dye.ave <- apply(dye.out,1,mean,na.rm=T) dye.ave.out <- cbind(dye.ave.out,dye.ave) } colnames(dye.ave.out) <- unique.samples dye.ave.out.tab <-c(NULL) for (m in 1:ncol(dye.ave.out)) { dye.ave.out.tmp<-as.numeric(dye.ave.out[,m]) dye.ave.out.tab<-cbind(dye.ave.out.tab,dye.ave.out.tmp) } colnames(dye.ave.out.tab) <- unique.samples dye.ave.out.final <- cbind(Name,dye.ave.out.tab) cat("\n\n") return(dye.ave.out.final) } ################################################################################### require(impute) #################################################################################### ## ## quality filtering ## #################################################################################### ## ## dye collate input ## sample filter ## clone filter ## calculate mean log2 ratios ## impute ## map ## ##################################################################################### ###################################################################################### ## ## readCollated.Rcgh ## ## ## reads RNormalise "collated.ratios" file and strips out empties, nones, unknowns ## ## readCollated.Rcgh <- function (path) { read.delim(path,header=T,row.names=NULL) -> coll.tab1 coll.tab2 <- coll.tab1[,-1] coll.tab3 <- coll.tab2[,-1] coll.tab4 <- coll.tab3[,-1] coll.tab5 <- coll.tab4[,-2] coll.tab5[,1]->clonenames5 clonenames5<-as.matrix(clonenames5) which(clonenames5=="empty")->empties if (length(empties)>0){coll.tab6<-coll.tab5[-empties,]} else coll.tab5->coll.tab6 coll.tab6[,1]->clonenames6 clonenames6<-as.matrix(clonenames6) which(clonenames6=="none")->nones if (length(nones)>0){coll.tab7<-coll.tab6[-nones,]} else coll.tab6->coll.tab7 coll.tab7[,1]->clonenames7 clonenames7<-as.matrix(clonenames7) which(clonenames7=="UNKNOWN")->unknowns if (length(unknowns)>0){coll.tab8<-coll.tab7[-unknowns,]} else coll.tab7->coll.tab8 rownames(coll.tab8)<-c(1:nrow(coll.tab8)) as.character(coll.tab8[,1])->coll.tab8[,1] return (coll.tab8) } ######################################################################################################## ## ## fudgeCollated.Rcgh ## ## ## correct for e.g. Sigma placental DNA ## ## ## makes the correction directly on collated.ratios ## (before filtering) ## ## fudgeCollated.Rcgh <- function (M, fudge.key=rep(1, ncol(M)), method="median", fudge.factor=NULL, logged=FALSE) { if (logged) { which(fudge.key==1)->fudge.ind M2<-M[,2:ncol(M)] M.fudge <- M2[,fudge.ind] apply (M.fudge, 1, median, na.rm=T) -> fudge.median apply (M.fudge, 1, mean, na.rm=T) -> fudge.mean for (i in 2:ncol(M)) { if (fudge.key[i-1,]==0) next else { if (method=="median") { M[,i] - fudge.median -> M[,i] } if (method=="mean") { M[,i] - fudge.mean -> M[,i] } if (method=="fudgefactor") { M[,i] - fudge.factor -> M[,i] } } } return (M) } else { which(fudge.key==1)->fudge.ind M2<-M[,2:ncol(M)] M.fudge <- M2[,fudge.ind] apply (M.fudge, 1, median, na.rm=T) -> fudge.median apply (M.fudge, 1, mean, na.rm=T) -> fudge.mean for (i in 2:ncol(M)) { if (fudge.key[i-1,]==0) next else { if (method=="median") { M[,i]/fudge.median -> M[,i] } if (method=="mean") { M[,i]/fudge.mean -> M[,i] } if (method=="fudgefactor") { M[,i]/fudge.factor -> M[,i] } } } return (M) } } ######################################################################################### ## ## cloneAve ## cloneSD ## cloneFilter ## sampleStrip ## cloneStrip ## ## ## internal functions for ## mean, sd, clone and sample trashing ## based upon standard deviation ## ## requires file with first column = name ## rest columns = unlogged data ## from "dyecollate" (RDW RNormalise.pl) ## ## ## calculate mean by clone cloneAve<- function (x) { tmp<-c(NULL) mn<-by(x[2:ncol(x)],x[,1],mean,na.rm=T) for (i in 1:length(mn)) { tmp<-rbind(tmp,mn[[i]]) } rownames(mn)->Clone tmp<-cbind(Clone,tmp) return (tmp) } ## calculate standard deviation by clone cloneSD<- function (x) { tmp<-c(NULL) sd1<-by(x[2:ncol(x)],x[,1],sd,na.rm=T) for (i in 1:length(sd1)) { tmp<-rbind(tmp,sd1[[i]]) } rownames(sd1)->Clone tmp<-cbind(Clone,tmp) return (tmp) } ## trash to NA if proportion good clones or sd threshold fails cloneFilter<- function (x,stdev,cloneProp) { settab<-by(x[2:ncol(x)],x[,1],subset) tmp<-c(NULL) mn<-by(x[2:ncol(x)],x[,1],mean,na.rm=T) sd1<-by(x[2:ncol(x)],x[,1],sd,na.rm=T) ## loop within loop has got to be a bad thing for (j in 1:length(mn)) { for (k in 1:length(mn[[j]])) { length(which(!is.na(settab[[j]][,k])))->cl.pres length(which(is.na(settab[[j]][,k])))->cl.na cl.tot<-cl.pres+cl.na if (cl.pres/cl.tot < cloneProp) mntt<-NA else { mn[[j]][k]->mnt sd1[[j]][k]->sdt if (is.na(mnt)) mntt<-NA if (is.na(sdt)) mntt<-NA else { if (sdt>stdev) mntt<-NA else mntt<-mnt } } mntt -> mn[[j]][k] } } for (g in 1:length(mn)) { tmp<-rbind(tmp,mn[[g]]) } rownames(mn)->Clone tmp<-cbind(Clone,tmp) return (tmp) } ## strip by clone thres cloneStrip<-function(data,cloneThres) { clind<-c(NULL) for (l in 1:nrow(data)) { cltot<-length(data[l,]) clpres<-length(which(!is.na(data[l,]))) clprop<-clpres/cltot clind<-c(clind,clprop) } clstrip <- which(clind >= cloneThres) tmpcl<-data[clstrip,] return(tmpcl) } ## strip by sample thres sampleStrip<-function(data,sampleThres) { smind<-1 for (p in 2:ncol(data)) { smtot<-length(data[,p]) smpres<-length(which(!is.na(data[,p]))) smprop<-smpres/smtot smind<-c(smind,smprop) } smstrip <- which(smind >= sampleThres) tmpsm<-data[,smstrip] return(tmpsm) } ######################################################################################### ## ## filter.Rcgh ## ## ## wrapper for all filtering functions ## outputs log2 ratios (if unlogged ratios read in) ## reports how many clones and which samples have been removed ## ## ## removes samples first filter.Rcgh<-function (M, logged=TRUE, cloneProp=0.5, stdev=0.2, cloneThres=0.7, sampleThres=0.7) { cat("\naveraging clones with >",cloneProp*100,"% good and standard deviation <",stdev) M1<-cloneFilter(M,stdev,cloneProp) cat("\nremoving samples with values in < ",sampleThres*100,"% clones") M2<-sampleStrip(M1,sampleThres) cat("\nremoving clones with values in < ",cloneThres*100,"% samples\n\n") M3<-cloneStrip(M2,cloneThres) Mout<-M3 Mdat<-Mout[,2:ncol(Mout)] Mstart<-M[,2:ncol(M)] Mtmp<-c(NULL) for (h in 1:ncol(Mdat)) { Mtmpp<-as.numeric(Mdat[,h]) Mtmp<-cbind(Mtmp,Mtmpp) } if (logged) { Mtmp -> Mdatl2r } else Mdatl2r <- log2(Mtmp) rownames(Mdatl2r)<-Mout[,1] colnames(Mdatl2r)<-colnames(Mdat) colnames(Mstart)->Mall colnames(Mdat)->Mleft which(is.na(match(Mall,Mleft)))->Mrem cat("final number of clones =",nrow(Mdatl2r),"\t( removed ",nrow(M1)-nrow(Mdatl2r),")") cat("\nfinal number of samples =",ncol(Mdat),"\t( removed ",length(Mrem),")\n") cat("\tsamples removed =",Mall[Mrem],"\n\n") return (Mdatl2r) } ## removes clones first filter.clonesfirst.Rcgh<-function (M, logged=FALSE, cloneProp=0.5,stdev=0.2,cloneThres=0.7,sampleThres=0.7) { cat("\naveraging clones with >",cloneProp*100,"% good and standard deviation <",stdev) M1<-cloneFilter(M,stdev,cloneProp) cat("\nremoving clones with values in < ",cloneThres*100,"% samples") M2<-cloneStrip(M1,cloneThres) cat("\nremoving samples with values in < ",sampleThres*100,"% clones\n\n") M3<-sampleStrip(M2,sampleThres) Mout<-M3 Mdat<-Mout[,2:ncol(Mout)] Mstart<-M[,2:ncol(M)] Mtmp<-c(NULL) for (h in 1:ncol(Mdat)) { Mtmpp<-as.numeric(Mdat[,h]) Mtmp<-cbind(Mtmp,Mtmpp) } if (logged) { Mtmp -> Mdatl2r } else Mdatl2r <- log2(Mtmp) rownames(Mdatl2r)<-Mout[,1] colnames(Mdatl2r)<-colnames(Mdat) colnames(Mstart)->Mall colnames(Mdat)->Mleft which(is.na(match(Mall,Mleft)))->Mrem cat("final number of clones =",nrow(Mdatl2r),"\t( removed ",nrow(M1)-nrow(Mdatl2r),")") cat("\nfinal number of samples =",ncol(Mdat),"\t( removed ",length(Mrem),")\n") cat("\tsamples removed =",Mall[Mrem],"\n\n") return (Mdatl2r) } ######################################################################################################## ## ## fudgeFilter.Rcgh ## ## ## correct for e.g. Sigma placental DNA ## ## ## makes the correction after averaging and filtering ## (before imputation) ## ## fudgeFilter.Rcgh <- function (M, fudge.key=rep(1, ncol(M)), method="median", fudge.factor=NULL) { which(fudge.key==1)->fudge.ind M.fudge <- M[,fudge.ind] apply (M.fudge, 1, median, na.rm=T) -> fudge.median apply (M.fudge, 1, mean, na.rm=T) -> fudge.mean for (i in 1:ncol(M)) { if (fudge.key[i]==0) next else { if (method=="median") { M[,i] - fudge.median -> M[,i] } if (method=="mean") { M[,i] - fudge.mean -> M[,i] } if (method=="fudgefactor") { M[,i] - fudge.factor -> M[,i] } } } return (M) } ##################################################################################### ## ## impute.Rcgh ## ## ## wrapper for knn impute ## using output from filter.Rcgh ## ## impute.Rcgh <- function (Mfilter) { ## note change in R 2.0.0 and associated impute package, no longer have to write out "data" imp.tmp <- impute.knn(Mfilter) #imp.out <- imp.tmp$data return (imp.tmp) } ######################################################################################## ## ## map.Rcgh ## ## ## wrapper for mapping clones from filter.Rcg and impute.Rcgh ## requires mapping file with clone name as first column ## ## map.Rcgh <- function (Mknn,map) { map.out <- merge (map,Mknn,by.x=1,by.y=0) nrow (map.out) -> clones.map nrow (Mknn) - clones.map -> clones.unmap cat("\nnumber of clones unmapped =",clones.unmap,"\n") cat("final number of mapped clones =",clones.map,"\n\n") return (map.out) } ####################################################################################### require(aCGH) ######################################################################################## ## ## aCGH package ## ######################################################################################## ########################################################################################### ## ## create.Rcgh ## ## takes output from map.Rcgh and makes an aCGH object ## without going via Excel ## ## merge unfiltered samples with optional (total?) pheno file ## ## create.Rcgh <- function (x, phen=NULL, map.info=6) { ## no. rows of mapping info start.data<-map.info+1 ## genome order genord <- order(x$Chrom,x$kb) x.ord <- x[genord,] rownames(x.ord) <- c(1:nrow(x)) ## split data.Rcgh <- x.ord[,start.data:ncol(x.ord)] clones.Rcgh <- x.ord[,1:map.info] ## create pheno from samples present if (!is.null(phen)) { which(!is.na(match(phen[,1],colnames(data.Rcgh)))) -> sample.match phen[sample.match,] -> pheno.tmp order(pheno.tmp[,1])->sample.order pheno.tmp[sample.order,]->pheno.tmp.ord rownames(pheno.tmp.ord)<-c(1:nrow(pheno.tmp.ord)) pheno.tmp.ord -> pheno.Rcgh } ## order data by samples order(colnames(data.Rcgh))-> data.samp.ord data.Rcgh[,data.samp.ord]->data.Rcgh ## create if (!is.null(phen)) aCGH.out <- create.aCGH(data.Rcgh,clones.Rcgh,pheno.Rcgh) else aCGH.out <- create.aCGH(data.Rcgh,clones.Rcgh) ##summary and return summary.Rcgh(aCGH.out) return (aCGH.out) } ########################################################################################## ########################################################################################## ## ## set up start/end centromere positions for each chromosome (for plotting) ## hg_16<-human.chrom.info.Jul03 human.chrom.info.May04 <- structure(list(chrom = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), length = c(245523, 243018, 199506, 191411, 180858, 170976, 158628, 146275, 138429, 135414, 134452, 132450, 114143, 106369, 100339, 88827, 78775, 76117, 63812, 62436, 46944, 49555, 154824, 57702), centromere = c(124200, 93400, 91700, 50900, 47700, 60500, 58900, 45200, 50600, 40300, 52900, 35400, 16000, 15600, 17000, 38200, 22200, 16100, 28500, 27100, 12300, 11800, 59400, 11500)), .Names = c("chrom", "length", "centromere"), class = "data.frame", row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24")) hg_17 <- human.chrom.info.May04 hg_18 <- structure(list(chrom = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), length = c(247250, 242951, 199502, 191273, 180858, 170900, 158821, 146275, 140273, 135375, 134452, 132350, 114143, 106369, 100339, 88827, 78775, 76117, 63812, 62436, 46944, 49691, 154914, 57773), centromere = c(124300, 93300, 91700, 50700, 47700, 60500, 58900, 45200, 50600, 40300, 52900, 35400, 16000, 15600, 17000, 38200, 22200, 16100, 28500, 27100, 12300, 11800, 59400, 11500)), .Names = c("chrom", "length", "centromere"), class = "data.frame", row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24")) ############################################################################################## require(aws) ############################################################################################ ## ## aCGH.obj data structures ## ############################################################################################ ## ## hmm ratios ## aws ratios ## thresholded data ## steady state and overall MADs ## ############################################################################################# ############################################################################################## ## ## summary.Rcgh ## ## summary.Rcgh<- function(object, ...) { print.aCGH(object, ...) if (!is.null(log2.ratios.imputed(object))) cat("\naCGH-imputed data exist\n") else cat("\naCGH-imputed data does not exist\n") if (!is.null(hmm(object))) cat("HMM states assigned\n") else cat("HMM states are not assigned\n") if (!is.null(hmm.merged(object))) cat("merged HMM states assigned\n") else cat("merged HMM states are not assigned\n") if (!is.null(sd.samples(object))) cat("samples standard deviations are computed\n") else cat("samples standard deviations are not computed\n") if (!is.null(mads(object))) cat("overall MADs are computed\n") else cat("overall MADs are not computed\n") if (!is.null(genomic.events(object))) cat("genomic events are assigned\n\n") else cat("genomic events are not assigned\n\n") if (!is.null(phenotype(object))) cat("phenotype exists\n") else cat("phenotype does not exist\n") if (!is.null(hmm.ratios(object))) cat("\nHMM ratios calculated\n") else cat("\nHMM ratios not calculated\n") if (!is.null(aws.ratios(object))) cat("AWS ratios calculated\n") else cat("AWS ratios not calculated\n") if (!is.null(cbs.ratios(object))) cat("CBS ratios calculated\n") else cat("CBS ratios not calculated\n") if (!is.null(thres.ratios(object))) cat("thresholded ratios calculated\n\n") else cat("thresholded ratios not calculated\n\n") } ######################################################################################## ## ## phenoSub.Rcgh ## ## ## subsets aCGH.obj by NAs in phenotype ## keeps hmm.ratios, aws.ratios, thres.ratios ## ffpe.ratios ## mads, phenotype ## ## ## works out which samples to take phenoSub.Rcgh <- function (aCGH.obj, ph) { which(colnames(phenotype(aCGH.obj))==ph)->phe phenotype(aCGH.obj)[[phe]]->phenotmp which(!is.na(phenotmp))->sub.nonna phenosubbed <- subCGH(aCGH.obj,,sub.nonna) return (phenosubbed) } ## internal, does the subsetting subCGH <- function(aCGH.obj, i, j, keep = TRUE) { drop.i <- missing(i) drop.j <- missing(j) if (drop.i && drop.j) aCGH.obj else { if (drop.i) i <- 1:nrow(aCGH.obj) else if (mode(i) == "logical") i <- which(i) if (drop.j) j <- 1:ncol(aCGH.obj) else if (mode(j) == "logical") j <- which(j) res <- if (keep) list( log2.ratios = aCGH.obj$log2.ratios[i, j], clones.info = aCGH.obj$clones.info[ i, ], qual.rep = NULL, bad.quality.index = NULL, log2.ratios.imputed = if (is.null(aCGH.obj$log2.ratios.imputed)) NULL else aCGH.obj$log2.ratios.imputed[i, j], hmm.ratios = if (is.null(aCGH.obj$hmm.ratios)) NULL else aCGH.obj$hmm.ratios[i, j], hmm.merged = if (is.null(aCGH.obj$hmm.merged)) NULL else aCGH.obj$hmm.merged[i, j], aws.ratios = if (is.null(aCGH.obj$aws.ratios)) NULL else aCGH.obj$aws.ratios[i, j], ffpe.ratios = if (is.null(aCGH.obj$ffpe.ratios)) NULL else aCGH.obj$ffpe.ratios[i, j], thres.ratios = if (is.null(aCGH.obj$thres.ratios)) NULL else aCGH.obj$thres.ratios[i, j], mads = if (is.null(aCGH.obj$mads)) NULL else aCGH.obj$mads[ j], sd.samples = if (is.null(aCGH.obj$sd.samples)) NULL else with(aCGH.obj$sd.samples, list(madChrom = madChrom[ ,j ], madGenome = madGenome[j] ) ), genomic.events = if (is.null(aCGH.obj$genomic.events)) NULL else lapply(aCGH.obj$genomic.events, function(el) if (is.matrix(el)) el[ ,j ] else lapply(el, function(el1) el1[i, j]) ), hmm = if (is.null(hmm(aCGH.obj))) NULL else subset.hmm(hmm(aCGH.obj), i = i, j = j, chroms = which(table(clones.info(aCGH.obj)$Chrom[i]) > 0) ), phenotype = if (is.null(aCGH.obj$phenotype)) NULL else aCGH.obj$phenotype[j, , drop = FALSE] ) else { warning("For now just subsetting the log2.ratios\ and phenotype. Please rerun the find.hmm.states function!") list(log2.ratios = aCGH.obj$log2.ratios[i, j, drop = FALSE], clones.info = aCGH.obj$clones.info[i, , drop = FALSE], qual.rep = NULL, bad.quality.index = NULL, log2.ratios.imputed = if (is.null(aCGH.obj$log2.ratios.imputed)) NULL else aCGH.obj$log2.ratios.imputed[i, j, drop = FALSE], sd.samples = NULL, genomic.events = NULL, hmm = NULL, phenotype = if (is.null(aCGH.obj$phenotype)) NULL else aCGH.obj$phenotype[j, , drop = FALSE] ) } attr(res, "call") <- sys.call() class(res) <- "aCGH" res } } ##################################################################################### ## ## mads.Rcgh ## ## ## calculates overall mad for thresholding (quality control??) ## ## incorporate sd, hmm mad, new aws mad?? ## ## mads.Rcgh <- function (aCGH.obj) { dat<-log2.ratios(aCGH.obj) datmad<-apply(dat,2,mad) #datsd<-apply(dat,2,sd) #datmed<-apply(dat,2,median) return (datmad) } mads <- function(aCGH.obj) aCGH.obj$mads "mads<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$mads)) { mads.ok <- all( sapply(1:length(aCGH.obj$mads), function(i) all(dim(aCGH.obj$mads[[i]]) == dim(value[[i]])) ) ) if (mads.ok) stop("invalid replacement dimensions") } aCGH.obj$mads<- value aCGH.obj } ###################################################################################### ## ## hmm.Rcgh ## ## ## writes hmm.ratios from find.hmm.states to aCGH.obj ## ## hmm.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), chr = 1:num.chromosomes(aCGH.obj), statesres = hmm.merged(aCGH.obj), maxChrom = 23, chrominfo = hg_17, yScale = c(-2, 2), samplenames = sample.names(aCGH.obj) ) { preddatALL<-c(NULL) for (k in (1:length(samples))) { ge <- genomic.events(aCGH.obj) aber <- ge$aberrations$aber amplif <- ge$amplifications$amplif trans <- ge$transitions$trans.matr outliers <- ge$outliers$outlier pred <- ge$outliers$pred.out chrom.rat <- chrominfo$length / max(chrominfo$length) chrom.start <- c(0, cumsum(chrominfo$length))[1:maxChrom] ##chrom.mid contains middle positions of the chromosomes relative to ##the whole genome (useful for plotting the whole genome) chrom.mid <- chrom.start + chrominfo$length[1:maxChrom] / 2 chrom <- statesres[ ,1 ] sq.state <- seq(3, ncol(statesres), b = 6) sq.obs <- seq(8, ncol(statesres), b = 6) preddat<-c(NULL) for (j in 1:length(chr)) { ind.nonna <- which(!is.na(statesres[chrom == chr[j], sq.obs[k]])) kb <- statesres[chrom == chr[j], 2][ind.nonna] / 1000 obs <- statesres[chrom == chr[j], sq.obs[k]][ind.nonna] states <- statesres[chrom == chr[j], sq.state[k]][ind.nonna] nstates <- length(unique(states)) abernow <- aber[chrom == chr[j], k][ind.nonna] outliersnow <- outliers[chrom == chr[j], k][ind.nonna] amplifnow <- amplif[chrom == chr[j], k][ind.nonna] transnow <- trans[chrom == chr[j], k][ind.nonna] ## predicted values when non-aberration of outlier: otherwise ## observed prednow <- obs predicted <- pred[chrom == chr[j], k][ind.nonna] prednow[outliersnow == 0 & abernow == 0] <- predicted[outliersnow == 0 & abernow == 0] y.min <- min(yScale[1], min(obs)) y.max <- max(yScale[2], max(obs)) preddat<- c(preddat,prednow) preddat<-as.vector(preddat) } preddatALL<-cbind(preddatALL,preddat) } colnames(preddatALL)<-samplenames preddatALL } hmm.ratios <- function(aCGH.obj) aCGH.obj$hmm.ratios "hmm.ratios<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$hmm.ratios)) { hmm.ratios.ok <- all( sapply(1:length(aCGH.obj$hmm.ratios), function(i) all(dim(aCGH.obj$hmm.ratios[[i]]) == dim(value[[i]])) ) ) if (hmm.ratios.ok) stop("invalid replacement dimensions") } aCGH.obj$hmm.ratios<- value aCGH.obj } ########################################################################################## ## ## aws.Rcgh ## ## ## adaptive weight smoothing split by chromosome ## ## default hmax currently 1000 ## aws.Rcgh <- function (aCGH.obj,data=log2.ratios(aCGH.obj),h=1000) { #data<-log2.ratios(aCGH.obj) chr<-clones.info(aCGH.obj)$Chrom chrout<-c(NULL) for (j in 1:length(unique(chr))) { chr.tmp<-which(chr==j) dat.chr<-data[chr.tmp,] tmpout<-c(NULL) for (i in 1:ncol(dat.chr)) { tmp<-aws(dat.chr[,i],hmax=h) tmpout<-cbind(tmpout,tmp$theta) } chrout<-rbind(chrout,tmpout) } colnames(data)->colnames(chrout) return(chrout) } aws.ratios <- function(aCGH.obj) aCGH.obj$aws.ratios "aws.ratios<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$aws.ratios)) { aws.ratios.ok <- all( sapply(1:length(aCGH.obj$aws.ratios), function(i) all(dim(aCGH.obj$aws.ratios[[i]]) == dim(value[[i]])) ) ) if (aws.ratios.ok) stop("invalid replacement dimensions") } aCGH.obj$aws.ratios<- value aCGH.obj } ## special fudge function to allow for the Chrom 22 "issues" ## skips smoothing 22 and uses log2.ratios aws.chr22.Rcgh <- function (aCGH.obj,h=1000) { data<-log2.ratios(aCGH.obj) chr<-clones.info(aCGH.obj)$Chrom chrout<-c(NULL) for (j in 1:21) { chr.tmp<-which(chr==j) dat.chr<-data[chr.tmp,] tmpout<-c(NULL) for (i in 1:ncol(dat.chr)) { tmp<-aws(dat.chr[,i],hmax=h) tmpout<-cbind(tmpout,tmp$theta) } chrout<-rbind(chrout,tmpout) } colnames(data)->colnames(chrout) chr.tmp22<-which(chr==22) dat.chr22<-data[chr.tmp22,] chrout<-rbind(chrout,dat.chr22) for (k in 23:24) { chr.tmp<-which(chr==k) dat.chr<-data[chr.tmp,] tmpout<-c(NULL) for (i in 1:ncol(dat.chr)) { tmp<-aws(dat.chr[,i],hmax=h) tmpout<-cbind(tmpout,tmp$theta) } colnames(data)->colnames(tmpout) chrout<-rbind(chrout,tmpout) } colnames(data)->colnames(chrout) rownames(chrout)<-seq(1:nrow(chrout)) return(chrout) } ################################################################################### ## ## thres.Rcgh ## ## ## calculate gains and losses and writes "thresholded" data to aCGH.obj ## in the form of -1, 0, 1 ## ## thres.Rcgh<- function(aCGH.obj, calcthres=0.263, chrominfo=hg_17 , data = log2.ratios(aCGH.obj) ) { calcthres->chkthres if (length(chkthres)==1) chkthresout<-rep(chkthres,times=ncol(data))else chkthres->chkthresout thresOUT<-c(NULL) x <- data datainfo <- clones.info(aCGH.obj) for (j in 1:ncol(x)) { thresJ<- sapply(x[,j],thresCalc,thres=chkthresout[j]) thresOUT<-cbind(thresOUT,thresJ) } colnames(thresOUT)<-colnames(x) return (thresOUT) } ## internal, bins the data to 1,0,-1 thresCalc <- function (y,thres) { if (y >= thres) return(1) if (y <= -thres) return(-1) else return(0) } thres.ratios <- function(aCGH.obj) aCGH.obj$thres.ratios "thres.ratios<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$thres.ratios)) { thres.ratios.ok <- all( sapply(1:length(aCGH.obj$thres.ratios), function(i) all(dim(aCGH.obj$thres.ratios[[i]]) == dim(value[[i]])) ) ) if (thres.ratios.ok) stop("invalid replacement dimensions") } aCGH.obj$thres.ratios<- value aCGH.obj } ############################################################################################# ## ## basic plotting functions ## ############################################################################################## ## ## whole genome ## specific chromosome ## choice of data ## can plot multiple data sets ## points and/or lines ## ############################################################################################## ############################################################################################### ## ## plotGenome.Rcgh ## ## ## plots data from each experiment along ordered length of genome ## ## plotGenome.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), naut = 22, Y = TRUE, X = TRUE, data = log2.ratios(aCGH.obj), chrominfo = hg_18, lw= -2, hi=2,plotthres=0.263, samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { yScale = c(lw, hi) datainfo <- clones.info(aCGH.obj) ##total number of chromosomes to plot: nchr <- naut if (X) nchr <- nchr+1 if (Y) nchr <- nchr+1 nsamples <- length(samplenames) ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] ##screening out unmapped clones ind.unmap <- which(is.na(chrom) | is.na(kb) | (chrom > (naut+2))) if (length(ind.unmap) > 0) { chrom <- chrom[-ind.unmap] kb <- kb[-ind.unmap] data <- data[-ind.unmap,] } ##removing chromosome not to plot: data <- data[chrom <= nchr,] kb <- kb[chrom <= nchr] chrom <- chrom[chrom <= nchr] chrominfo <- chrominfo[1:nchr,] chrom.start <- c(0, cumsum(chrominfo$length))[1:nchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) par(cex = .6, pch = 16, lab = c(1, 6, 7), cex.axis = 1.2, xaxt="n", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##Now, determine vertical scale for each chromosome: y.min <- rep(yScale[1], nrow(chrominfo)) y.max <- rep(yScale[2], nrow(chrominfo)) for (i in 1:nrow(chrominfo)) { if (minna(vec[(chrom==i)]) < y.min[i]) y.min[i] <- minna(vec[(chrom==i)]) if (maxna(vec[(chrom==i)]) > y.max[i]) y.max[i] <- maxna(vec[(chrom==i)]) } ##set genome scale to the min and mx values of the rest of the chromosomes: ygenome.min <- minna(y.min) ygenome.max <- maxna(y.max) plot(clone.genomepos / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim = c(min(clone.genomepos[clone.genomepos > 0], na.rm = TRUE) / 1000, clone.genomepos[sum(clone.genomepos>0)] / 1000), col="darkblue") title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) for (i in seq(1,naut,b=2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]/1000), line=1, col="purple4",cex=0.75,font=2) for (i in seq(2,naut,b=2)) mtext(paste("", i), side = 1, at = chrom.mid[i] / 1000, line=1, col="purple4",cex=0.75,font=2) if (X) mtext("X", side = 1, at = chrom.mid[naut + 1] / 1000, line=1, col="purple4",cex=0.75,font=2) if (Y) mtext("Y", side = 1, at = chrom.mid[naut + 2] / 1000, line=1, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) ##abline(h=seq(ygenome.min,ygenome.max, b=.2), lty=3) abline(h = seq(lw,hi, b=.5), lty = 3,col="gray25") abline(h=0,lty=1,col="black") abline(h=-(plotthresout[[k]]),lty=5,col="red",lwd=1.5) abline(h=plotthresout[[k]],lty=5,col="green",lwd=1.5) abline(v = (chrominfo$centromere + chrom.start) / 1000, lty = 3, col = "purple4") } } ########################################################################################### ## ## plotGenome.BW.Rcgh ## ## black&white for publication ## ## need to incorporate as options in generic plotGenome function ## plotGenome.BW.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), naut = 22, Y = TRUE, X = TRUE, data = log2.ratios(aCGH.obj), chrominfo = hg_18, lw= -2, hi= 2,plotthres=0.263, samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { yScale = c(lw, hi) datainfo <- clones.info(aCGH.obj) ##total number of chromosomes to plot: nchr <- naut if (X) nchr <- nchr+1 if (Y) nchr <- nchr+1 nsamples <- length(samplenames) ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] ##screening out unmapped clones ind.unmap <- which(is.na(chrom) | is.na(kb) | (chrom > (naut+2))) if (length(ind.unmap) > 0) { chrom <- chrom[-ind.unmap] kb <- kb[-ind.unmap] data <- data[-ind.unmap,] } ##removing chromosome not to plot: data <- data[chrom <= nchr,] kb <- kb[chrom <= nchr] chrom <- chrom[chrom <= nchr] chrominfo <- chrominfo[1:nchr,] chrom.start <- c(0, cumsum(chrominfo$length))[1:nchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) par(cex = .6, pch = 16, lab = c(1, 6, 7), cex.axis = 1.2, xaxt="n", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##Now, determine vertical scale for each chromosome: y.min <- rep(yScale[1], nrow(chrominfo)) y.max <- rep(yScale[2], nrow(chrominfo)) for (i in 1:nrow(chrominfo)) { if (minna(vec[(chrom==i)]) < y.min[i]) y.min[i] <- minna(vec[(chrom==i)]) if (maxna(vec[(chrom==i)]) > y.max[i]) y.max[i] <- maxna(vec[(chrom==i)]) } ##set genome scale to the min and mx values of the rest of the chromosomes: ygenome.min <- minna(y.min) ygenome.max <- maxna(y.max) plot(clone.genomepos / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim = c(min(clone.genomepos[clone.genomepos > 0], na.rm = TRUE) / 1000, clone.genomepos[sum(clone.genomepos>0)] / 1000), col="black") title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) for (i in seq(1,naut,b=2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]/1000), line=1, col="gray25",cex=0.75,font=2) for (i in seq(2,naut,b=2)) mtext(paste("", i), side = 1, at = chrom.mid[i] / 1000, line=1, col="gray25",cex=0.75,font=2) if (X) mtext("X", side = 1, at = chrom.mid[naut + 1] / 1000, line=1, col="gray25",cex=0.75,font=2) if (Y) mtext("Y", side = 1, at = chrom.mid[naut + 2] / 1000, line=1, col="gray25",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) ##abline(h=seq(ygenome.min,ygenome.max, b=.2), lty=3) abline(h = seq(lw,hi, b=.5), lty = 3,col="darkgrey") abline(h=0,lty=3) abline(h=-(plotthresout[[k]]),lty=5,col="gray50",lwd=1.5) abline(h=plotthresout[[k]],lty=5,col="gray50",lwd=1.5) abline(v = (chrominfo$centromere + chrom.start) / 1000, lty = 3, col = "gray50") } } ############################################################################################ ## ## coplotGenome.Rcgh ## ## ## allows two datasets from each experiment to be plotted ## first as points, second either as points or lines ## ## coplotGenome.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), naut = 22, Y = TRUE, X = TRUE, data = log2.ratios(aCGH.obj), data2=hmm.ratios(aCGH.obj), data.col="slategray4", data2.col="darkblue", data2.lines=FALSE, chrominfo = hg_18, chrominfoh=hg_18,yScale = c(-2, 2),plotthres=0.263, samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { data<-data datainfo <- clones.info(aCGH.obj) ##total number of chromosomes to plot: nchr <- naut if (X) nchr <- nchr+1 if (Y) nchr <- nchr+1 nsamples <- length(samplenames) ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##screening out unmapped clones ind.unmap <- which(is.na(chrom) | is.na(kb) | (chrom > (naut+2))) if (length(ind.unmap) > 0) { chrom <- chrom[-ind.unmap] kb <- kb[-ind.unmap] data <- data[-ind.unmap,] } ##removing chromosome not to plot: data <- data[chrom <= nchr,] kb <- kb[chrom <= nchr] chrom <- chrom[chrom <= nchr] chrominfo <- chrominfo[1:nchr,] chrom.start <- c(0, cumsum(chrominfo$length))[1:nchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) datah <- data2 datainfoh <- clones.info(aCGH.obj) ##total number of chromosomes to plot: nchr <- naut if (X) nchr <- nchr+1 if (Y) nchr <- nchr+1 nsamples <- length(samplenames) ##reordering according to genomic position ordh <- order(datainfoh$Chrom, datainfoh$kb) chromh <- datainfoh$Chrom[ord] kbh <- datainfoh$kb[ord] datah <- datah[ord,] ##screening out unampped clones ind.unmaph <- which(is.na(chromh) | is.na(kbh) | (chromh > (naut+2))) if (length(ind.unmaph) > 0) { chromh <- chromh[-ind.unmaph] kbh <- kbh[-ind.unmaph] datah <- datah[-ind.unmaph,] } ##removing chromosome not to plot: datah <- datah[chromh <= nchr,] kbh <- kbh[chromh <= nchr] chromh <- chromh[chromh <= nchr] chrominfoh <- chrominfoh[1:nchr,] chrom.starth <- c(0, cumsum(chrominfoh$length))[1:nchr] chrom.centrh <- chrom.starth + chrominfoh$centr chrom.midh <- chrom.starth + chrominfoh$length / 2 chrom.rath <- chrominfoh$length / max(chrominfoh$length) par(cex = 0.6, pch = 16, lab = c(1, 6, 7), cex.axis = 1.2,xaxt="n", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##Now, determine vertical scale for each chromosome: y.min <- rep(yScale[1], nrow(chrominfo)) y.max <- rep(yScale[2], nrow(chrominfo)) for (i in 1:nrow(chrominfo)) { if (minna(vec[(chrom==i)]) < y.min[i]) y.min[i] <- minna(vec[(chrom==i)]) if (maxna(vec[(chrom==i)]) > y.max[i]) y.max[i] <- maxna(vec[(chrom==i)]) } ##set genome scale to the min and mx values of the rest of the chromosomes: ygenome.min <- minna(y.min) ygenome.max <- maxna(y.max) plot(clone.genomepos / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim = c(min(clone.genomepos[clone.genomepos > 0], na.rm = TRUE) / 1000, clone.genomepos[sum(clone.genomepos>0)] / 1000), col=data.col) title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) for (i in seq(1,naut,b=2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]/1000), line=1, col="purple4",cex=0.75,font=2) for (i in seq(2,naut,b=2)) mtext(paste("", i), side = 1, at = chrom.mid[i] / 1000, line=1, col="purple4",cex=0.75,font=2) if (X) mtext("X", side = 1, at = chrom.mid[naut + 1] / 1000, line=1, col="purple4",cex=0.75,font=2) if (Y) mtext("Y", side = 1, at = chrom.mid[naut + 2] / 1000, line=1, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) ##abline(h=seq(ygenome.min,ygenome.max, b=.2), lty=3) abline(h = seq(-1.5,1.5, b=.5), lty = 3,col="grey") abline(h=0,lty=3) abline(h=-(plotthresout[[k]]),lty=5,col="red",lwd=1.5) abline(h=plotthresout[[k]],lty=5,col="green",lwd=1.5) abline(v = (chrominfo$centromere + chrom.start) / 1000, lty = 3, col = "purple4") ## data2 vech <- datah[ ,samples[k] ] nameh <- samplenames[samples[k]] clone.genomeposh <- rep(0, length(kbh)) for (i in 1:nrow(chrominfoh)) clone.genomeposh[chromh == i] <- kb[chromh == i] + chrom.starth[i] ##Now, determine vertical scale for each chromosome: y.min <- rep(yScale[1], nrow(chrominfo)) y.max <- rep(yScale[2], nrow(chrominfo)) for (i in 1:nrow(chrominfoh)) { if (minna(vech[(chromh==i)]) < y.min[i]) y.min[i] <- minna(vech[(chromh==i)]) if (maxna(vech[(chrom==i)]) > y.max[i]) y.max[i] <- maxna(vech[(chromh==i)]) } ## points or lines? if (data2.lines) lines(clone.genomeposh / 1000, vech, col=data2.col,lwd=2) else points(clone.genomeposh / 1000, vech, col=data2.col,pch=18,cex=1.2) } } ############################################################################################ ## ## plotChrom.Rcgh ## ## ## plot of single chromosome data from experiment ## ## plotChrom.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), chr, data = log2.ratios(aCGH.obj), plotthres=0.263,lines=F, chrominfo = hg_18, lw= -2, hi= 2,points.col="darkblue", samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { yScale=c(lw,hi) datainfo <- clones.info(aCGH.obj) nsamples <- length(samplenames) ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##removing chromosome not to plot: data <- data[chrom==chr,] kb <- kb[chrom==chr] chrom <- chrom[chrom==chr] chrominfo <- chrominfo[chr,] chrom.start <- c(0, cumsum(chrominfo$length))[chr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) chrom.length<-chrominfo$length par(cex = .6, pch = 16, lab = c(10, 6, 7), cex.axis = 1.2,xaxt="s", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) #i<-chr clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##lines if (lines) { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "",type="b", xlim=c(0,chrom.length/1000), col=points.col) } else { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim=c(0,chrom.length/1000), col=points.col) } title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) mtext(paste("Chromosome", chr, " (Mb)"), side = 1, at = (chrom.mid[i]/1000),line=3, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) abline(h = seq(lw,hi, b=.5), lty = 3,col="gray25") abline(h=0,lty=1,col="black") abline(h=-(plotthresout[[k]]),lty=5,lwd=1.5,col="red") abline(h=plotthresout[[k]],lty=5,lwd=1.5,col="green") abline(v = (chrominfo$centromere) / 1000, lty = 3, col = "purple4") } } ########################################################################################## ## ## plotChrom.BW.Rcgh ## ## ## Black&White for publication ## ## need to incorporate options in generic plotChrom function!! ## plotChrom.BW.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), chr, data = log2.ratios(aCGH.obj), plotthres=0.263,lines=F, chrominfo = hg_18, yScale = c(-2, 2),points.col="black", samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { datainfo <- clones.info(aCGH.obj) nsamples <- length(samplenames) ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##removing chromosome not to plot: data <- data[chrom==chr,] kb <- kb[chrom==chr] chrom <- chrom[chrom==chr] chrominfo <- chrominfo[chr,] chrom.start <- c(0, cumsum(chrominfo$length))[chr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) chrom.length<-chrominfo$length par(cex = .6, pch = 16, lab = c(10, 6, 7), cex.axis = 1.2,xaxt="s", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) #i<-chr clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##lines if (lines) { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "",type="b", xlim=c(0,chrom.length/1000), col=points.col) } else { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim=c(0,chrom.length/1000), col=points.col) } title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) mtext(paste("Chromosome", chr, " (Mb)"), side = 1, at = (chrom.mid[i]/1000), line=3, col="gray25",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) abline(h = seq(-1.5,1.5, b=.5), lty = 3,col="gray50") abline(h=0,lty=3) abline(h=-(plotthresout[[k]]),lty=5,lwd=1.5,col="gray50") abline(h=plotthresout[[k]],lty=5,lwd=1.5,col="gray50") abline(v = (chrominfo$centromere) / 1000, lty = 3, col = "gray25") } } ########################################################################################## ## ## coplotChrom.Rcgh ## ## ## plot of two datasets from one experiment along chromosome ## first= points, second = lines or points ## ## coplotChrom.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), chr, data = log2.ratios(aCGH.obj), data2 = hmm.ratios(aCGH.obj), chrominfo = hg_18, yScale = c(-2, 2), plotthres=0.263, data.col="slategray4", data2.col="darkblue", data2.lines=FALSE, samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { datainfo <- clones.info(aCGH.obj) nsamples <- length(samplenames) ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] data2 <- data2[ord,] ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##removing chromosome not to plot: data <- data[chrom==chr,] data2 <- data2[chrom==chr,] kb <- kb[chrom==chr] chrom <- chrom[chrom==chr] chrominfo <- chrominfo[chr,] chrom.start <- c(0, cumsum(chrominfo$length))[chr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) chrom.length<-chrominfo$length par(cex = .6, pch = 16, lab = c(10, 6, 7), cex.axis = 1.2,xaxt="s", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] vec2 <- data2[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim=c(0,chrom.length/1000), col=data.col) title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) mtext(paste("Chromosome", chr, " (Mb)"), side = 1, at = (chrom.mid[chr]/1000), line=3, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) abline(h = seq(-1.5,1.5, b=.5), lty = 3,col="grey") abline(h=0,lty=3) abline(h=-(plotthresout[[k]]),lty=5,lwd=1.5,col="red") abline(h=plotthresout[[k]],lty=5,lwd=1.5,col="green") abline(v = (chrominfo$centromere) / 1000, lty = 3, col = "purple4") ## lines or points? if (data2.lines) lines(kb / 1000, vec2, col=data2.col,lwd=2) else points(kb / 1000, vec2, col=data2.col,lwd=2,pch=18,cex=1.2) } } ############################################################################################## ## ## statistics and plotting ## ############################################################################################## ## ## t-tests & freq stat plots ## SAM ## EBAM ## cox model & plots ## logranks & plots ## ############################################################################################### require (siggenes) ############################################################################################## ## ## plotFreqAll.Rcgh ## ## ## frequency plot of all data along genome ## user defined data and threshold ## ## writes table ## ## plotFreqAll.Rcgh <- function (aCGH.obj, chrominfo = hg_18, X = TRUE, Y = TRUE, cutplot = 0, thres = 0.263, ylm = c(-1, 1), numaut = 22, titles=NULL, data=log2.ratios(aCGH.obj) ) { ## aCGH arguments removed from function call threshold = TRUE; nlim = 1; onepage = TRUE; colored = TRUE; ngrid = 2; #p.thres = c(0.001, 0.01, 0.05, 0.1); mincolors = .1; quant.col = .11; col.scheme <- if (colored) list( #pal = # c("red", "blue", "green", # "gold")[1:length(p.thres)], gain.low = "white", gain.high = "green",loss.low = "red",loss.high = "white", abline1 = "blue",abline2 = "grey50", mtext ="purple4", #"red", kb.loc = "blue", abline3 = "black", abline4 = "grey50" ) else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50", gain.high = "grey0", loss.low = "grey0", loss.high = "grey50", abline1 = "grey50",abline2 = "grey50", mtext = "black", kb.loc = "black", abline3 = "black", abline4 = "grey50" ) datainfo <- clones.info(aCGH.obj) dataSign<-data colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: if (threshold) dataSign <- threshold.func(dataSign, thres) numsamp<-ncol(data) z<-rep(c(1),times=numsamp) tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply( ###1:nrow(colmatr), 1, function(j) gainloss.func(dat = data, cols = which(z== 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,z == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) dt <- cbind(dt, dataSign[ ,z == 1 ]) rsp <- c(rsp, rep(1, sum(z == 1))) rsp <- rsp - 1 ##Now preparing for plotting: numchr <- numaut if (X) numchr <- numchr + 1 if (Y) numchr <- numchr + 1 chrominfo <- chrominfo[ 1:numchr, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb ## take care of X and Y (bug fix) if (!Y) { which (datainfo$Chrom==24) ->yind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) 1), 1), lab = c(1, 8, 7), cex.main=1, cex.lab=0.75, tcl = -.2, xaxs = "i", xaxt="n", cex.axis=.75) g<-1 gl <- gainloss[[g]] ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", main=titles, type = "h", xlab = "", ylab = "Fraction gained or lost", pch = 18, ylim = ylm, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 3, col = "purple4") for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), #from side=3 line = .3, col = col.scheme$mtext, cex = .75,font=2) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .3, col = col.scheme$mtext, cex = .75,font=2) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .3, col = col.scheme$mtext, cex = .75,font=2) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]),#from side=3 line = .3, col = col.scheme$mtext, cex = .75,font=2) if (Y) if (i == numaut) mtext("Y", side = 1, at = (chrom.mid[numaut + 2]),#from side=3 line = .3, col = col.scheme$mtext, cex = .75,font=2) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .3, col = col.scheme$mtext, cex = .75,font=2) GLout<-c(NULL) gp<-as.vector(gl$gainP) lp<-as.vector(gl$lossP) GLout<-cbind(GLout,gp,lp) GLcnames<-c("PropGain","Proploss") colnames(GLout)<-GLcnames GLout<-cbind(datainfo,GLout) return (GLout) } ## gainloss.func from aCGH 1.0.6, upon which this was written ## aCGH 1.1.4 does not calculate in this way! gainloss.func <- function (dat, cols, thres, quant = .5) { if (length(thres) == 1) thres <- rep(thres, ncol(dat)) if (length(thres) != ncol(dat)) stop("Error: number of thresholds is not the same as number\ of samples") dt <- as.matrix(dat[ ,cols ]) thr <- thres[cols] loss.med <- loss <- gain.med <- gain <- rep(0, nrow(dt)) for (i in 1:nrow(dt)) if (!all(is.na(dt[ i, ]))) { x <- dt[ i, ] th <- thr[!is.na(x)] x <- x[!is.na(x)] tmp.gain <- x >= th gain[i] <- mean(tmp.gain) if (any(tmp.gain)) gain.med[i] <- quantile(x[tmp.gain], 1 - quant) tmp.loss <- x <= -th loss[i] <- mean(tmp.loss) if (any(tmp.loss)) loss.med[i] <- quantile(x[tmp.loss], quant) } list(gainP = gain, lossP = loss, gainMed = gain.med, lossMed = loss.med) } ########################################################################################## ## ## plotFreqChrom.Rcgh ## ## ## frequency plot of all data along specific chromosome ## user defined data and threshold ## ## does not write table ## ## plotFreqChrom.Rcgh <- function (aCGH.obj, chrom, chrominfo = hg_18, cutplot = 0, thres = 0.263, ylm = c(-1, 1), numaut = 22, titles=NULL, data=log2.ratios(aCGH.obj) ) { ## aCGH argumements removed from function call threshold = TRUE; quant.col = .11; onepage = TRUE; colored = TRUE; nlim = 1; ngrid = 2; p.thres = c(0.001, 0.01, 0.05, 0.1); mincolors = .1; col.scheme <- if (colored) list(pal = c("red", "blue", "green", "gold")[1:length(p.thres)], gain.low = "white", gain.high = "green",loss.low = "red",loss.high = "white", abline1 = "blue",abline2 = "grey50", mtext = "red", kb.loc = "blue", abline3 = "black", abline4 = "grey50") else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50", gain.high = "grey0", loss.low = "grey0", loss.high = "grey50", abline1 = "grey50",abline2 = "grey50", mtext = "black", kb.loc = "black", abline3 = "black", abline4 = "grey50") datainfo <- clones.info(aCGH.obj) dataSign<-data colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: if (threshold) dataSign <- threshold.func(dataSign, thres) numsamp<-ncol(data) z<-rep(c(1),times=numsamp) tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply( 1, function(j) gainloss.func(dat = data, cols = which(z== 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,z == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) dt <- cbind(dt, dataSign[ ,z == 1 ]) rsp <- c(rsp, rep(1, sum(z == 1))) rsp <- rsp - 1 ##Now preparing for plotting: numchr <- 1 chrom.ind <- (1:nrow(datainfo))[datainfo$Chrom==chrom] kb <- datainfo$kb[chrom.ind] chrominfo <- chrominfo[chrom, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length)) chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) 1), 1), lab = c(10, 8, 7),xaxt="s",cex.main=1,cex.lab=0.75, tcl = -.2, xaxs = "i", cex.axis=.75) g<-1 gl <- gainloss[[g]] ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) gl$gainP<-gl$gainP[chrom.ind] gl$lossP<-gl$lossP[chrom.ind] chrlab<-paste("Chromosome", chrom,sep="\ ") ind <- which(gl$gainP >= cutplot) plot(kb[ind]/1000, gl$gainP[ind], col = "green", type = "h", xlab = "", ylab = "Fraction gained or lost", pch = 18, ylim = ylm, main=titles, xlim = c(0, max(cumsum(chrominfo$length)/1000, kb[ind]/1000, rm.na = TRUE)) ) ## from plotChrom.Rcgh mtext(paste("Chromosome", chrom, " (Mb)"), side = 1, #at = (chrom.mid[i]/1000), line=3, col="purple4",cex=0.8,font=2) ind <- gl$lossP >= cutplot points(kb[ind]/1000, -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = chrom.centr/1000, lty = 3, col = "purple4") } ############################################################################################ ## ## plotFreqStat.Rcgh ## ## ## plot of frequency of alterations by phenotype ## also test statistics from multtest ## ## requires output from, e.g. maxT ## ## writes table ## ## plotFreqStat.Rcgh <- function(aCGH.obj, resT, pheno, chrominfo = hg_18, X = TRUE, Y = TRUE, cutplot = 0, titles = rsp.uniq, thres = 0.263, ylm = c(-1, 1), p.thres = c(0.01,0.05), numaut = 22, onepage = TRUE, statmax=6, data=log2.ratios(aCGH.obj) ) { ## aCGH arguments removed from function call minChanged = 0; all = FALSE; summarize.clones = TRUE; colored = TRUE; threshold = TRUE; rsp.uniq = unique(pheno); nlim = 1; ngrid = 2; mincolors = .1; quant.col = .11; col.scheme <- if (colored) list(pal = c("brown","darkorange", "gold","yellow")[1:length(p.thres)], gain.low = "white", gain.high = "green", loss.low = "red", loss.high = "white", abline1 = "blue", abline2 = "grey50", mtext = "purple4", kb.loc = "blue", abline3 = "black", abline4 = "grey50", ) else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50", gain.high = "grey0", loss.low = "grey0", loss.high = "grey50", abline1 = "grey50", abline2 = "grey50", mtext = "black", kb.loc = "black", abline3 = "black", abline4 = "grey50", ) datainfo <- clones.info(aCGH.obj) dataSign <- data rsp.uniq <- sort(rsp.uniq) ## creating response matrix colmatr if (!all) colmatr <- t( sapply(rsp.uniq, function(rsp.uniq.level) ifelse(pheno == rsp.uniq.level, 1, 0) ) ) ## screening out clones that are gained or lost in < minChanged in ## classes under comparison ## indeces present: tmp <- apply(as.matrix(colmatr), 2, sum) indecesnow <- which(tmp == 1) data.thres <- threshold.func(data, thresAbs = thres) prop.ch <- changeProp.func(dat = data.thres, colMatr = colmatr) maxch <- changeProp.overall.func(dat = data.thres[ ,indecesnow ]) clones.index <- which(maxch >= minChanged) ## ##removing clones to skip from the dataset data <- data[clones.index,] data.thres <- data.thres[clones.index,] dataSign <- dataSign[clones.index,] datainfo <- datainfo[clones.index,] ## start table: if (summarize.clones) bac.summary <- table.bac.func(dat = data.thres, colMatr = colmatr) ## creating color matrix for displaying intensity of gains and losses ## and for plotting p-values colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: if (threshold) dataSign <- threshold.func(dataSign, thres) nr <- nrow(colmatr) nr <- nr + 1 tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply(1:nrow(colmatr), function(j) gainloss.func(dat = data, cols = which(colmatr[ j, ] == 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,colmatr[1,] == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) for (j in 2:nrow(colmatr)) { dt <- cbind(dt, dataSign[ ,colmatr[ j, ] == 1 ]) rsp <- c(rsp, rep(j, sum(colmatr[ j, ] == 1))) } rsp <- rsp - 1 ## Process statistics ## for plotting test stats and p-values res <- resT[clones.index,] maxT <- res$adjp[order(res$index)] maxTok<-maxT[which(!is.na(maxT))] teststat <- abs(res$teststat[order(res$index)]) teststatok<-teststat[which(!is.na(maxT))] st.now <- sapply(p.thres, function(threshold) { if (any(maxTok <= threshold)) min(teststatok[maxTok <= threshold]) else NA } ) pal.now <- col.scheme$pal ##Now preparing for plotting: numchr <- numaut if (X) numchr <- numchr + 1 if (Y) numchr <- numchr + 1 chrominfo <- chrominfo[ 1:numchr, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb ## take care of X and Y (bug fix) if (!Y) { which (datainfo$Chrom==24) ->yind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) nr else 1), 1), lab = c(1, 8, 7), xaxt="n",cex.main=1.25,cex.lab=1, tcl = -.2, xaxs = "i", cex.axis=1) GLout<-c(NULL) for (g in 1:length(titles)) { gl <- gainloss[[g]] tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) gp<-as.vector(gl$gainP) lp<-as.vector(gl$lossP) GLout<-cbind(GLout,gp,lp) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", type = "h", xlab="", #xlab = "Chromosome number", ylab = "Fraction gained or lost", pch = 18, main = tl, ylim = ylm, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 3, col = "purple4") for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (Y) if (i == numaut) mtext("Y", side = 3, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) } plot(kb.loc, teststat, col = col.scheme$kb.loc, ylim = c(0, statmax), type = "h", xlab="", #xlab = "Chromosome number", ylab = "Test statistic", pch = 18, main = paste(titles, collapse = " vs "), #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) if (length(st.now) > 0) abline(h = rev(st.now), col = rev(pal.now), lty = 2) abline(v = cumsum(chrominfo$length), col = col.scheme$abline3) abline(v = chrom.centr, lty = 3, col = "purple4") for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .5, col = col.scheme$mtext, cex = .75,font=2) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .5, col = col.scheme$mtext, cex = .75,font=2) if (X) if (i == numaut) mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .5, col = col.scheme$mtext, cex = .75,font=2) if (Y) if (i == numaut) mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .5, col = col.scheme$mtext, cex = .75,font=2) ##reset plot layout par (mfrow=c(1,1)) ##write out table GLcnames<-c("PropGain.0","Proploss.0","PropGain.1","Proploss.1") colnames(GLout)<-GLcnames res$rawp[order(res$index)] -> rawP GLout<-cbind(datainfo,GLout,rawP, maxT) return (GLout) } ########################################################################################### ## ## plotFreqStatChrom.Rcgh ## ## ## plots frequency by class and teststats ## for individual chromosomes ## ## does not write table ## ## plotFreqStatChrom.Rcgh <- function(aCGH.obj, resT, pheno, chrom, chrominfo = hg_18, X = TRUE, Y = FALSE, statmax=6, cutplot = 0, titles = rsp.uniq, thres = 0.263, ylm = c(-1, 1), p.thres=c(0.01,0.05), numaut = 22, onepage = TRUE, data=log2.ratios(aCGH.obj) ) { ## aCGH arguments removed from function call threshold = TRUE; minChanged = 0; all = FALSE; rsp.uniq = unique(pheno); nlim = 1; ngrid = 2; mincolors = .1; quant.col = .11; colored = TRUE; col.scheme <- if (colored) list(pal = c("brown","darkorange","gold","yellow")[1:length(p.thres)], gain.low = "white", gain.high = "green",loss.low = "red", loss.high = "white", abline1 = "blue", abline2 = "grey50", mtext = "red", kb.loc = "blue", abline3 = "black", abline4 = "grey50", ) else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50",gain.high = "grey0", loss.low = "grey0", loss.high = "grey50", abline1 = "grey50", abline2 = "grey50", mtext = "black", kb.loc = "black", abline3 = "black", abline4 = "grey50", ) datainfo <- clones.info(aCGH.obj) dataSign <- data rsp.uniq <- sort(rsp.uniq) ## creating response matrix colmatr if (!all) colmatr <- t( sapply(rsp.uniq, function(rsp.uniq.level) ifelse(pheno == rsp.uniq.level, 1, 0) ) ) ## screening out clones that are gained or lost in < minChanged in ## classes under comparison ## indeces present: tmp <- apply(as.matrix(colmatr), 2, sum) indecesnow <- which(tmp == 1) data.thres <- threshold.func(data, thresAbs = thres) prop.ch <- changeProp.func(dat = data.thres, colMatr = colmatr) maxch <- changeProp.overall.func(dat = data.thres[ ,indecesnow ]) clones.index <- which(maxch >= minChanged) ##removing clones to skip from the dataset data <- data[clones.index,] data.thres <- data.thres[clones.index,] dataSign <- dataSign[clones.index,] datainfo <- datainfo[clones.index,] ## creating color matrix for displaying intensity of gains and losses ## and for plotting p-values colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: if (threshold) dataSign <- threshold.func(dataSign, thres) nr <- nrow(colmatr) nr <- nr + 1 tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply(1:nrow(colmatr), function(j) gainloss.func(dat = data, cols = which(colmatr[ j, ] == 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,colmatr[1,] == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) for (j in 2:nrow(colmatr)) { dt <- cbind(dt, dataSign[ ,colmatr[ j, ] == 1 ]) rsp <- c(rsp, rep(j, sum(colmatr[ j, ] == 1))) } rsp <- rsp - 1 ## Process statistics ## for plotting test stats and p-values res <- resT[clones.index,] maxT <- res$adjp[order(res$index)] maxTok<-maxT[which(!is.na(maxT))] teststat <- abs(res$teststat[order(res$index)]) teststatok<-teststat[which(!is.na(maxT))] st.now <- sapply(p.thres, function(threshold) { if (any(maxTok <= threshold)) min(teststatok[maxTok <= threshold]) else NA } ) pal.now <- col.scheme$pal ##Now preparing for plotting: numchr <- 1 nc<-1 chrom.ind <- (1:nrow(datainfo))[datainfo$Chrom==chrom] kb <- datainfo$kb[chrom.ind] chrominfo <- chrominfo[chrom, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc<-datainfo$kb for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length)) chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot if (onepage) {par( #par(mfrow = c((if (onepage) nr else 1), 1), mfrow=c(3,1), lab = c(10, 8, 7), xaxt="s", tcl = -.2, xaxs = "i")} else {par( mfrow=c(1,1), lab = c(10, 8, 7), xaxt="s", tcl = -.2, xaxs = "i")} for (g in 1:length(titles)) { gl <- gainloss[[g]] tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) gl$gainP<-gl$gainP[chrom.ind] chrlab<-paste("chromosome", chrom,sep="\ ") ind <- which(gl$gainP >= cutplot) plot(kb[ind], gl$gainP[ind], col = "green", type = "h", xlab = "", ylab = "Fraction gained or lost", pch = 18, main = tl, ylim = ylm, xlim = c(0, max(cumsum(chrominfo$length), kb[ind], rm.na = TRUE)) ) ## from plotChrom.Rcgh mtext(paste("Chromosome", chrom, " (kb)"), side = 1, #at = (chrom.mid[i]/1000), line=3, col="purple4",cex=0.8,font=2) gl$lossP<-gl$lossP[chrom.ind] ind <- which(gl$lossP >= cutplot) points(kb[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = chrom.centr, lty = 3, col = "purple4") } plot(kb, teststat[chrom.ind], col = col.scheme$kb.loc, ylim = c(0, statmax), type = "h", xlab = "", ylab = "Test statistic", pch = 18, main = paste(titles, collapse = " vs "), xlim = c(0, max(cumsum(chrominfo$length), kb, rm.na = TRUE)) ) if (length(st.now) > 0) abline(h = rev(st.now), col = rev(pal.now), lty = 2) abline(v = chrom.centr, lty = 3, col = "purple4") ## from plotChrom.Rcgh mtext(paste("Chromosome", chrom, " (bp)"), side = 1, #at = (chrom.mid[i]/1000), line=3, col="purple4",cex=0.8,font=2) ##reset plot layout par (mfrow=c(1,1)) } ############################################################################################ ## ## sam.Rcgh ## ## ## wrapper using SAM port into R by package "siggenes" ## ## user-defined fdr value - calculates delta based upon this ## does SAM plot ## ## writes table ## ## annoying print functions in siggenes which I may think about sorting out ## ## sam.Rcgh <- function (aCGH.obj, pheno, sam.fdr=0.1, sam.rand=123, sam.method="d.stat", data=log2.ratios(aCGH.obj)) { ## run SAM sam(data,pheno,method=sam.method, n.delta=25,rand=sam.rand)->samout ## calculate FDRs for a large range of deltas ##sam.delta(sam.out,seq(0,2,0.01))->calc.delta.tab summary(samout)->samoutsum samoutsumd<-as.data.frame(samoutsum) ## find delta appropriate to user-defined FDR samoutsumd$FDR->calc.fdr samoutsumd$Delta->calc.delta min(calc.fdr[which(calc.fdr>=sam.fdr)])->calc.min.fdr min(calc.delta[which(calc.fdr==calc.min.fdr)])->calc.min.delta plot(samout,calc.min.delta) sam(data,pheno,delta=calc.min.delta,rand=sam.rand)->samoutd summary(samoutd,delta=calc.min.delta,ll=FALSE,what="genes")->samsum samsum[3]->samsum3 samsum3<-as.data.frame(samsum3) ## make nice table merge(clones.info(aCGH.obj),samsum3,by.x=0,by.y=1)->sammergeout ord<-order(sammergeout$"mat.sig.q.value") fin<-sammergeout[ord,] rownames(fin)<-fin$Row.names fin<-fin[,-1] colnames(clones.info(aCGH.obj))->mapnames c("SAM.score","SAM.st.dev","SAM.p.value","SAM.q.value","SAM.fold.change")->samnames colnames(fin)<-c(mapnames,samnames) #colnames(fin)<-c("Clone","Chrom","kb","end","info","SAM.score","SAM.st.dev","SAM.p.value","SAM.q.value","SAM.fold.change") summary(samout,delta=calc.min.delta,ll=FALSE,what="stats") #return (samsum3) return (fin) } ####################################################################################### ## ## plotSam.Rcgh ## ## ## plots the SAM scores from sam.Rcgh along genome ## ## not sure how useful this is? ## ## plotSam.Rcgh <- function (aCGH.obj, samout, pheno, titles = rsp.uniq, chrominfo = hg_18, X = TRUE, Y = TRUE, cutplot = 0, thres = 0.263, ylm = c(-1, 1),onepage = TRUE, numaut = 22, samlim=c(-7.5,7.5), data=log2.ratios(aCGH.obj) ) { ## aCGH arguments removed from function call threshold = TRUE; nlim = 1; quant.col = .11; colored = TRUE;rsp.uniq = unique(pheno);summarize.clones = TRUE; ngrid = 2; p.thres = c(0.001,0.01,0.05, 0.1); mincolors = .1;all = FALSE;minChanged = 0; numbsamp<-ncol(data) col.scheme <- if (colored) list(pal = c("red", "blue", "green", "gold")[1:length(p.thres)], gain.low = "white", gain.high = "green",loss.low = "red", loss.high = "white", abline1 = "blue",abline2 = "grey50", mtext = "purple4", kb.loc = "blue", abline3 = "black", abline4 = "grey50", ) else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50", gain.high = "grey0", loss.low = "grey0",loss.high = "grey50", abline1 = "grey50", abline2 = "grey50", mtext = "black",kb.loc = "black", abline3 = "black", abline4 = "grey50", ) datainfo <- clones.info(aCGH.obj) dataSign <- data rsp.uniq <- sort(rsp.uniq) ## creating response matrix colmatr if (!all) colmatr <- t( sapply(rsp.uniq, function(rsp.uniq.level) ifelse(pheno == rsp.uniq.level, 1, 0) ) ) ## screening out clones that are gained or lost in < minChanged in ## classes under comparison ## indeces present: tmp <- apply(as.matrix(colmatr), 2, sum) indecesnow <- which(tmp == 1) data.thres <- threshold.func(data, thresAbs = thres) prop.ch <- changeProp.func(dat = data.thres, colMatr = colmatr) maxch <- changeProp.overall.func(dat = data.thres[ ,indecesnow ]) clones.index <- which(maxch >= minChanged) ## ##removing clones to skip from the dataset data <- data[clones.index,] data.thres <- data.thres[clones.index,] dataSign <- dataSign[clones.index,] datainfo <- datainfo[clones.index,] colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## start table: if (summarize.clones) bac.summary <- table.bac.func(dat = data.thres, colMatr = colmatr) ## creating color matrix for displaying intensity of gains and losses ## and for plotting p-values colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: if (threshold) dataSign <- threshold.func(dataSign, thres) nr <- nrow(colmatr) nr <- nr + 1 tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply(1:nrow(colmatr), function(j) gainloss.func(dat = data, cols = which(colmatr[ j, ] == 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,colmatr[1,] == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) for (j in 2:nrow(colmatr)) { dt <- cbind(dt, dataSign[ ,colmatr[ j, ] == 1 ]) rsp <- c(rsp, rep(j, sum(colmatr[ j, ] == 1))) } rsp <- rsp - 1 ##Now preparing for plotting: numchr <- numaut if (X) numchr <- numchr + 1 if (Y) numchr <- numchr + 1 chrominfo <- chrominfo[ 1:numchr, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb ## take care of X and Y (bug fix) if (!Y) { which (datainfo$Chrom==24) ->yind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) nr else 1), 1), lab = c(1, 8, 7), xaxt="n",cex.main=1.25,cex.lab=1, tcl = -.2, xaxs = "i", cex.axis=1) GLout<-c(NULL) for (g in 1:length(titles)) { gl <- gainloss[[g]] tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) gp<-as.vector(gl$gainP) lp<-as.vector(gl$lossP) GLout<-cbind(GLout,gp,lp) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", type = "h", xlab="", #xlab = "Chromosome number", ylab = "Fraction gained or lost", pch = 18, main = tl, ylim = ylm, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 3, col = "purple4") for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (Y) if (i == numaut) mtext("Y", side = 3, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) } ## from samout #sam.ind<-rownames(samout) #sam.score<-samout$d.i. ## fudge sam.ind<-rownames(samout) sam.score<-samout$SAM.score ##compute cumulative kb locations for SAM results datainfo.sam<-datainfo[sam.ind,] start.sam <- c(0, cumsum(chrominfo$length)) kb.loc.sam <- datainfo.sam$kb for (i in 1:nrow(chrominfo)) kb.loc.sam[datainfo.sam$Chrom == i] <- start.sam[i] + datainfo.sam$kb[datainfo.sam$Chrom == i] plot(kb.loc.sam, sam.score, col = "steelblue4", ylim = samlim, type = "h", xlab = "", ylab = "SAM score", pch = 18, main = paste(titles, collapse = " vs "), #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) abline(h=0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline3) abline(v = chrom.centr, lty = 2, col = col.scheme$abline4) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) ##reset plot layout par (mfrow=c(1,1)) } ################################################################################################# ## ## ebam.Rcgh ## ## ## empirical Bayes from siggenes ## ## ebam.Rcgh <- function (aCGH.obj, pheno, data=log2.ratios(aCGH.obj)) { clones.info(aCGH.obj) -> clonesinfo par(mfrow=c(1,2)) data<-as.matrix(data) find.a0(data, pheno) -> find.out ebam(find.out,R.fold=F,R.unlog=F) -> ebam.out ebam.out$row.sig.genes -> ebam.clones ebam.out$mat.Z.unsorted -> ebam.values ebam.values[ebam.clones,] -> ebam.clones.values ebam.clones.values <- cbind(ebam.clones,ebam.clones.values) colnames(ebam.clones.values) <- c("clone.id","EBAM.Z.score", "EBAM.prob") ebam.clones.values <- as.data.frame(ebam.clones.values) par(mfrow=c(1,1)) merge (clonesinfo, ebam.clones.values, by.x=0, by.y=1) -> ebam.clones.values.merge order(ebam.clones.values.merge$EBAM.prob, decreasing=TRUE) -> prob.ord ebam.clones.values.merge[prob.ord,] -> ebam.clones.values.out rownames(ebam.clones.values.out)<-ebam.clones.values.out$Row.names ebam.clones.values.out<-ebam.clones.values.out[,-1] return (ebam.clones.values.out) } ################################################################################################ ## ## plotEbam.Rcgh ## ## ## plots the Z scores from ebam.Rcgh along genome ## ## not sure how useful this is? ## ## plotEbam.Rcgh <- function (aCGH.obj, ebamtab, pheno, titles = rsp.uniq, chrominfo = hg_18, X = TRUE, Y = TRUE, cutplot = 0, thres = 0.263, ylm = c(-1, 1),onepage = TRUE, numaut = 22, ebamlim=c(-7.5,7.5), data=log2.ratios(aCGH.obj) ) { ## aCGH arguments removed from function call threshold = TRUE; nlim = 1; quant.col = .11; colored = TRUE;rsp.uniq = unique(pheno);summarize.clones = TRUE; ngrid = 2; p.thres = c(0.001,0.01,0.05, 0.1); mincolors = .1;all = FALSE;minChanged = 0; numbsamp<-ncol(data) col.scheme <- if (colored) list(pal = c("red", "blue", "green", "gold")[1:length(p.thres)], gain.low = "white", gain.high = "green",loss.low = "red", loss.high = "white", abline1 = "blue",abline2 = "grey50", mtext = "purple4", kb.loc = "blue", abline3 = "black", abline4 = "grey50", ) else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50", gain.high = "grey0", loss.low = "grey0",loss.high = "grey50", abline1 = "grey50", abline2 = "grey50", mtext = "black",kb.loc = "black", abline3 = "black", abline4 = "grey50", ) datainfo <- clones.info(aCGH.obj) dataSign <- data rsp.uniq <- sort(rsp.uniq) ## creating response matrix colmatr if (!all) colmatr <- t( sapply(rsp.uniq, function(rsp.uniq.level) ifelse(pheno == rsp.uniq.level, 1, 0) ) ) ## screening out clones that are gained or lost in < minChanged in ## classes under comparison ## indeces present: tmp <- apply(as.matrix(colmatr), 2, sum) indecesnow <- which(tmp == 1) data.thres <- threshold.func(data, thresAbs = thres) prop.ch <- changeProp.func(dat = data.thres, colMatr = colmatr) maxch <- changeProp.overall.func(dat = data.thres[ ,indecesnow ]) clones.index <- which(maxch >= minChanged) ## ##removing clones to skip from the dataset data <- data[clones.index,] data.thres <- data.thres[clones.index,] dataSign <- dataSign[clones.index,] datainfo <- datainfo[clones.index,] colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## start table: if (summarize.clones) bac.summary <- table.bac.func(dat = data.thres, colMatr = colmatr) ## creating color matrix for displaying intensity of gains and losses ## and for plotting p-values colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: if (threshold) dataSign <- threshold.func(dataSign, thres) nr <- nrow(colmatr) nr <- nr + 1 tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply(1:nrow(colmatr), function(j) gainloss.func(dat = data, cols = which(colmatr[ j, ] == 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,colmatr[1,] == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) for (j in 2:nrow(colmatr)) { dt <- cbind(dt, dataSign[ ,colmatr[ j, ] == 1 ]) rsp <- c(rsp, rep(j, sum(colmatr[ j, ] == 1))) } rsp <- rsp - 1 ##Now preparing for plotting: numchr <- numaut if (X) numchr <- numchr + 1 if (Y) numchr <- numchr + 1 chrominfo <- chrominfo[ 1:numchr, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb ## take care of X and Y (bug fix) if (!Y) { which (datainfo$Chrom==24) ->yind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) nr else 1), 1), lab = c(1, 8, 7), xaxt="n",cex.main=1.25,cex.lab=1, tcl = -.2, xaxs = "i", cex.axis=1) GLout<-c(NULL) for (g in 1:length(titles)) { gl <- gainloss[[g]] tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) gp<-as.vector(gl$gainP) lp<-as.vector(gl$lossP) GLout<-cbind(GLout,gp,lp) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", type = "h", xlab="", #xlab = "Chromosome number", ylab = "Fraction gained or lost", pch = 18, main = tl, ylim = ylm, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 3, col = "purple4") for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (Y) if (i == numaut) mtext("Y", side = 3, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) } ## from ebamtab ebamtab.Z.score <- ebamtab$EBAM.Z.score ebamtab.clone.id <- rownames(ebamtab) ##compute cumulative kb locations for SAM results datainfo.ebam<-datainfo[ebamtab.clone.id,] start.ebam <- c(0, cumsum(chrominfo$length)) kb.loc.ebam <- datainfo.ebam$kb for (i in 1:nrow(chrominfo)) kb.loc.ebam[datainfo.ebam$Chrom == i] <- start.ebam[i] + datainfo.ebam$kb[datainfo.ebam$Chrom == i] plot(kb.loc.ebam, ebamtab.Z.score, col = "midnightblue", ylim = ebamlim, type = "h", xlab = "", ylab = "EBAM Z score", pch = 18, main = paste(titles, collapse = " vs "), #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) abline(h=0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline3) abline(v = chrom.centr, lty = 2, col = col.scheme$abline4) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) ##reset plot layout par (mfrow=c(1,1)) } ########################################################################################## ## ## cox.Rcgh ## ## ## calculate cox score for each clone ## ## requires survival data (time, censor) ## ## writes table ## ## cox.Rcgh <- function (aCGH.obj, survtime, survcensor, chrominfo = hg_18, X = TRUE, Y = FALSE, data=log2.ratios(aCGH.obj) ) { ## aCGH and plotting arguments removed from function call threshold = TRUE; nlim = 1; quant.col = .11; colored = TRUE; ylm = c(-1, 1); numaut = 22;onepage = TRUE; ngrid = 2; p.thres = c(0.001,0.01,0.05, 0.1); mincolors = .1; cutplot = 0;coxlim=c(-15,15); coxP=0.05; thres = 0.263; datainfo <- clones.info(aCGH.obj) numbsamp<-ncol(data) # get time and censor which(colnames(phenotype(aCGH.obj))==survtime)->st phenotype(aCGH.obj)[[st]]->stime which(colnames(phenotype(aCGH.obj))==survcensor)->sc phenotype(aCGH.obj)[[sc]]->scensor xC<-data xC<-as.data.frame(xC) xC<-t(xC) xC<-as.data.frame(xC) #yC<- aCGHsurv #yC<-as.data.frame(yC) zC<-c(NULL) zP<-c(NULL) for (i in 1:ncol(xC)) { tmp<-coxph(Surv(stime,scensor)~xC[,i],data=xC,na.action=na.exclude) tmpp<-1-(pchisq(tmp$wald.test,df=1)) zC<-c(zC,tmp$coef) zP<-c(zP,tmpp) } zC<-as.vector(zC) zP<-as.vector(zP) zC<-t(zC) zP<-t(zP) COXout<-c(NULL) zCt<-t(zC) zPt<-t(zP) zPt.fdr<-p.adjust(zPt,"fdr") COXout<-cbind(zCt,zPt,zPt.fdr) colnames(COXout)<-c("Cox.score","raw.p", "fdr.p") COXout<-cbind(datainfo,COXout) return (COXout) } ################################################################################################# ## ## cox.covar.Rcgh ## ## ## calculate cox score for each clone ## allowing (currently only one) covariate to be included in the model ## ## requires survival data (time, censor) ## ## writes table ## ## cox.covar.Rcgh <- function (aCGH.obj, survtime, survcensor, survcovar, chrominfo = hg_18, X = TRUE, Y = FALSE, data=log2.ratios(aCGH.obj) ) { ## aCGH and plotting arguments removed from function call threshold = TRUE; nlim = 1; quant.col = .11; colored = TRUE; ylm = c(-1, 1); numaut = 22;onepage = TRUE; ngrid = 2; p.thres = c(0.001,0.01,0.05, 0.1); mincolors = .1; cutplot = 0;coxlim=c(-15,15); coxP=0.05; thres = 0.263; datainfo <- clones.info(aCGH.obj) numbsamp<-ncol(data) xC<-data xC<-as.data.frame(xC) xC<-t(xC) xC<-as.data.frame(xC) # get time and censor which(colnames(phenotype(aCGH.obj))==survtime)->st phenotype(aCGH.obj)[[st]]->stime which(colnames(phenotype(aCGH.obj))==survcensor)->sc phenotype(aCGH.obj)[[sc]]->scensor which(colnames(phenotype(aCGH.obj))==survcovar)->sv phenotype(aCGH.obj)[[sv]]->phenocov #C<- aCGHsurv #yC<-as.data.frame(yC) zC<-c(NULL) zP<-c(NULL) for (i in 1:ncol(xC)) { tmp<-coxph(Surv(stime,scensor)~xC[,i]+phenocov,data=xC,na.action=na.exclude) #tmpp<-1-(pchisq(tmp$wald.test,df=1)) tcoef <- tmp$coef tcoef[[1]]->tcoef tse <- sqrt(diag(tmp$var)) tse[[1]]->tse 1 - pchisq((tcoef/ tse)^2, 1)->tmpp zC<-c(zC,tcoef) zP<-c(zP,tmpp) } zC<-as.vector(zC) zP<-as.vector(zP) zC<-t(zC) zP<-t(zP) COXout<-c(NULL) zCt<-t(zC) zPt<-t(zP) zPt.fdr<-p.adjust(zPt,"fdr") COXout<-cbind(zCt,zPt,zPt.fdr) colnames(COXout)<-c("Cox.score","raw.p", "fdr.p") COXout<-cbind(datainfo,COXout) return (COXout) } ################################################################################# ## ## plotCox.Rcgh ## ## ## plots cox score (filtered by adjusted P) ## needs cox scores and p values from cox.Rcgh ## also plots FreqAll ## ## plotCox.Rcgh <- function (aCGH.obj, coxout, chrominfo = hg_18, X = TRUE, Y = TRUE, cutplot = 0, thres = 0.263, ylm = c(-1, 1),onepage = TRUE, numaut = 22, coxlim=c(-10,10), coxP=0.05, title1=NULL,title2=NULL, data=log2.ratios(aCGH.obj) ) { ## aCGH arguments removed from function call threshold = TRUE; nlim = 1; quant.col = .11; colored = TRUE; ngrid = 2; p.thres = c(0.001,0.01,0.05, 0.1); mincolors = .1; numbsamp<-ncol(data) col.scheme <- if (colored) list(pal = c("red", "blue", "green", "gold")[1:length(p.thres)], gain.low = "white", gain.high = "green",loss.low = "red", loss.high = "white", abline1 = "blue",abline2 = "grey50", mtext = "red", kb.loc = "blue", abline3 = "black", abline4 = "grey50", ) else list(pal = c("grey10", "grey40", "grey70", "grey90")[1:length(p.thres)], gain.low = "grey50", gain.high = "grey0", loss.low = "grey0",loss.high = "grey50", abline1 = "grey50", abline2 = "grey50", mtext = "black",kb.loc = "black", abline3 = "black", abline4 = "grey50", ) datainfo <- clones.info(aCGH.obj) dataSign <- data colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) if (threshold) dataSign <- threshold.func(dataSign, thres) z<-rep(c(1),times=numbsamp) tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply( 1, function(j) gainloss.func(dat = data, cols = which(z== 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,z == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) dt <- cbind(dt, dataSign[ ,z == 1 ]) rsp <- c(rsp, rep(1, sum(z == 1))) rsp <- rsp - 1 numchr <- numaut if (X) numchr <- numchr + 1 if (Y) numchr <- numchr + 1 chrominfo <- chrominfo[ 1:numchr, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb ## take care of X and Y (bug fix) if (!Y) { which (datainfo$Chrom==24) ->yind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) 2), 1), lab = c(1, 8, 7),xaxt="n", cex.main=0.8,cex.lab=0.75, tcl = -.2, xaxs = "i", cex.axis=0.75) g<-1 gl <- gainloss[[g]] ### tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", type = "h", xlab = "", ylab = "Fraction gained or lost", pch = 18, ylim = ylm,main=title1, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 2, col = col.scheme$abline2) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .3, col = "purple4",font=2, cex = .75) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .3, col = "purple4",font=2, cex = .75) ## from coxout zC <- coxout$Cox.score zP<- coxout$fdr.p indP <- which(zP <= coxP) #abs(zC)->zCa plot(kb.loc[indP], zC[indP], col = "turquoise4", ylim = coxlim, type = "h", xlab = "", ylab = "Cox score", pch = 18,main=title2, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) abline(h=0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline3) abline(v = chrom.centr, lty = 2, col = col.scheme$abline4) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) ##reset plot layout par (mfrow=c(1,1)) } ########################################################################################### ## ## logrank.Rcgh ## ## ## calculate logrank score for each clone ## ## requires survival data (time, censor) ## ## writes table ## ## logrank.Rcgh <- function (aCGH.obj, survtime, survcensor, chrominfo = hg_18, X = TRUE, Y = FALSE, thres = .263, data=log2.ratios(aCGH.obj) ) { ## aCGH and plotting arguments removed from function call threshold = TRUE; ngrid = 2; p.thres = c(0.01,0.05, 0.1, 0.2); mincolors = .1;cutplot = 0; quant.col = .11; onepage = TRUE; colored = TRUE; nlim = 1; numaut = 22; lrlim=c(-15,15); ylm = c(-1, 1); logrankP=0.05; datainfo <- clones.info(aCGH.obj) datax <- data ncol(datax)->nsam # get time and censor which(colnames(phenotype(aCGH.obj))==survtime)->st phenotype(aCGH.obj)[[st]]->stime which(colnames(phenotype(aCGH.obj))==survcensor)->sc phenotype(aCGH.obj)[[sc]]->scensor ### calc gains thres->chkthres if (length(chkthres)==1) chkthresout<-rep(chkthres,times=ncol(datax)) else chkthres->chkthresout xL<-gainTable(aCGH.obj,chkthresout,x=datax) xG<-as.data.frame(xL) xG<-t(xG) xG<-as.data.frame(xG) #yL<-aCGHsurv #yL<-as.data.frame(yL) zL<-c(NULL) zChi<-c(NULL) for (i in 1:ncol(xG)) { sum(xG[,i])->Si if (Si==nsam) { tmpp<-NA tmpChi<-NA zL<-c(zL,tmpp) zChi<-c(zChi,tmpChi) next } if (Si<1) { tmpp<-NA tmpChi<-NA zL<-c(zL,tmpp) zChi<-c(zChi,tmpChi) next } else { tmp <- survdiff (Surv (stime,scensor) ~ xG[,i], data=xG, na.action=na.exclude) tmpp<- 1 - (pchisq(tmp$chisq, df=1)) tmpChi<-tmp$chisq } zL<-c(zL,tmpp) zChi<-c(zChi,tmpChi) } ## calc losses aL<-lossTable(aCGH.obj,chkthresout,x=datax) aG<-as.data.frame(aL) aG<-t(aG) aG<-as.data.frame(aG) #bL<-aCGHsurv #bL<-as.data.frame(bL) dL<-c(NULL) dChi<-c(NULL) for (i in 1:ncol(aG)) { sum(aG[,i])->Si if (Si==nsam) { tmpp<-NA tmpChi<-NA dL<-c(dL,tmpp) dChi<-c(dChi,tmpChi) next } if (Si<1) { tmpp<-NA tmpChi<-NA dL<-c(dL,tmpp) dChi<-c(dChi,tmpChi) next } else { tmp <- survdiff (Surv (stime,scensor) ~ aG[,i], data=aG, na.action=na.exclude) tmpp<- 1 - (pchisq(tmp$chisq, df=1)) tmpChi<-tmp$chisq } dL<-c(dL,tmpp) dChi<-c(dChi,tmpChi) } zLf<-as.matrix(zL) zLf.fdr<-p.adjust(zLf,"fdr") zLf.fdr<-as.matrix(zLf.fdr) zLp<-cbind(zL,zLf.fdr) dLf<-as.matrix(dL) dLf.fdr<-p.adjust(dLf,"fdr") dLf.fdr<-as.matrix(dLf.fdr) ALLout<-cbind(zChi,zL,zLf.fdr,dChi,dL,dLf.fdr) colnames(ALLout)<-c("GAIN.chi.sq","GAIN.log.rank","GAIN.adjP.fdr","LOSS.chi.sq","LOSS.log.rank","LOSS.adjP.fdr") v<-cbind(datainfo,ALLout) return (v) } ## gainTable/lossTable ## ## internal functions to logrank.Rcgh ## calculate gains and losses and write binned tables ## ## ## make binned table of gains gainCalc<- function (y,thres) { if (y>thres) return(1) else return(0) } gainTable<- function(aCGH.obj, calcthres, x, chrominfo=hg_17 ) { gainOUT<-c(NULL) #x <- hmmPred.ratios(aCGH.obj) datainfo <- clones.info(aCGH.obj) for (j in 1:ncol(x)) { gainJ<- sapply(x[,j],gainCalc,thres=calcthres[j]) gainOUT<-cbind(gainOUT,gainJ) } colnames(gainOUT)<-colnames(x) return (gainOUT) } ###################################################### ## ## ## fix properly!!!! ### ## gainTable2<- function(aCGH.obj, calcthres, x, chrominfo=hg_17 ) { gainOUT<-c(NULL) #x <- hmmPred.ratios(aCGH.obj) datainfo <- clones.info(aCGH.obj) for (j in 1:ncol(x)) { gainJ<- sapply(x[,j],gainCalc,thres=calcthres) gainOUT<-cbind(gainOUT,gainJ) } colnames(gainOUT)<-colnames(x) return (gainOUT) } lossCalc<- function (y,thres) { if (yyind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) 2), 1), lab = c(1, 8, 7), xaxt="n",cex.main=0.8,cex.lab=0.75, tcl = -.2, xaxs = "i",cex.axis=0.75) ### for (g in 1:length(titles)) ### { g<-1 gl <- gainloss[[g]] ### tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) # ) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", type = "h", xlab = "", ylab = "Fraction gained or lost", pch = 18, ylim = ylm, main=title1, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 2, col = col.scheme$abline2) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .3, col = "purple4",font=2, cex = .75) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .3, col = "purple4",font=2, cex = .75) ## retrieve from logrankout zL <- logrankout$GAIN.adjP.fdr zChi <- logrankout$GAIN.chi.sq dL <- logrankout$LOSS.adjP.fdr dChi <- logrankout$LOSS.chi.sq ## plot gains indP <- which(zL <= logrankP) zLi<- -(log2(zL)) plot(kb.loc[indP], zLi[indP], col = "darkorange4", ylim = lrlim, type = "h", xlab = "", ylab = "Log rank adj P", pch = 18, main=title2, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ## plot losses indPl <- which(dL <= logrankP) dLi<- log2(dL) points(kb.loc[indPl], dLi[indPl], col = "darkorange4", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline3) abline(v = chrom.centr, lty = 2, col = col.scheme$abline4) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) ##reset plot layout par (mfrow=c(1,1)) } ########################################################################################### ## ## fisher.Rcgh ## ## ## fisher's exact test for each clone ## in relation to given phenotype ## ## gains and losses thresholded separately ## ## writes table ## ## fisher.Rcgh <- function (aCGH.obj, pheno, chrominfo = hg_18, thres = .263, data=log2.ratios(aCGH.obj) ) { ## aCGH and plotting arguments removed from function call threshold = TRUE; ngrid = 2; p.thres = c(0.01,0.05, 0.1, 0.2); mincolors = .1;cutplot = 0; quant.col = .11; onepage = TRUE; colored = TRUE; nlim = 1;X = TRUE; Y = FALSE; numaut = 22; lrlim=c(-15,15); ylm = c(-1, 1); logrankP=0.05; datainfo <- clones.info(aCGH.obj) datax <- data ncol(datax)->nsam ### calc gains thres->chkthres if (length(chkthres)==1) chkthresout<-rep(chkthres,times=ncol(datax))else chkthres->chkthresout xL<-gainTable(aCGH.obj,chkthresout,x=datax) xG<-as.data.frame(xL) xG<-t(xG) xG<-as.data.frame(xG) #yL<-pheno #yL<-as.data.frame(yL) zP<-c(NULL) zOR<-c(NULL) for (i in 1:ncol(xG)) { sum(xG[,i])->Si if (Si==nsam) { tmpP<-NA tmpOR<-NA zP<-c(zP,tmpP) zOR<-c(zOR,tmpOR) next } if (Si<1) { tmpP<-NA tmpOR<-NA zP<-c(zP,tmpP) zOR<-c(zOR,tmpOR) next } else { tmp <- fisher.test (xG[,i], pheno) tmpP<- tmp$p.value tmpOR<-tmp$estimate } zP<-c(zP,tmpP) zOR<-c(zOR,tmpOR) } ## calc losses aL<-lossTable(aCGH.obj,chkthresout,x=datax) aG<-as.data.frame(aL) aG<-t(aG) aG<-as.data.frame(aG) #bL<-aCGHsurv #bL<-as.data.frame(bL) dP<-c(NULL) dOR<-c(NULL) for (i in 1:ncol(aG)) { sum(aG[,i])->Si if (Si==nsam) { tmpP<-NA tmpOR<-NA dP<-c(dP,tmpP) dOR<-c(dOR,tmpOR) next } if (Si<1) { tmpP<-NA tmpOR<-NA dP<-c(dP,tmpP) dOR<-c(dOR,tmpOR) next } else { tmp <- fisher.test (aG[,i], pheno) tmpP<- tmp$p.value tmpOR<-tmp$estimate } dP<-c(dP,tmpP) dOR<-c(dOR,tmpOR) } zPf<-as.matrix(zP) zPf.fdr<-p.adjust(zPf,"fdr") zPf.fdr<-as.matrix(zPf.fdr) zPp<-cbind(zP,zPf.fdr) dPf<-as.matrix(dP) dPf.fdr<-p.adjust(dPf,"fdr") dPf.fdr<-as.matrix(dPf.fdr) ALLout<-cbind(zOR,zP,zPf.fdr,dOR,dP,dPf.fdr) colnames(ALLout)<-c("GAIN.odds.ratio","GAIN.fisher","GAIN.adjP.fdr","LOSS.odds.ratio","LOSS.fisher","LOSS.adjP.fdr") v<-cbind(datainfo,ALLout) return (v) } ## gainTable/lossTable ## ## internal functions to logrank.Rcgh ## calculate gains and losses and write binned tables ## ## ## make binned table of gains gainCalc<- function (y,thres) { if (y>thres) return(1) else return(0) } gainTable<- function(aCGH.obj, calcthres, x, chrominfo=hg_17 ) { gainOUT<-c(NULL) #x <- hmmPred.ratios(aCGH.obj) datainfo <- clones.info(aCGH.obj) for (j in 1:ncol(x)) { gainJ<- sapply(x[,j],gainCalc,thres=calcthres[j]) gainOUT<-cbind(gainOUT,gainJ) } colnames(gainOUT)<-colnames(x) return (gainOUT) } lossCalc<- function (y,thres) { if (y= minChanged) ## ##removing clones to skip from the dataset data <- data[clones.index,] data.thres <- data.thres[clones.index,] dataSign <- dataSign[clones.index,] datainfo <- datainfo[clones.index,] colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## start table: if (summarize.clones) bac.summary <- table.bac.func(dat = data.thres, colMatr = colmatr) ## creating color matrix for displaying intensity of gains and losses ## and for plotting p-values colors.gain <- maPalette(low = col.scheme$gain.low, high = col.scheme$gain.high, k = ngrid) colors.loss <- maPalette(low = col.scheme$loss.low, high = col.scheme$loss.high, k = ngrid) ## Now, start: ## if perform significance analysis on thresholded data only: #if (threshold) dataSign <- threshold.func(dataSign, thres) nr <- nrow(colmatr) nr <- nr + 1 tmp <- as.data.frame(matrix(0, ncol = 2, nrow = 1)) colnames(tmp) <- c("gainP", "lossP") gainloss <- lapply(1:nrow(colmatr), function(j) gainloss.func(dat = data, cols = which(colmatr[ j, ] == 1), thres = thres, quant = quant.col) ) dt <- dataSign[ ,colmatr[1,] == 1, drop = FALSE ] rsp <- rep(1, ncol(dt)) for (j in 2:nrow(colmatr)) { dt <- cbind(dt, dataSign[ ,colmatr[ j, ] == 1 ]) rsp <- c(rsp, rep(j, sum(colmatr[ j, ] == 1))) } rsp <- rsp - 1 ##Now preparing for plotting: numchr <- numaut if (X) numchr <- numchr + 1 if (Y) numchr <- numchr + 1 chrominfo <- chrominfo[ 1:numchr, ] ##compute cumulative kb locations start <- c(0, cumsum(chrominfo$length)) kb.loc <- datainfo$kb ## take care of X and Y (bug fix) if (!Y) { which (datainfo$Chrom==24) ->yind kb.loc <- kb.loc[-yind] } if (!X) { which (datainfo$Chrom==23) ->xind kb.loc <- kb.loc[-xind] } for (i in 1:nrow(chrominfo)) kb.loc[datainfo$Chrom == i] <- start[i] + datainfo$kb[datainfo$Chrom == i] ## preparation for graphs chrom.start <- c(0, cumsum(chrominfo$length))[1:numchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 ##now, plot par(mfrow = c((if (onepage) nr else 1), 1), lab = c(1, 8, 7), xaxt="n",cex.main=1.25,cex.lab=1, tcl = -.2, xaxs = "i", cex.axis=1) GLout<-c(NULL) for (g in 1:length(titles)) { gl <- gainloss[[g]] tl <- as.character(titles[g]) ylm[1] <- min(ylm, min(gl$lossP)) ylm[2] <- max(ylm, max(gl$gainP)) gp<-as.vector(gl$gainP) lp<-as.vector(gl$lossP) GLout<-cbind(GLout,gp,lp) ind <- which(gl$gainP >= cutplot) plot(kb.loc[ind], gl$gainP[ind], col = "green", type = "h", xlab="", #xlab = "Chromosome number", ylab = "Fraction gained or lost", pch = 18, main = tl, ylim = ylm, #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ind <- gl$lossP >= cutplot points(kb.loc[ind], -gl$lossP[ind], col = "red", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline1) abline(v = chrom.centr, lty = 3, col = "purple4") for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (X) if (i == numaut) mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("X", side = 1, at = (chrom.mid[numaut + 1]), line = .5, col = col.scheme$mtext, cex = .75,font=2) if (Y) if (i == numaut) mtext("Y", side = 3, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) else mtext("Y", side = 1, at = (chrom.mid[numaut + 2]), line = .5, col = col.scheme$mtext, cex = .75,font=2) } ## retrieve from fisherout zP <- fisherout$GAIN.adjP.fdr zOR <- fisherout$GAIN.odds.ratio dP <- fisherout$LOSS.adjP.fdr dOR <- fisherout$LOSS.odds.ratio ## plot gains indP <- which(zP <= fisherP) #zLi<- -(log2(zL)) for (h in 1:length (zOR)) { if (is.infinite(zOR[h])) zOR[h]<- 10 else next } #zLi <- zOR plot(kb.loc[indP], zOR[indP], col = "grey30", ylim = fisherlim, type = "h", xlab = "", ylab = "Odds Ratio", pch = 18, main=paste(titles, collapse = " vs "), #xlim = c(0, max(cumsum(chrominfo$length), kb.loc[ind], #rm.na = TRUE)) xlim=c(0,max(cumsum(chrominfo$length)[1:numchr])) ) ## plot losses indPl <- which(dP <= fisherP) #dLi<- log2(dL) for (h in 1:length (dOR)) { if (is.infinite(dOR[h])) dOR[h]<- 10 else next } points(kb.loc[indPl], -dOR[indPl], col = "grey30", type = "h") abline(h = 0) abline(v = cumsum(chrominfo$length), col = col.scheme$abline3) abline(v = chrom.centr, lty = 2, col = col.scheme$abline4) for (i in seq(1, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) for (i in seq(2, numaut, b = 2)) mtext(paste("", i), side = 1, at = chrom.mid[i], line = .3, col = "purple4",font=2, cex = .75) if (X) if (i == numaut) mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) else mtext("X", side = 1, at = chrom.mid[numaut + 1], line = .3, col = "purple4",font=2, cex = .75) if (Y) if (i == numaut) mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) else mtext("Y", side = 1, at = chrom.mid[numaut + 2], line = .3, col = "purple4",font=2, cex = .75) ##reset plot layout par (mfrow=c(1,1)) } ############################################################################################## ############################################################################################## ## ## correlations etc ## ## need to tidy this up!!! ## ############################################################################################### ############################################################################################### ###################################################################################################### ## ## scatMat.Rcgh ## ## ## scatMat.Rcgh <- function (dat) { pairs(dat,xlim=c(-2,2),ylim=c(-2,2), panel=function(x,y,...) { points(x,y,col="slategray4",cex=0.6,pch=16) abline(h=seq(-2,2,b=0.5),lty=3,col="grey") abline(h=0,lty=3) abline(v=seq(-1.5,1.5,b=0.5),lty=3,col="grey") abline(v=0,lty=3) lm(y~x)->lm.out abline(lm.out,,lty=2,col=4) cor(x,y,use="pairwise", "pearson")->dat.cor round(dat.cor,digits=5)->dat.cor text(1,-1.2, dat.cor) } ) } ##################################################################################################### ## ## scatBland.Rcgh ## ## ## scatBland.Rcgh <- function (dat,main=NULL) { pairs(dat,xlim=c(-2,2),ylim=c(-2,2), main=main, upper.panel=function(x,y,...) { points(x,y,col="slategray4",cex=0.8,pch=16) abline(h=seq(-2,2,b=0.5),lty=3,col="grey") abline(h=0,lty=3) abline(v=seq(-1.5,1.5,b=0.5),lty=3,col="grey") abline(v=0,lty=3) lm(y~x)->lm.out abline(lm.out,,lty=2,col=4) cor(x,y,use="pairwise", "pearson")->dat.cor round(dat.cor,digits=5)->dat.cor text(1,-1.2, dat.cor) }, lower.panel=function(x,y,...) { mn <<- 0.5*(x+y) diff <- x-y points(mn,diff,pch=16,cex=0.8,col="antiquewhite4")#xlab="Mean",ylab="Difference") #sub= paste("Mean = ",formatC(mean(diff,na.rm=T),digits=3)), ) abline(h=seq(-2,2,b=0.5),lty=3,col="grey") abline(h=0,lty=3) abline(v=seq(-1.5,1.5,b=0.5),lty=3,col="grey") abline(v=0,lty=3) abline(h=mean(diff,na.rm=T),lty=4,col="purple4") abline(h=mean(diff,na.rm=T)+c(2,-2)*sqrt(var(diff,na.rm=T)),lty=6,col="purple") } ) } #################################################################################################### ## ## scatChromAll.Rcgh ## ## ## a wee wrapper... ## scatChromAll.Rcgh <- function(aCGH.obj, data=log2.ratios(aCGH.obj)) { for (i in 1:24) { scatChrom.Rcgh(aCGH.obj, chrom=i, data=data) } } ###################################################################################################### ## ## scatChrom.Rcgh ## ## ## scatChrom.Rcgh <- function (aCGH.obj, chrom, data) { clones.info(aCGH.obj)->clonesinfo clonesinfo[clonesinfo$Chrom==chrom,]->dat.chrom which (clonesinfo$Chrom==chrom)->dat.chrom.ind data[dat.chrom.ind,]->dat pairs(dat,main=paste("Chromosome",chrom),#xlim=c(-2,2),ylim=c(-2,2), upper.panel=function(x,y,...) { par(xaxt="n",yaxt="n",usr=c(-2,2,-2,2)) points(x,y,col="slategray4",cex=0.6,pch=16) abline(h=seq(-2,2,b=0.5),lty=3,col="grey") abline(h=0,lty=3) abline(v=seq(-1.5,1.5,b=0.5),lty=3,col="grey") abline(v=0,lty=3) lm(y~x)->lm.out abline(lm.out,,lty=2,col=4) cor(x,y,use="pairwise", "pearson")->dat.cor round(dat.cor,digits=5)->dat.cor text(1,-1.2, dat.cor) }, lower.panel=function(x,y,...) { #mn <<- 0.5*(x+y) par(usr=c(0,max(dat.chrom$kb),-2,2),xaxt="n",yaxt="n") diff <- x-y points(dat.chrom$kb,diff,pch=16,cex=0.6,col="darkblue")#xlab="Mean",ylab="Difference") #sub= paste("Mean = ",formatC(mean(diff,na.rm=T),digits=3)), ) #abline(h=seq(-2,2,b=0.5),lty=3,col="grey") abline(h=0,lty=1) abline(v=hg_17$centromere[chrom],lty=3,col="purple4") abline(h=0.263,lty=2,col="green") abline(h=-0.263,lty=2,col="red") abline(h=seq(-1.5,1.5,b=0.5),lty=3,col="grey") #abline(v=0,lty=3) #abline(h=mean(diff,na.rm=T),lty=4,col="purple4") #abline(h=mean(diff,na.rm=T)+c(2,-2)*sqrt(var(diff,na.rm=T)),lty=6,col="purple") } ) } ################################################################################################### ## ## corrMatPlots.Rcgh ## ## ## another wrapper! correlations.Rcgh <- function (aCGH.obj, data=log2.ratios(aCGH.obj)) { scatBland.Rcgh(data,main=c("Whole Genome")) scatChromAll.Rcgh(aCGH.obj, data=data) } ################################################################################################### ## ## corrMatPlots.Rcgh ## ## ## another wrapper! corrMatPlotsPdf.Rcgh <- function (aCGH.obj,data=log2.ratios(aCGH.obj), out="C:\\corrplots.pdf") { pdf(width=12,height=12,file=out,pointsize=8) scatBland.Rcgh(data,main=c("Whole Genome")) scatChromAll.Rcgh(aCGH.obj,data=data) dev.off() } ########################################################################################### ## ## matrix correlations ## ########################################################################################### ########################################################################################### ## ## matrix.Rcgh ## ## ## does Pearson's correlations as default ## and currently only one colour scheme ## ## ## this is the generic function ## ## currently untested ## ## matrix.Rcgh <- function ( aCGH.obj, data=thres.ratios(aCGH.obj), cor.method="pearson" ) { maPalette(low="darkblue",mid="white",high="darkred",k=8) -> matcols mat.data <- t(as.matrix(data)) cor (mat.data, method = cor.method) -> cor.tab image (cor.tab, xaxt="n", yaxt="n", col=matcols) clones.info(aCGH.obj) -> clones nrow (clones) -> clone.tot abline(v=0,lty=1) abline(v=1,lty=1) abline(h=0,lty=1) abline(h=1,lty=1) cum.clone <- 0 for (i in 1:22) { clones[clones$Chrom==i,] -> clones.chrom clones[clones$Chrom==i-1,] -> clones.chrom.prev cum.clone.prev <- cum.clone cum.clone.prev/clone.tot -> chrom.line.prev cum.clone <- cum.clone + nrow(clones.chrom) cum.clone/clone.tot -> chrom.line abline(v=chrom.line,lty=1) abline(h=chrom.line,lty=1) chrom.line.prev + ((chrom.line - chrom.line.prev)/2) -> lab.pos mtext(side=1,at=lab.pos,i,cex=0.75,col="purple4",font=2,line=0.15) mtext(side=2,at=lab.pos,i,cex=0.75,col="purple4",font=2,line=0.35) } i=23 clones[clones$Chrom==i,] -> clones.chrom clones[clones$Chrom==i-1,] -> clones.chrom.prev cum.clone.prev <- cum.clone cum.clone.prev/clone.tot -> chrom.line.prev cum.clone <- cum.clone + nrow(clones.chrom) cum.clone/clone.tot -> chrom.line abline(v=chrom.line,lty=1) abline(h=chrom.line,lty=1) chrom.line.prev + ((chrom.line - chrom.line.prev)/2) -> lab.pos mtext(side=1,at=lab.pos,"X",cex=0.75,col="purple4",font=2,line=0.15) mtext(side=2,at=lab.pos,"X",cex=0.75,col="purple4",font=2,line=0.35) i=24 clones[clones$Chrom==i,] -> clones.chrom clones[clones$Chrom==i-1,] -> clones.chrom.prev cum.clone.prev <- cum.clone cum.clone.prev/clone.tot -> chrom.line.prev cum.clone <- cum.clone + nrow(clones.chrom) cum.clone/clone.tot -> chrom.line abline(v=chrom.line,lty=1) abline(h=chrom.line,lty=1) chrom.line.prev + ((chrom.line - chrom.line.prev)/2) -> lab.pos mtext(side=1,at=lab.pos,"Y",cex=0.75,col="purple4",font=2,line=0.15) mtext(side=2,at=lab.pos,"Y",cex=0.75,col="purple4",font=2,line=0.35) cbind (clones,cor.tab) -> cor.out return (cor.out) } ############################################################################################# ## ## matrix.4.6.Rcgh ## ## hard-coded for 4.6K array issues!! ## ## matrix.4.6K.Rcgh <- function ( aCGH.obj, data=thres.ratios(aCGH.obj), cor.method="pearson" ) { maPalette(low="darkblue",mid="white",high="darkred",k=8) -> matcols mat.data <- t(as.matrix(data)) cor (mat.data, method = cor.method) -> cor.tab image (cor.tab, xaxt="n", yaxt="n", col=matcols) clones.info(aCGH.obj) -> clones nrow (clones) -> clone.tot abline(v=0,lty=1) abline(v=1,lty=1) abline(h=0,lty=1) abline(h=1,lty=1) cum.clone <- 0 for (i in 1:21) { clones[clones$Chrom==i,] -> clones.chrom clones[clones$Chrom==i-1,] -> clones.chrom.prev cum.clone.prev <- cum.clone cum.clone.prev/clone.tot -> chrom.line.prev cum.clone <- cum.clone + nrow(clones.chrom) cum.clone/clone.tot -> chrom.line abline(v=chrom.line,lty=1) abline(h=chrom.line,lty=1) chrom.line.prev + ((chrom.line - chrom.line.prev)/2) -> lab.pos mtext(side=1,at=lab.pos,i,cex=0.75,col="purple4",font=2,line=0.25,las=2) mtext(side=2,at=lab.pos,i,cex=0.75,col="purple4",font=2,line=0.35) } i=23 clones[clones$Chrom==i,] -> clones.chrom clones[clones$Chrom==i-1,] -> clones.chrom.prev cum.clone.prev <- cum.clone cum.clone.prev/clone.tot -> chrom.line.prev cum.clone <- cum.clone + nrow(clones.chrom) cum.clone/clone.tot -> chrom.line abline(v=chrom.line,lty=1) abline(h=chrom.line,lty=1) chrom.line.prev + ((chrom.line - chrom.line.prev)/2) -> lab.pos mtext(side=1,at=lab.pos,"X",cex=0.75,col="purple4",font=2,line=0.1) mtext(side=2,at=lab.pos,"X",cex=0.75,col="purple4",font=2,line=0.35) cbind (clones,cor.tab) -> cor.out return (cor.out) } ## ## ## similarly, ignoring X and Y for 4.6K ## ## hard-coded to Rachael WT data - need to re-write!!!! ## ## matrix.noXY.4.6K.Rcgh <- function ( aCGH.obj, data=thres.ratios(aCGH.obj), method="pearson" ) { maPalette(low="darkblue",mid="white",high="darkred",k=8) -> matcols data <- data[1:3694,] mat.data <- t(as.matrix(data)) cor (mat.data, method = method) -> cor.tab image (cor.tab, xaxt="n", yaxt="n", col=matcols) clones.info(aCGH.obj)[1:3694,] -> clones clones[1:3694,] ->clones nrow (clones) -> clone.tot abline(v=0,lty=1) abline(v=1,lty=1) abline(h=0,lty=1) abline(h=1,lty=1) cum.clone <- 0 for (i in 1:21) { clones[clones$Chrom==i,] -> clones.chrom clones[clones$Chrom==i-1,] -> clones.chrom.prev cum.clone.prev <- cum.clone cum.clone.prev/clone.tot -> chrom.line.prev cum.clone <- cum.clone + nrow(clones.chrom) cum.clone/clone.tot -> chrom.line abline(v=chrom.line,lty=1) abline(h=chrom.line,lty=1) chrom.line.prev + ((chrom.line - chrom.line.prev)/2) -> lab.pos mtext(side=1,at=lab.pos,i,cex=0.75,col="purple4",font=2,line=0.1) mtext(side=2,at=lab.pos,i,cex=0.75,col="purple4",font=2,line=0.35) } colnames(cor.tab) <- clones$Clone cbind (clones,cor.tab) -> cor.out return (cor.out) } ############################################################################################### ## ## plotGenomeCol.Rcgh ## ## ## plots data from each experiment along ordered length of genome ## ## plotGenomeCol.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), naut = 22, Y = TRUE, X = TRUE, data = log2.ratios(aCGH.obj), coldata = aws.ratios(aCGH.obj), colthres = 0.15, chrominfo = hg_18, lw= -2, hi=2,plotthres=0.263, samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { yScale = c(lw, hi) datainfo <- clones.info(aCGH.obj) ##total number of chromosomes to plot: nchr <- naut if (X) nchr <- nchr+1 if (Y) nchr <- nchr+1 nsamples <- length(samplenames) ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] coldata <- coldata[ord,] ##screening out unmapped clones ind.unmap <- which(is.na(chrom) | is.na(kb) | (chrom > (naut+2))) if (length(ind.unmap) > 0) { chrom <- chrom[-ind.unmap] kb <- kb[-ind.unmap] data <- data[-ind.unmap,] coldata <- coldata[-ind.unmap,] } ##removing chromosome not to plot: data <- data[chrom <= nchr,] coldata <- coldata[chrom <= nchr,] kb <- kb[chrom <= nchr] chrom <- chrom[chrom <= nchr] chrominfo <- chrominfo[1:nchr,] chrom.start <- c(0, cumsum(chrominfo$length))[1:nchr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) par(cex = .6, pch = 16, lab = c(1, 6, 7), cex.axis = 1.2, xaxt="n", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] colvec <- coldata[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##Now, determine vertical scale for each chromosome: y.min <- rep(yScale[1], nrow(chrominfo)) y.max <- rep(yScale[2], nrow(chrominfo)) for (i in 1:nrow(chrominfo)) { if (minna(vec[(chrom==i)]) < y.min[i]) y.min[i] <- minna(vec[(chrom==i)]) if (maxna(vec[(chrom==i)]) > y.max[i]) y.max[i] <- maxna(vec[(chrom==i)]) } ##set genome scale to the min and mx values of the rest of the chromosomes: ygenome.min <- minna(y.min) ygenome.max <- maxna(y.max) ## no change blue vec -> vec1 plot(clone.genomepos / 1000, vec1, ylim = yScale, xlab = "", ylab = "", xlim = c(min(clone.genomepos[clone.genomepos > 0], na.rm = TRUE) / 1000, clone.genomepos[sum(clone.genomepos>0)] / 1000), col="darkblue") ## gain green which (colvec > colthres) -> v2 vec[v2] -> vec2 clone.genomepos[v2] -> clone.genomepos2 points(clone.genomepos2 / 1000, vec2, col="green") ## loss red which (colvec < -colthres) -> v3 vec[v3] -> vec3 clone.genomepos[v3] -> clone.genomepos3 points(clone.genomepos3 / 1000, vec3, col="red") title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) for (i in seq(1,naut,b=2)) mtext(paste("", i), side = 1, at = (chrom.mid[i]/1000), line=1, col="purple4",cex=0.75,font=2) for (i in seq(2,naut,b=2)) mtext(paste("", i), side = 1, at = chrom.mid[i] / 1000, line=1, col="purple4",cex=0.75,font=2) if (X) mtext("X", side = 1, at = chrom.mid[naut + 1] / 1000, line=1, col="purple4",cex=0.75,font=2) if (Y) mtext("Y", side = 1, at = chrom.mid[naut + 2] / 1000, line=1, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) ##abline(h=seq(ygenome.min,ygenome.max, b=.2), lty=3) abline(h = seq(lw,hi, b=.5), lty = 3,col="gray25") abline(h=0,lty=1,col="black") #abline(h=-(plotthresout[[k]]),lty=5,col="red",lwd=1.5) #abline(h=plotthresout[[k]],lty=5,col="green",lwd=1.5) abline(v = (chrominfo$centromere + chrom.start) / 1000, lty = 3, col = "purple4") } } ############################################################################################ ## ## plotChromCol.Rcgh ## ## ## plot of single chromosome data from experiment ## ## plotChromCol.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), chr, data = log2.ratios(aCGH.obj), plotthres=0.263,lines=F,coldata = aws.ratios(aCGH.obj), colthres = 0.15, chrominfo = hg_18, lw= -2, hi= 2,points.col="darkblue", samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { yScale=c(lw,hi) datainfo <- clones.info(aCGH.obj) nsamples <- length(samplenames) ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] coldata <- coldata[ord,] ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##removing chromosome not to plot: data <- data[chrom==chr,] coldata <- coldata[chrom==chr,] kb <- kb[chrom==chr] chrom <- chrom[chrom==chr] chrominfo <- chrominfo[chr,] chrom.start <- c(0, cumsum(chrominfo$length))[chr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) chrom.length<-chrominfo$length par(cex = .6, pch = 16, lab = c(10, 6, 7), cex.axis = 1.2,xaxt="s", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] colvec <- coldata[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) #i<-chr clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##lines if (lines) { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "",type="b", xlim=c(0,chrom.length/1000), col=points.col) } else { ## no change blue vec -> vec1 plot(kb / 1000, vec1, ylim = yScale, xlab = "", ylab = "", xlim=c(0,chrom.length/1000), col="darkblue") ## gain green which (colvec > colthres) -> v2 vec[v2] -> vec2 kb[v2] -> kb2 points(kb2 / 1000, vec2, col="green") ## loss red which (colvec < - colthres) -> v3 vec[v3] -> vec3 kb[v3] -> kb3 points(kb3 / 1000, vec3, col="red") } title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) mtext(paste("Chromosome", chr, " (Mb)"), side = 1, at = (chrom.mid[i]/1000),line=3, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) abline(h = seq(lw,hi, b=.5), lty = 3,col="gray25") abline(h=0,lty=1,col="black") ##abline(h=-(plotthresout[[k]]),lty=5,lwd=1.5,col="red") ##abline(h=plotthresout[[k]],lty=5,lwd=1.5,col="green") abline(v = (chrominfo$centromere) / 1000, lty = 3, col = "purple4") } } ########################################################################################## ## ## cbs.Rcgh ## ## ## from DNAcopy ## ## no NAs allowed at this point ## require(DNAcopy) cbs.Rcgh <- function (aCGH.obj,data=log2.ratios(aCGH.obj),datadir,h=10000) { #data<-log2.ratios(aCGH.obj) #chr<-clones.info(aCGH.obj)$Chrom #chrout<-c(NULL) z <- NULL for (i in 1:ncol(data)) { #create CNA obj CNA(cbind(data[,i]), clones.info(aCGH.obj)$Chrom, clones.info(aCGH.obj)$kb, data.type="logratio",sampleid=sample.names(Mcgh)[i]) -> CNA.obj #smooth smooth.CNA(CNA.obj) -> CNA.smo #run CBS segment(CNA.smo,verbose=2,nperm=h) -> CNA.seg #write write.table(CNA.seg$output,file=paste(datadir,sample.names(Mcgh)[i],".cbs.segment.txt",sep=""),sep="\t",col.names=NA) y <- NULL for (i in 1:nrow(CNA.seg$output)) { rep(CNA.seg$output$seg.mean[i],times=CNA.seg$output$num.mark[i])->x c(y,x)->y } cbind(z,y) -> z } return(z) } cbs.ratios <- function(aCGH.obj) aCGH.obj$cbs.ratios "cbs.ratios<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$cbs.ratios)) { cbs.ratios.ok <- all( sapply(1:length(aCGH.obj$cbs.ratios), function(i) all(dim(aCGH.obj$cbs.ratios[[i]]) == dim(value[[i]])) ) ) if (cbs.ratios.ok) stop("invalid replacement dimensions") } aCGH.obj$cbs.ratios<- value aCGH.obj } ########################################################################################## ## ## cbs.smooth.Rcgh ## ## ## from DNAcopy ## ## no NAs allowed at this point ## require(DNAcopy) cbs.smooth.Rcgh <- function (aCGH.obj,data=log2.ratios(aCGH.obj)) { #data<-log2.ratios(aCGH.obj) #chr<-clones.info(aCGH.obj)$Chrom chrout<-c(NULL) z <- NULL for (i in 1:ncol(data)) ##or whatever { #create CNA obj CNA(cbind(data[,i]), clones.info(aCGH.obj)$Chrom, clones.info(aCGH.obj)$kb, data.type="logratio") -> CNA.obj qq<-NULL #smooth smooth.CNA(CNA.obj) -> CNA.smo CNA.smo[,3] -> qq cbind(z,qq)->z #run CBS #segment(CNA.smo) -> CNA.seg #y <- NULL #for (i in 1:nrow(CNA.smo)) # { #rep(CNA.seg$output$seg.mean[i],times=CNA.seg$output$num.mark[i])->x #c(y,x)->y #} #cbind(z,y) -> z } return(z) } cbs.smoothed.ratios <- function(aCGH.obj) aCGH.obj$cbs.smoothed.ratios "cbs.smoothed.ratios<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$cbs.smoothed.ratios)) { cbs.smoothed.ratios.ok <- all( sapply(1:length(aCGH.obj$cbs.smoothed.ratios), function(i) all(dim(aCGH.obj$cbs.smoothed.ratios[[i]]) == dim(value[[i]])) ) ) if (cbs.smoothed.ratios.ok) stop("invalid replacement dimensions") } aCGH.obj$cbs.smoothed.ratios<- value aCGH.obj } ############################################################################################ ## ## idChrom.Rcgh ## ## ## plot of single chromosome data from experiment ## ## attempt to click and label points idChrom.Rcgh <- function(aCGH.obj, samples = 1:num.samples(aCGH.obj), chr, data = log2.ratios(aCGH.obj), id=Gene_plus,plotthres=0.263,lines=F, chrominfo = hg_18, lw= -2, hi= 2,points.col="darkblue", samplenames = sample.names(aCGH.obj), ylb = "Log2Ratio") { yScale=c(lw,hi) datainfo <- clones.info(aCGH.obj) nsamples <- length(samplenames) ##reordering according to genomic position ord <- order(datainfo$Chrom, datainfo$kb) chrom <- datainfo$Chrom[ord] kb <- datainfo$kb[ord] data <- data[ord,] genes<-datainfo$Genes[ord] ## sorting out plot thresholds if (length(plotthres)==1) plotthresout<-rep(plotthres,times=length(samples)) else plotthres->plotthresout ##removing chromosome not to plot: data <- data[chrom==chr,] kb <- kb[chrom==chr] genes <- genes[chrom==chr] chrom <- chrom[chrom==chr] chrominfo <- chrominfo[chr,] chrom.start <- c(0, cumsum(chrominfo$length))[chr] chrom.centr <- chrom.start + chrominfo$centr chrom.mid <- chrom.start + chrominfo$length / 2 chrom.rat <- chrominfo$length / max(chrominfo$length) chrom.length<-chrominfo$length par(cex = .6, pch = 16, lab = c(10, 6, 7), cex.axis = 1.2,xaxt="s", xaxs = "i") for (k in (1:length(samples))) { vec <- data[ ,samples[k] ] name <- samplenames[samples[k]] clone.genomepos <- rep(0, length(kb)) for (i in 1:nrow(chrominfo)) #i<-chr clone.genomepos[chrom == i] <- kb[chrom == i] + chrom.start[i] ##lines if (lines) { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "",type="b", xlim=c(0,chrom.length/1000), col=points.col) } else { plot(kb / 1000, vec, ylim = yScale, xlab = "", ylab = "", xlim=c(0,chrom.length/1000), col=points.col) } title(main = paste(samples[k], " ", name), ylab = ylb, xlab = "", cex.lab = 1.25, cex.main = 1.25) mtext(paste("Chromosome", chr, " (Mb)"), side = 1, at = (chrom.mid[i]/1000),line=3, col="purple4",cex=0.75,font=2) abline(v = c(chrom.start / 1000, (chrom.start[nrow(chrominfo)] + chrominfo$length[nrow(chrominfo)]) / 1000), lty = 1) abline(h = seq(lw,hi, b=.5), lty = 3,col="gray25") abline(h=0,lty=1,col="black") abline(h=-(plotthresout[[k]]),lty=5,lwd=1.5,col="red") abline(h=plotthresout[[k]],lty=5,lwd=1.5,col="green") abline(v = (chrominfo$centromere) / 1000, lty = 3, col = "purple4") identify (kb / 1000, vec, labels=genes, col="purple4") } } #################################### ## ## ## merge.Rcgh ## ## ## merge.Rcgh <- function (aCGH.obj, pred.values=cbs.ratios(aCGH.obj), obs.values=log2.ratios(aCGH.obj)) { merge.out <- c(NULL) for (i in 1:ncol(obs.values)) { mergeLevels(vecObs=obs.values[,i], vecPred=pred.values[,i], verbose=1) -> merge.tmp cbind(merge.out,merge.tmp$vecMerged) -> merge.out } return (merge.out) } cbs.merged.ratios <- function(aCGH.obj) aCGH.obj$cbs.merged.ratios "cbs.merged.ratios<-" <- function(aCGH.obj, value) { if (!is.aCGH(aCGH.obj)) stop("object is not of class aCGH") if (!is.null(aCGH.obj$cbs.merged.ratios)) { cbs.merged.ratios.ok <- all( sapply(1:length(aCGH.obj$cbs.merged.ratios), function(i) all(dim(aCGH.obj$cbs.merged.ratios[[i]]) == dim(value[[i]])) ) ) if (cbs.merged.ratios.ok) stop("invalid replacement dimensions") } aCGH.obj$cbs.merged.ratios<- value aCGH.obj }