library(fabia) library(Matrix) source("/seppdata/sepp/linkage/release/scripts/funct.R") #source("/home/k3307/k337870/Rscripts/funct.R") ######################################## minruns <- 42 maxruns <- 42 output_index <- "A42" ######################################## continue <- FALSE #For extraction shunkFact <- 1.5 mismatches <- 0 dofabia <- FALSE #dobeagle <- TRUE doplink <- TRUE #dogermline <- TRUE #dodash <- TRUE dobeagle <- FALSE #doplink <- FALSE dogermline <- FALSE dodash <- FALSE dorelate <- FALSE domcmc <- FALSE pars <- read.table("dataSimParameters.txt") maxrunsS <- pars[1,2] snps <- pars[2,2] samplesN <- pars[3,2] ShunkLength <- pars[4,2] noImplanted <- pars[5,2] ## how many IBDs implanted <- pars[6,2] ## in how many samples lengthI <- pars[7,2] haploN <- 2*samplesN dataA <- 1:samplesN g2 <- 2*(1:snps) g1 <- g2 -1 s2 <- 2*dataA s1 <- s2-1 indi <- cbind(as.character(1:haploN),as.character(1:haploN),as.character(1:haploN),as.character(1:haploN)) ### FABIA Parameters: ps, inteA, thresA, minSNPs I <- 200000 SS <- 10 # biclusters per iteration A <- 1 # number iterations IBDlength <- 5000 # IBD length in kbp thresCount <- 1e-5 # p-value of random histogram hit, default 1e-5 minSNPsFactor <- 3/4 # percentage of IBD overlap; 1/2 for large to 3/4 for for small intervals ps <- 0.9 # percentage of Ls to remove / quantile psZ <- 0.8 # percentage of Zs to remove / quantile inteA <- IBDlength*10 # histogram length gives inteA/10 kbp DNA length pMAF <- 0.035 # max. minor allele frequency # estimated for 1000 genomes: 0.035 # max MAF is 0.05 ... kk <- 1 while ((I/inteA)*choose(samplesN,2)*(1-pbinom(kk,inteA,pMAF*pMAF))>thresCount) { kk <- kk +1} thresA <- kk minSNPs <- round(minSNPsFactor*thresA) ### END FABIA Parameters: ps, inteA, thresA, minSNPs ######################################################## ######################################################## ################# for simulation set explicitely thresA <- 122 minSNPs <- 92 ######################################################## ######################################################## ######################################################## if (!continue) { write(paste("SS: ",SS,sep=""),file=paste("fabiapars.txt",sep=""),ncolumns=10) write(paste("A: ",A,sep=""),file=paste("fabiapars.txt",sep=""),append=TRUE,ncolumns=10) write(paste("ps: ",ps,sep=""),file=paste("fabiapars.txt",sep=""),append=TRUE,ncolumns=10) write(paste("inteA: ",inteA,sep=""),file=paste("fabiapars.txt",sep=""),append=TRUE,ncolumns=10) write(paste("thresA: ",thresA,sep=""),file=paste("fabiapars.txt",sep=""),append=TRUE,ncolumns=10) write(paste("minSNPs: ",minSNPs,sep=""),file=paste("fabiapars.txt",sep=""),append=TRUE,ncolumns=10) } #################################################### dataS <- c(maxruns,snps,samplesN,ShunkLength,noImplanted,implanted,lengthI) dim(dataS) <- c(1,length(dataS)) if (dofabia) { if (!continue) { write(dataS,file=paste("fabiares",output_index,".txt",sep=""),ncolumns=100) write(dataS,file=paste("fabiaresIBD",output_index,".txt",sep=""),ncolumns=100) } fabiares <- list() fabiaresIBD <- list() } if (dobeagle) { if (!continue) { write(dataS,file=paste("beagleres1",output_index,".txt",sep=""),ncolumns=100) } beagleres1 <- list() if (!continue) { write(dataS,file=paste("beagleres2",output_index,".txt",sep=""),ncolumns=100) } beagleres2 <- list() } if (doplink) { if (!continue) { write(dataS,file=paste("plinkres",output_index,".txt",sep=""),ncolumns=100) } plinkres <- list() } if (dogermline) { if (!continue) { write(dataS,file=paste("germlineres1",output_index,".txt",sep=""),ncolumns=100) write(dataS,file=paste("germlineres2",output_index,".txt",sep=""),ncolumns=100) } germlineres1 <- list() germlineres2 <- list() } if (dodash) { if (!continue) { write(dataS,file=paste("dashres",output_index,".txt",sep=""),ncolumns=100) write(dataS,file=paste("dashIBD",output_index,".txt",sep=""),ncolumns=100) } dashres <- list() dashresIBD <- list() } if (dorelate) { if (!continue) { write(dataS,file=paste("relateres",output_index,".txt",sep=""),ncolumns=100) } relateres <- list() } if (domcmc) { if (!continue) { write(dataS,file=paste("mcmcres",output_index,".txt",sep=""),ncolumns=100) } mcmcres <- list() } #################################################### # loop for (run in minruns:maxruns) { # impl,implG,inter,implantI,implantN,intervalImp load(file=paste("dataSim",run,"Impl.Rda",sep="")) load(file=paste("dataSim",run,"Anno.Rda",sep="")) allsaF <- haploN allsa <- samplesN allsn <- snps #### j=0 noImplanted <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 impl <- list() implG <- list() inter <- list() implantI <- list() implantN <- list() intervalImp <- list() implantedA <- list() NegsaFA <- list() NegsaA <- list() PossaA <- list() NegsnA <- list() PossnA <- list() NegsaF <- 0 Negsa <- 0 Possa <- 0 Negsn <- 0 Possn <- 0 for (noIm in 1:noImplanted) { impl[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="numeric",nlines=1,skip=j)) j <- j+1 implG[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 inter[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 implantI[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 implantN[[noIm]] <- scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="character",nlines=1,skip=j) j <- j+1 intervalImp[[noIm]] <- matrix(0,implanted,4) interImp <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 for (i in 1:implanted) { intervalImp[[noIm]][i,] <- interImp } implantedA[[noIm]] <- length(impl[[noIm]]) NegsaFA[[noIm]] <- haploN - implantedA[[noIm]] NegsaF <- NegsaF + NegsaFA[[noIm]] NegsaA[[noIm]] <- samplesN - implantedA[[noIm]] Negsa <- Negsa + NegsaA[[noIm]] PossaA[[noIm]] <- implantedA[[noIm]] Possa <- Possa + PossaA[[noIm]] NegsnA[[noIm]] <- snps - length(inter[[noIm]]) Negsn <- Negsn + NegsnA[[noIm]] PossnA[[noIm]] <- length(inter[[noIm]]) Possn <- Possn + PossnA[[noIm]] } implantTmax <- list() implantT <- Matrix(0,samplesN,snps,sparse=TRUE) for (noIm in 1:noImplanted) { implantTmax[[noIm]] <- matrix(0,samplesN,snps) for (i in 1:implanted) { implantT[implG[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 implantTmax[[noIm]][implG[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 } } implantHmax <- list() implantH <- Matrix(0,haploN,snps,sparse=TRUE) for (noIm in 1:noImplanted) { implantHmax[[noIm]] <- Matrix(0,haploN,snps,sparse=TRUE) for (i in 1:implanted) { implantH[impl[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 implantHmax[[noIm]][impl[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 } } implGA <- c() implA <- c() interA <- c() for (noIm in 1:noImplanted) { implGA <- union(implGA,implG[[noIm]]) implA <- union(implA,impl[[noIm]]) interA <- union(interA,inter[[noIm]]) } implantedSF <- length(implA) implantedS <- length(implGA) #################################################### #FABIA ###### if (dofabia) { leeb <- 0 nob <- 0 while ((leeb<1)&&(nob<2)) { # source("/ssd1/bioinf/sepp/run_fabia.R") #source("/BIGtmp/scratch/k3307/k337870/SFSsimulation/run_fabia.R") source("/seppdata/sepp/linkage/SFSsimulation/run_fabia.R") leeb <- length(mergedIBD) nob <- nob + 1 } maxtrueP <- list() maxfalseP <- list() maxfalseN <- list() maxtrueN <- list() maxsens <- list() maxspec <- list() maxfdr <- list() maxfpr <- list() maxacc <- list() maxF1 <- list() maxmcc <- list() maxtruePsa <- list() maxfalsePsa <- list() maxfalseNsa <- list() maxtrueNsa <- list() maxsenssa <- list() maxspecsa <- list() maxfdrsa <- list() maxfprsa <- list() maxaccsa <- list() maxF1sa <- list() maxmccsa <- list() maxtruePsn <- list() maxfalsePsn <- list() maxfalseNsn <- list() maxtrueNsn <- list() maxsenssn <- list() maxspecsn <- list() maxfdrsn <- list() maxfprsn <- list() maxaccsn <- list() maxF1sn <- list() maxmccsn <- list() for (noIm in 1:noImplanted) { maxtrueP[[noIm]] <- 0 maxfalseP[[noIm]] <- 0 maxfalseN[[noIm]] <- 0 maxtrueN[[noIm]] <- 0 maxsens[[noIm]] <- 0 maxspec[[noIm]] <- 0 maxfdr[[noIm]] <- 0 maxfpr[[noIm]] <- 0 maxacc[[noIm]] <- 0 maxF1[[noIm]] <- 0 maxmcc[[noIm]] <- 0 maxtruePsa[[noIm]] <- 0 maxfalsePsa[[noIm]] <- 0 maxfalseNsa[[noIm]] <- 0 maxtrueNsa[[noIm]] <- 0 maxsenssa[[noIm]] <- 0 maxspecsa[[noIm]] <- 0 maxfdrsa[[noIm]] <- 0 maxfprsa[[noIm]] <- 0 maxaccsa[[noIm]] <- 0 maxF1sa[[noIm]] <- 0 maxmccsa[[noIm]] <- 0 maxtruePsn[[noIm]] <- 0 maxfalsePsn[[noIm]] <- 0 maxfalseNsn[[noIm]] <- 0 maxtrueNsn[[noIm]] <- 0 maxsenssn[[noIm]] <- 0 maxspecsn[[noIm]] <- 0 maxfdrsn[[noIm]] <- 0 maxfprsn[[noIm]] <- 0 maxaccsn[[noIm]] <- 0 maxF1sn[[noIm]] <- 0 maxmccsn[[noIm]] <- 0 } allSamp <- c() allSnps <- c() foundAll <- matrix(0,haploN,snps) if (leeb>0) { for (i in 1:leeb) { samplesF <- mergedIBD[[i]]$noSamples snpsF <- mergedIBD[[i]]$noSnps noSnpsF <- mergedIBD[[i]]$numberSnps noSamplesF <- mergedIBD[[i]]$numberSamples range <- snpsF[1]:snpsF[noSnpsF] allSamp <- union(allSamp,samplesF) allSnps <- union(allSnps,range) if (snpsF[1]==0) { snpsF[1] <- 1 } lengthIBD <- pos[snpsF[noSnpsF]] - pos[snpsF[1]] if (lengthIBD>shunkFact*ShunkLength) { for (noIm in 1:noImplanted) { foundAllmax <- matrix(0,haploN,snps) startS <- rep((snps+1),noSamplesF) endS <- rep(0,noSamplesF) for (j in 1:noSnpsF) { ssa <- sPF$sL[[snpsF[j]]] ssa1 <- intersect(samplesF,ssa) ssa2 <- match(ssa1,samplesF) if (length(ssa2)>1) { for (k in 1:length(ssa1)) { if (startS[ssa2[k]]>snpsF[j]) { startS[ssa2[k]] <- snpsF[j] } if (endS[ssa2[k]]0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } if (mcc>maxmcc[[noIm]]) { maxtrueP[[noIm]] <- trueP maxfalseP[[noIm]] <- falseP maxfalseN[[noIm]] <- falseN maxtrueN[[noIm]] <- trueN maxsens[[noIm]] <- sens maxspec[[noIm]] <- spec maxfdr[[noIm]] <- fdr maxfpr[[noIm]] <- fpr maxacc[[noIm]] <- acc maxF1[[noIm]] <- F1 maxmcc[[noIm]] <- mcc } ######## sampI <- intersect(impl[[noIm]],samplesF) minorI <- intersect(inter[[noIm]],snpsF) snpsI <- intersect(inter[[noIm]],range) ## samples detectedsa <- noSamplesF truePsa <- length(sampI) falsePsa <- detectedsa-truePsa falseNsa <- implantedA[[noIm]]-truePsa trueNsa <- allsaF - truePsa - falseNsa - falsePsa senssa <- truePsa/PossaA[[noIm]] specsa <- trueNsa/NegsaFA[[noIm]] if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/NegsaFA[[noIm]] accsa <- (truePsa+trueNsa)/allsaF st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } if (mccsa>maxmccsa[[noIm]]) { maxtruePsa[[noIm]] <- truePsa maxfalsePsa[[noIm]] <- falsePsa maxfalseNsa[[noIm]] <- falseNsa maxtrueNsa[[noIm]] <- trueNsa maxsenssa[[noIm]] <- senssa maxspecsa[[noIm]] <- specsa maxfdrsa[[noIm]] <- fdrsa maxfprsa[[noIm]] <- fprsa maxaccsa[[noIm]] <- accsa maxF1sa[[noIm]] <- F1sa maxmccsa[[noIm]] <- mccsa } ## snps detectedsn <- length(range) truePsn <- length(snpsI) falsePsn <- detectedsn-truePsn falseNsn <- length(inter[[noIm]]) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/PossnA[[noIm]] specsn <- trueNsn/NegsnA[[noIm]] if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/NegsnA[[noIm]] accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } if (mccsn>maxmccsn[[noIm]]) { maxtruePsn[[noIm]] <- truePsn maxfalsePsn [[noIm]]<- falsePsn maxfalseNsn[[noIm]] <- falseNsn maxtrueNsn[[noIm]] <- trueNsn maxsenssn[[noIm]] <- senssn maxspecsn[[noIm]] <- specsn maxfdrsn[[noIm]] <- fdrsn maxfprsn[[noIm]] <- fprsn maxaccsn[[noIm]] <- accsn maxF1sn[[noIm]] <- F1sn maxmccsn[[noIm]] <- mccsn } } ## all startS <- rep((snps+1),noSamplesF) endS <- rep(0,noSamplesF) for (j in 1:noSnpsF) { ssa <- sPF$sL[[snpsF[j]]] ssa1 <- intersect(samplesF,ssa) ssa2 <- match(ssa1,samplesF) if (length(ssa2)>1) { for (k in 1:length(ssa1)) { if (startS[ssa2[k]]>snpsF[j]) { startS[ssa2[k]] <- snpsF[j] } if (endS[ssa2[k]]0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa-truePsa falseNsa <- implantedSF-truePsa trueNsa <- allsaF - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/NegsaF if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/NegsaF accsa <- (truePsa+trueNsa)/allsaF st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn-truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } fabiaresIBD[[run]] <- matrix(0,noImplanted,35) for (noIm in 1:noImplanted) { dataRmax <- c(run,noIm,maxtruePsa[[noIm]],maxfalseNsa[[noIm]],maxfalsePsa[[noIm]],maxtrueNsa[[noIm]],maxsenssa[[noIm]],maxspecsa[[noIm]],maxfdrsa[[noIm]],maxfprsa[[noIm]],maxaccsa[[noIm]],maxF1sa[[noIm]],maxmccsa[[noIm]], maxtruePsn[[noIm]],maxfalseNsn[[noIm]],maxfalsePsn[[noIm]],maxtrueNsn[[noIm]],maxsenssn[[noIm]],maxspecsn[[noIm]],maxfdrsn[[noIm]],maxfprsn[[noIm]],maxaccsn[[noIm]],maxF1sn[[noIm]],maxmccsn[[noIm]], maxtrueP[[noIm]],maxfalseN[[noIm]],maxfalseP[[noIm]],maxtrueN[[noIm]],maxsens[[noIm]],maxspec[[noIm]],maxfdr[[noIm]],maxfpr[[noIm]],maxacc[[noIm]],maxF1[[noIm]],maxmcc[[noIm]]) write(dataRmax,file=paste("fabiaresIBD",output_index,".txt",sep=""),append=TRUE,ncolumns=100) fabiaresIBD[[run]][noIm,] <- dataRmax } dataR <- c(run,nob,leeb, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) fabiares[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("fabiares",output_index,".txt",sep=""),append=TRUE,ncolumns=100) } ################################################################# #BEAGLE ####### if (dobeagle) { beagle_com <- paste("java -jar /system/user/hochreit/bin/beagle.jar unphased=dataSim",run,"beagle.txt fastibd=true missing=N out=beagleres",output_index,".txt",sep="") print("Start BEAGLE") print(date()) system(beagle_com) print("End BEAGLE") print(date()) beagle_com <- paste("rm beagleres",output_index,".txt.dataSim",run,"beagle.txt.fibd",sep="") system(beagle_com) beagle_com <- paste("gunzip beagleres",output_index,".txt.dataSim",run,"beagle.txt.fibd.gz",sep="") system(beagle_com) resBeaglet <- read.table(file=paste("beagleres",output_index,".txt.dataSim",run,"beagle.txt.fibd",sep="")) ############################## #####RES1 with 1.0e-50 thres1 <- 1.0e-50 nnrr <- 0 while ((nnrr<1)&&(thres1<1.0e-4)) { resBeagle <- resBeaglet[which(resBeaglet[,5]0) { for (i in 1:nrow(resBeagle)) { samp1 <- resBeagle[i,1] samp2 <- resBeagle[i,2] begin <- resBeagle[i,3] end <- resBeagle[i,4] if (begin==0) { begin <- 1 } lengthIBD <- pos[end] - pos[begin] if ((lengthIBD>shunkFact*ShunkLength)) { allSamp <- union(allSamp,samp1) allSamp <- union(allSamp,samp2) range <- begin:end allSnps <- union(allSnps,range) foundAll[samp1,range] <- 1 foundAll[samp2,range] <- 1 } } } all <- samplesN*snps detected <- sum(foundAll) Pos <- sum(implantT) truePM <- foundAll*implantT trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implGA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa-truePsa falseNsa <- implantedS-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/Negsa if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/Negsa accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn - truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } dataR <- c(run, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) beagleres1[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("beagleres1",output_index,".txt",sep=""),append=TRUE,ncolumns=100) ################################# #####RES2 with 1.0e-80 thres1 <- 1.0e-80 nnrr <- 0 while ((nnrr<1)&&(thres1<1.0e-4)) { resBeagle <- resBeaglet[which(resBeaglet[,5]0) { for (i in 1:nrow(resBeagle)) { samp1 <- resBeagle[i,1] samp2 <- resBeagle[i,2] begin <- resBeagle[i,3] end <- resBeagle[i,4] if (begin==0) { begin <- 1 } lengthIBD <- pos[end] - pos[begin] if (lengthIBD>shunkFact*ShunkLength) { allSamp <- union(allSamp,samp1) allSamp <- union(allSamp,samp2) range <- begin:end allSnps <- union(allSnps,range) foundAll[samp1,range] <- 1 foundAll[samp2,range] <- 1 } } } all <- samplesN*snps detected <- sum(foundAll) Pos <- sum(implantT) truePM <- foundAll*implantT trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implGA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa -truePsa falseNsa <- implantedS-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/Negsa if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/Negsa accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn -truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } dataR <- c(run, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) beagleres2[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("beagleres2",output_index,".txt",sep=""),append=TRUE,ncolumns=100) } ################################################ #PLINK ###### if (doplink) { print("Start PLINK") print(date()) plink_com <- paste("plink --file dataSim",run,"plink --indep 50 5 2 --out dataSim",run,"plink",sep="") system(plink_com) # get new snp-set and convert into binary fileset plink_com <- paste("plink --file dataSim",run,"plink"," --extract dataSim",run,"plink.prune.in --make-bed --out dataSim",run,"plinkpruned",sep="") system(plink_com) # --genome with binary fileset plink_com <- paste("plink --bfile dataSim",run,"plinkpruned"," --noweb --genome --allow-no-sex --out plink",output_index,sep="") system(plink_com) # segmental sharing with binary fileset plink_com <- paste("plink --bfile dataSim",run,"plinkpruned"," --noweb --read-genome plink",output_index,".genome --segment --all-pairs --segment-length 600 --segment-snp 6000 --out plink",output_index,sep="") system(plink_com) print("End PLINK") print(date()) #plink --file dataSim30plink --indep 50 5 2 --out dataSim30plink #plink --file dataSim30plink --extract dataSim30plink.prune.in --make-bed --out dataSim30plinkpruned #plink --bfile dataSim30plinkpruned --noweb --genome --allow-no-sex --out plinkTT #plink --bfile dataSim30plinkpruned --noweb --read-genome plinkTT.genome --segment --all-pairs --segment-length 0.1 --segment-snp 20 --out plinkTT #plink_com <- paste("plink --file dataSim",run,"plink --noweb --genome --allow-no-sex --out plink",output_index,sep="") #system(plink_com) #plink_com <- paste("plink --file dataSim",run,"plink --noweb --read-genome plink",output_index,".genome --segment --all-pairs --segment-length 0.1 --segment-snp 20 --out plink",output_index,sep="") #system(plink_com) resPlink <- read.table(file= paste("plink",output_index,".segment",sep=""),header=TRUE) allSamp <- c() allSnps <- c() foundAll <- matrix(0,samplesN,snps) if (nrow(resPlink)>0) { for (i in 1:nrow(resPlink)) { samp1 <- resPlink[i,2] samp2 <- resPlink[i,4] begin <- resPlink[i,9] end <- resPlink[i,10] if (begin==0) { begin <- 1 } lengthIBD <- pos[end] - pos[begin] if (lengthIBD>shunkFact*ShunkLength) { allSamp <- union(allSamp,samp1) allSamp <- union(allSamp,samp2) range <- begin:end allSnps <- union(allSnps,range) foundAll[samp1,range] <- 1 foundAll[samp2,range] <- 1 } } } all <- samplesN*snps detected <- sum(foundAll) Pos <- sum(implantT) truePM <- foundAll*implantT trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implGA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa -truePsa falseNsa <- implantedS-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/Negsa if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/Negsa accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn -truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } dataR <- c(run, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) plinkres[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("plinkres",output_index,".txt",sep=""),append=TRUE,ncolumns=100) } ################################################ # GERMLINE ########## if (dogermline) { germline_com <- paste( "echo '1' > germin",output_index, " echo 'dataSim",run,"plink.map' >> germin",output_index, " echo 'dataSim",run,"plink.ped' >> germin",output_index, " echo 'germline_output",output_index,".txt' >> germin",output_index,sep="") system(germline_com) germline_com <- paste("germline -haploid -min_m 0.5 -err_hom ",mismatches," -err_het ",mismatches," -bits 1000 < germin",output_index,sep="") print("Start GERMLINE") print(date()) system(germline_com) print("End GERMLINE") print(date()) germline_com <- paste("rm germin",output_index,sep="") system(germline_com) resGermlinet <- read.table(file= paste("germline_output",output_index,".txt.match",sep="")) ########################## ## Filter 7000 resGermlinet1 <- resGermlinet[which(resGermlinet[,10]>7000),] resGermline <- resGermlinet1[which(resGermlinet1[,10]<20000),] #write.table(resGermline,file= paste("germline_filtered",output_index,".txt",sep="")) allSamp <- c() allSnps <- c() foundAll <- matrix(0,samplesN,snps) if (nrow(resGermline)>0) { for (i in 1:nrow(resGermline)) { samp1 <- resGermline[i,1] samp2 <- resGermline[i,3] begin <- resGermline[i,8] end <- resGermline[i,9] if (begin==0) { begin <- 1 } lengthIBD <- pos[end] - pos[begin] if (lengthIBD>shunkFact*ShunkLength) { allSamp <- union(allSamp,samp1) allSamp <- union(allSamp,samp2) range <- begin:end allSnps <- union(allSnps,range) foundAll[samp1,range] <- 1 foundAll[samp2,range] <- 1 } } } all <- samplesN*snps detected <- sum(foundAll) Pos <- sum(implantT) truePM <- foundAll*implantT trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implGA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa -truePsa falseNsa <- implantedS-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/Negsa if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/Negsa accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn -truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } dataR <- c(run, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) germlineres1[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("germlineres1",output_index,".txt",sep=""),append=TRUE,ncolumns=100) ########################## ## Filter 5000 resGermlinet1 <- resGermlinet[which(resGermlinet[,10]>5000),] resGermline <- resGermlinet1[which(resGermlinet1[,10]<20000),] #write.table(resGermline,file= paste("germline_filtered",output_index,".txt",sep="")) allSamp <- c() allSnps <- c() foundAll <- matrix(0,samplesN,snps) if (nrow(resGermline)>0) { for (i in 1:nrow(resGermline)) { samp1 <- resGermline[i,1] samp2 <- resGermline[i,3] begin <- resGermline[i,8] end <- resGermline[i,9] if (begin==0) { begin <- 1 } lengthIBD <- pos[end] - pos[begin] if (lengthIBD>shunkFact*ShunkLength) { allSamp <- union(allSamp,samp1) allSamp <- union(allSamp,samp2) range <- begin:end allSnps <- union(allSnps,range) foundAll[samp1,range] <- 1 foundAll[samp2,range] <- 1 } } } all <- samplesN*snps detected <- sum(foundAll) Pos <- sum(implantT) truePM <- foundAll*implantT trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implGA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa -truePsa falseNsa <- implantedS-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/Negsa if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/Negsa accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn -truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } dataR <- c(run, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) germlineres2[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("germlineres2",output_index,".txt",sep=""),append=TRUE,ncolumns=100) } ################################################ # DASH ###### if (dodash) { resGermlinet <- read.table(file=paste("germline_output",output_index,".txt.match",sep="")) resGermline <- resGermlinet[which(resGermlinet[,10]>150),] outG <- cbind(paste(resGermline[,1],format(as.double(resGermline[,2]),nsmall=1),sep=" "), paste(resGermline[,3],format(as.double(resGermline[,4]),nsmall=1),sep=" "), paste(resGermline[,5],sep=""), paste(resGermline[,6],resGermline[,7],sep=" "), paste(resGermline[,8],resGermline[,9],sep=" "), paste(resGermline[,10],sep=""), paste(resGermline[,11],sep=""), paste(resGermline[,12],sep=""), paste(resGermline[,13],sep=""), paste(resGermline[,14],sep="")) write.table(outG,file=paste("germline_filtered",output_index,".txt",sep=""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep="\t") dash_com <- paste("cut -f 1,2,4 germline_filtered",output_index,".txt | dash_cc dataSim",run,"plink.fam dash_output",output_index," -min 2",sep="") print("Start DASH") print(date()) system(dash_com) print("End DASH") print(date()) h2 <- 2*(1:haploN) h1 <- h2-1 Abb <- rep(0,(2*haploN+5)) Abb[h1+5] <- c(dataA,dataA) Abb[h2+5] <- c(format(as.double(dataA),nsmall=1),format((as.double(dataA)+0.1),nsmall=1)) Abb[1] <- "ccs" write(Abb,file= paste("tmp",output_index,".txt",sep=""),ncolumns=100000) dash_com <- paste("cat tmp",output_index,".txt dash_output",output_index,".clst > dash_in",output_index,".txt",sep="") system(dash_com) resDash <- read.table(file= paste("dash_in",output_index,".txt",sep=""),fill=TRUE) resDash <- resDash[-1,] posDasht <- read.table(file=paste("dataSim",run,"plink.map",sep="")) posDash <- posDasht[,4] nD <- ncol(resDash) noClusters <- nrow(resDash) maxtrueP <- list() maxfalseP <- list() maxfalseN <- list() maxtrueN <- list() maxsens <- list() maxspec <- list() maxfdr <- list() maxfpr <- list() maxacc <- list() maxF1 <- list() maxmcc <- list() maxtruePsa <- list() maxfalsePsa <- list() maxfalseNsa <- list() maxtrueNsa <- list() maxsenssa <- list() maxspecsa <- list() maxfdrsa <- list() maxfprsa <- list() maxaccsa <- list() maxF1sa <- list() maxmccsa <- list() maxtruePsn <- list() maxfalsePsn <- list() maxfalseNsn <- list() maxtrueNsn <- list() maxsenssn <- list() maxspecsn <- list() maxfdrsn <- list() maxfprsn <- list() maxaccsn <- list() maxF1sn <- list() maxmccsn <- list() for (noIm in 1:noImplanted) { maxtrueP[[noIm]] <- 0 maxfalseP[[noIm]] <- 0 maxfalseN[[noIm]] <- 0 maxtrueN[[noIm]] <- 0 maxsens[[noIm]] <- 0 maxspec[[noIm]] <- 0 maxfdr[[noIm]] <- 0 maxfpr[[noIm]] <- 0 maxacc[[noIm]] <- 0 maxF1[[noIm]] <- 0 maxmcc[[noIm]] <- 0 maxtruePsa[[noIm]] <- 0 maxfalsePsa[[noIm]] <- 0 maxfalseNsa[[noIm]] <- 0 maxtrueNsa[[noIm]] <- 0 maxsenssa[[noIm]] <- 0 maxspecsa[[noIm]] <- 0 maxfdrsa[[noIm]] <- 0 maxfprsa[[noIm]] <- 0 maxaccsa[[noIm]] <- 0 maxF1sa[[noIm]] <- 0 maxmccsa[[noIm]] <- 0 maxtruePsn[[noIm]] <- 0 maxfalsePsn[[noIm]] <- 0 maxfalseNsn[[noIm]] <- 0 maxtrueNsn[[noIm]] <- 0 maxsenssn[[noIm]] <- 0 maxspecsn[[noIm]] <- 0 maxfdrsn[[noIm]] <- 0 maxfprsn[[noIm]] <- 0 maxaccsn[[noIm]] <- 0 maxF1sn[[noIm]] <- 0 maxmccsn[[noIm]] <- 0 } allSamp <- c() allSnps <- c() foundAll <- matrix(0,samplesN,snps) if (noClusters>0) { for (i in 1:noClusters) { noSamples <- sort(unique(as.integer(round(as.numeric(resDash[i,5+which(is.na(resDash[i,6:nD])==FALSE)]))))) start <- which(posDash==resDash[i,4]) end <- which(posDash==resDash[i,5]) lengthIBD <- pos[end] - pos[start] if (lengthIBD>shunkFact*ShunkLength) { range <- start:end foundAll[noSamples,range] <- 1 allSamp <- union(allSamp,noSamples) allSnps <- union(allSnps,range) numberSamples <- length(noSamples) numberSnps <- length(range) for (noIm in 1:noImplanted) { foundAllmax <- matrix(0,samplesN,snps) foundAllmax[noSamples,range] <- 1 all <- samplesN*snps detected <- sum(foundAllmax) Pos <- sum(implantTmax[[noIm]]) truePM <- foundAllmax*implantTmax[[noIm]] trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } if (mcc>maxmcc[[noIm]]) { maxtrueP[[noIm]] <- trueP maxfalseP[[noIm]] <- falseP maxfalseN[[noIm]] <- falseN maxtrueN[[noIm]] <- trueN maxsens[[noIm]] <- sens maxspec[[noIm]] <- spec maxfdr[[noIm]] <- fdr maxfpr[[noIm]] <- fpr maxacc[[noIm]] <- acc maxF1[[noIm]] <- F1 maxmcc[[noIm]] <- mcc } ######## sampI <- intersect(implG[[noIm]],noSamples) snpsI <- intersect(inter[[noIm]],range) ## samples detectedsa <- numberSamples truePsa <- length(sampI) falsePsa <- detectedsa -truePsa falseNsa <- implantedA[[noIm]]-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/PossaA[[noIm]] specsa <- trueNsa/NegsaA[[noIm]] if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/NegsaA[[noIm]] accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } if (mccsa>maxmccsa[[noIm]]) { maxtruePsa[[noIm]] <- truePsa maxfalsePsa[[noIm]] <- falsePsa maxfalseNsa[[noIm]] <- falseNsa maxtrueNsa[[noIm]] <- trueNsa maxsenssa[[noIm]] <- senssa maxspecsa[[noIm]] <- specsa maxfdrsa[[noIm]] <- fdrsa maxfprsa[[noIm]] <- fprsa maxaccsa[[noIm]] <- accsa maxF1sa[[noIm]] <- F1sa maxmccsa[[noIm]] <- mccsa } ## snps detectedsn <- numberSnps truePsn <- length(snpsI) falsePsn <- detectedsn -truePsn falseNsn <- length(inter[[noIm]]) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/PossnA[[noIm]] specsn <- trueNsn/NegsnA[[noIm]] if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/NegsnA[[noIm]] accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } if (mccsn>maxmccsn[[noIm]]) { maxtruePsn[[noIm]] <- truePsn maxfalsePsn[[noIm]] <- falsePsn maxfalseNsn[[noIm]] <- falseNsn maxtrueNsn[[noIm]] <- trueNsn maxsenssn[[noIm]] <- senssn maxspecsn[[noIm]] <- specsn maxfdrsn[[noIm]] <- fdrsn maxfprsn[[noIm]] <- fprsn maxaccsn[[noIm]] <- accsn maxF1sn[[noIm]] <- F1sn maxmccsn[[noIm]] <- mccsn } } } } } all <- samplesN*snps detected <- sum(foundAll) Pos <- sum(implantT) truePM <- foundAll*implantT trueP <- sum(truePM) falseP <- detected-trueP falseN <- Pos-trueP trueN <- all - trueP - falseN - falseP Neg <- trueN+falseP sens <- trueP/Pos spec <- trueN/Neg if (detected>0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implGA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa -truePsa falseNsa <- implantedS-truePsa trueNsa <- allsa - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/Negsa if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/Negsa accsa <- (truePsa+trueNsa)/allsa st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn -truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } dashresIBD[[run]] <- matrix(0,noImplanted,35) for (noIm in 1:noImplanted) { dataRmax <- c(run,noIm, maxtruePsa[[noIm]],maxfalseNsa[[noIm]],maxfalsePsa[[noIm]],maxtrueNsa[[noIm]],maxsenssa[[noIm]],maxspecsa[[noIm]],maxfdrsa[[noIm]],maxfprsa[[noIm]],maxaccsa[[noIm]],maxF1sa[[noIm]],maxmccsa[[noIm]], maxtruePsn[[noIm]],maxfalseNsn[[noIm]],maxfalsePsn[[noIm]],maxtrueNsn[[noIm]],maxsenssn[[noIm]],maxspecsn[[noIm]],maxfdrsn[[noIm]],maxfprsn[[noIm]],maxaccsn[[noIm]],maxF1sn[[noIm]],maxmccsn[[noIm]], maxtrueP[[noIm]],maxfalseN[[noIm]],maxfalseP[[noIm]],maxtrueN[[noIm]],maxsens[[noIm]],maxspec[[noIm]],maxfdr[[noIm]],maxfpr[[noIm]],maxacc[[noIm]],maxF1[[noIm]],maxmcc[[noIm]]) write(dataRmax,file=paste("dashresIBD",output_index,".txt",sep=""),append=TRUE,ncolumns=100) dashresIBD[[run]][noIm,] <- dataRmax } dataR <- c(run,noClusters, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) dashres[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("dashres",output_index,".txt",sep=""),append=TRUE,ncolumns=100) } ################################################ # MCMC ###### if (domcmc) { mcmc_com <- paste("MCMC_IBDfinder -g dataSim",run,"mcmc.genotype -p dataSim",run,"mcmc.posmaf -z dataSim",run,"mcmc.initz",sep="") system(mcmc_com) dataR <- c(run) mcmcres[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("mcmcres",output_index,".txt",sep=""),append=TRUE,ncolumns=100) } ################################################ # RELATE ######## if (dorelate) { relate_com <- paste( "echo '1 #1=allpairs 0=normal run' > relate_options",output_index, " echo '8 #pair[0]' >> relate_options",output_index, " echo '9 #pair[1]' >> relate_options",output_index, " echo '0 #double recombination' >> relate_options",output_index, " echo '0 #LD=0=rsq2 LD=1=D' >> relate_options",output_index, " echo '0.001 #min default: 0.025' >> relate_options",output_index, " echo '0.001 # alim[0]' >> relate_options",output_index, " echo '5.0 # alim[1]' >> relate_options",output_index, " echo '0 #doParameter calculation (pars)' >> relate_options",output_index, " echo '0.3 # par[0] = a this is only used if doParameter is set to 1' >> relate_options",output_index, " echo '0.25 # par[1] = k2 this is only used if doParameter is set to 1' >> relate_options",output_index, " echo '0.5 # par[2] = k1 this is only used if doParameter is set to 1' >> relate_options",output_index, " echo '1 #ld_adj' >> relate_options",output_index, " echo '0.01 #epsilon' >> relate_options",output_index, " echo '5 #back default 50' >> relate_options",output_index, " echo '0 #doPrune' >> relate_options",output_index, " echo '0.1 #prune_value' >> relate_options",output_index, " echo '0 #fixA' >> relate_options",output_index, " echo '0.0 #fixA_value' >> relate_options",output_index, " echo '1 #fixK2' >> relate_options",output_index, " echo '0.0 #fixk2_value' >> relate_options",output_index, " echo '1 #calculateA' >> relate_options",output_index, " echo '0.013 #phi_value' >> relate_options",output_index, " echo '0.1 #convergence_tolerance' >> relate_options",output_index, " echo '3 #times_to_converge' >> relate_options",output_index, " echo '8 #times_to_run' >> relate_options",output_index, " echo '100 #back2' >> relate_options",output_index ,sep="") system(relate_com) system("rm relate_stat.txt*") system("rm relate_post.txt*") relate_com <- paste("relateHMM -o relate_options",output_index," -g dataSim",run,"relate.geno -c dataSim",run,"relate.chr -p dataSim",run,"relate.pos -post relate_post",output_index,".txt -k relate_stat",output_index,".txt",sep="") system(relate_com) probs <- read.table(file= paste("relate_stat",output_index,".txt",sep="")) popo <- which(probs<1e-5) noSamples <- c() for (i in length(popo)) { t <- 0 s <- 0 while (s