startRun <- 486 endRun <- 486 runIndex="g" lengthAll <- 3201157 shift <- 5000 intervallAll <- 10000 over <- intervallAll%/%shift N1 <- lengthAll%/%shift endRunAll <- (N1-over+1) # 639 source("/seppdata/sepp/linkage/release/funct.R") #exonic <- read.table(paste("/seppdata/sepp/linkage/release/1000g20101123.txt.exonic_variant_function",sep=""),header = FALSE, sep = "\t", quote = "",as.is = TRUE,skip=0) #variant <- read.table(paste("/seppdata/sepp/linkage/release/1000g20101123.txt.variant_function",sep=""),header = FALSE, sep = "\t", quote = "",as.is = TRUE,skip=0) #tfbs <- read.table(paste("/seppdata/sepp/linkage/release/1000g20101123.txt.hg19_tfbsConsSites",sep=""),header = FALSE, sep = "\t", quote = "",as.is = TRUE,skip=0) #save(exonic,variant,tfbs,file="1000g20101123_snp_annot.Rda") load(file="../1000g20101123_snp_annot.Rda") #exonic$V2 exonicClass <- c("frameshift deletion","frameshift insertion","frameshift substitution","nonframeshift deletion","nonframeshift insertion","nonframeshift substitution","nonsynonymous SNV","stopgain SNV","stoploss SNV","synonymous SNV","unknown") #variant$V1 variantClass <- c("downstream","exonic","exonic;splicing","intergenic","intronic","ncRNA_exonic","ncRNA_intronic","ncRNA_splicing","ncRNA_UTR3","ncRNA_UTR5","ncRNA_UTR5;ncRNA_UTR3","splicing","upstream","upstream;downstream","UTR3","UTR5","UTR5;UTR3") #tfbs$V2 #soE <- sort.int(exonic$V5, decreasing = FALSE, index.return = TRUE) soE <- exonic$V5 #soV <- sort.int(variant$V4, decreasing = FALSE, index.return = TRUE) soV <- variant$V4 #soT <- sort.int(tfbs$V4, decreasing = FALSE, index.return = TRUE) soT <- tfbs$V4 avnoIBD <- list() avnoSnps <- list() avnoSamp <- list() avibdLength <- list() avibdPos <- list() ibdD <- list() tfbsCount <- list() exonicCount <- rep(0,length(exonicClass)) variantCount <- rep(0,length(variantClass)) allCount <- 0 for (posAll in (startRun+1):(endRun+1)) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste("resIBD_chr1",pRange,".Rda",sep="")) #save(mergedIBD,resIBD1,resIBD2,mergedIBD1,mergedIBD2,file=paste("resIBD_chr1",pRange,".Rda",sep="")) #res_t <- list( #[[1]]number=ibd_res_idx, #[[2]]bicluster=n, #[[3]]chrom=chrom, #[[4]]ibdPos=ibdPos, #[[5]]ibdLength=ibdLength, #[[6]]numberSamples=lq1A, #[[7]]numberSnps=lq2A, #[[8]]noSamples=noSample, #[[9]]noSnps=noSnp, #[[10]]countrySamples=labelsEUA, #[[11]]idSamples=labelsNAA, #[[12]]labelSamples=labels_ALLA, #[[13]]techSamples=labelsTechA, #[[14]]maxSamples=labelsNA2, #[[15]]snpPositions=physRangeA, #[[16]]snpAlleles=allelesA, #[[17]]snpNames=snpNamesA, #[[18]]snpFreq=snpFreqA, #[[19]]snpGroupFreq=snpGroupFreqA, #[[20]]snpChange=snpChangeA, #[[21]]snpsPerSample=snpsPerSample, #[[22]]samplePerSnp=samplePerSnp) noIBD <- length(mergedIBD) avnoIBD[[posAll]] <- noIBD #setSnps <- sapply(mergedIBD,function(x) {x[[9]]} , simplify=FALSE ) #setSamp <- sapply(mergedIBD,function(x) {x[[8]]} , simplify=FALSE) IBDannot <- list() if (noIBD > 0) { ibdPos <- sapply(mergedIBD,function(x) {x[[4]]} , simplify=FALSE) ibdS <- sort.int( unlist(ibdPos), decreasing = FALSE, index.return = TRUE) avibdPos[[posAll]] <- ibdS$x ibdD[[posAll]] <- diff(ibdS$x) ibdLength <- sapply(mergedIBD,function(x) {x[[5]]} , simplify=FALSE) avibdLength[[posAll]] <- unlist(ibdLength)[ibdS$ix] noSamp <- sapply(mergedIBD,function(x) {x[[6]]} , simplify=FALSE ) noSnps <- sapply(mergedIBD,function(x) {x[[7]]} , simplify=FALSE) avnoSnps[[posAll]] <- unlist(noSnps)[ibdS$ix] avnoSamp[[posAll]] <- unlist(noSamp)[ibdS$ix] for (ibdC in 1:noIBD) { allCount <- allCount + 1 IBDannot[[ibdC]] <- mergedIBD[[ibdC]] snpPositions <- mergedIBD[[ibdC]][[15]] mE <- match(snpPositions,soE,nomatch=0) g0 <- which(mE>0) exPos <- snpPositions[g0] mE <- mE[g0] ccE <- exonic$V2[mE] tA <- table(match(ccE,exonicClass)) typeA <- as.integer(names(tA)) ttA <- rep(0,length(exonicClass)) ttA[typeA] <- tA IBDannot[[ibdC]][[23]] <- exPos IBDannot[[ibdC]][[24]] <- ccE IBDannot[[ibdC]][[25]] <- ttA ppl <- which(ttA>0) exonicCount[ppl] <- exonicCount[ppl]+1 mV <- match(snpPositions,soV,nomatch=0) g0 <- which(mV>0) exPos <- snpPositions[g0] mV <- mV[g0] ccE <- variant$V1[mV] tA <- table(match(ccE,variantClass)) typeA <- as.integer(names(tA)) ttA <- rep(0,length(variantClass)) ttA[typeA] <- tA IBDannot[[ibdC]][[26]] <- exPos IBDannot[[ibdC]][[27]] <- ccE IBDannot[[ibdC]][[28]] <- ttA ppl <- which(ttA>0) variantCount[ppl] <- variantCount[ppl]+1 mT <- match(snpPositions,soT,nomatch=0) g0 <- which(mT>0) exPos <- snpPositions[g0] mT <- mT[g0] ccE <- tfbs$V2[mT] IBDannot[[ibdC]][[29]] <- exPos IBDannot[[ibdC]][[30]] <- ccE tfbsCount[[allCount]] <- length(g0) } } save(IBDannot,file=paste("resIBD_chr1",pRange,"annot.Rda",sep="")) ibd2excel(IBDannot,paste("ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes",pRange,"_annot.csv",sep="")) } noIBD <- unlist(avnoIBD) allIBDs <- sum(noIBD) print(allIBDs) sunoIBD <- summary(noIBD) print(sunoIBD) noSnps <- unlist(avnoSnps) sunoSnps <- summary(noSnps) print(sunoSnps) noSamp <- unlist(avnoSamp) sunoSamp <- summary(noSamp) print(sunoSamp) ibdLength <- unlist(avibdLength) suibdLength <- summary(ibdLength) print(suibdLength) ibdPos <- unlist(avibdPos) ibdDist <- unlist(ibdD) suibdDist <- summary(ibdDist) print(suibdDist) names(exonicCount) <- exonicClass print(exonicCount) names(variantCount) <- variantClass print(variantCount) tbfsA <- unlist(tfbsCount) print(summary(tbfsA)) save(startRun,endRun,posAll,noIBD,allIBDs,noSnps,noSamp,ibdLength,ibdPos,ibdDist,exonicCount,variantCount,tbfsA,file=paste("resAnno_",runIndex,".Rda",sep=""))