library(savR)
library(reshape2)

# Command-line arguments:
#   args[1]  run folder handed to savR()
#   args[2]  directory prefix under which the PNG plots are saved
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  # Fail early: a missing args[2] would otherwise become the literal
  # path prefix "NA" in every ggsave()/png() call further down.
  stop("Usage: Rscript <script> <run_folder> <output_dir>", call. = FALSE)
}
project <- savR(args[1])

################
##  Indexing  ##
################

# Read totals for lane 1: all clusters, then the pass-filter (PF) subset.
total_reads <- clusters(project, 1L)
pf_reads <- pfClusters(project, 1L)


################
##    Plots   ##
################

##
# Data By Cycle
##

# Extraction metrics from savR; also the input of the intensity plot below.
extraction <- extractionMetrics(project)

# Plots printed from here on go to this PDF until the device is closed.
pdf("QCResults/SAV.pdf")

# Data By Cycle - FWHM / all lanes / both surfaces / all bases:
# mean FWHM per (cycle, base channel), drawn as one line per channel.
reshaped_extraction <- melt(
  extraction,
  measure.vars = c("FWHM_A", "FWHM_C", "FWHM_T", "FWHM_G")
)
FWHM <- setNames(
  aggregate(
    reshaped_extraction$value,
    by = list(reshaped_extraction$cycle, reshaped_extraction$variable),
    FUN = mean
  ),
  c("Cycles", "FWHM", "Value")
)
# Strip the "FWHM_" prefix so the legend shows only the base letter.
FWHM$FWHM <- sub("FWHM_", "", FWHM$FWHM)
ggplot(FWHM, aes(x = Cycles, y = Value, color = FWHM)) +
  geom_line() +
  labs(title = "Data by Cycle - FWHM", x = "Cycle", y = "All bases FWHM")
# ggsave() without a plot argument writes the most recent ggplot.
ggsave(paste0(args[2], "/data_by_cycle_fwhm.png"))

# Data By Cycle - Intensity / all lanes / both surfaces / all bases:
# mean intensity per (cycle, base channel), drawn as one line per channel.
reshaped_extraction <- melt(
  extraction,
  measure.vars = c("int_A", "int_C", "int_T", "int_G")
)
intensity <- setNames(
  aggregate(
    reshaped_extraction$value,
    by = list(reshaped_extraction$cycle, reshaped_extraction$variable),
    FUN = mean
  ),
  c("Cycles", "Intensity", "Value")
)
# Strip the "int_" prefix so the legend shows only the base letter.
intensity$Intensity <- sub("int_", "", intensity$Intensity)
ggplot(intensity, aes(x = Cycles, y = Value, color = Intensity)) +
  geom_line() +
  labs(
    title = "Data By Cycle - Intensity",
    x = "Cycle",
    y = "All bases intensity"
  )
# ggsave() without a plot argument writes the most recent ggplot.
ggsave(paste0(args[2], "/data_by_cycle_intensity.png"))

# Data By Cycle - % Base / all lanes / both surfaces / all bases
corr <- correctedIntensities(project)

# Turn the four num_<base> columns (presumably per-base call counts -- confirm
# against the savR documentation) into row-wise percentages. The columns are
# addressed by name, matching the melt() below, rather than by position.
base_cols <- c("num_A", "num_C", "num_T", "num_G")
corr[, base_cols] <- round(
  corr[, base_cols] / rowSums(corr[, base_cols]) * 100,
  2
)

# Average the percentages per (cycle, base) and draw one line per base.
corr <- melt(corr, measure.vars = base_cols)
corr <- aggregate(corr$value, by = list(corr$cycle, corr$variable), FUN = mean)
colnames(corr) <- c("Cycle", "Base", "Perc_Base")
# Strip the "num_" prefix so the legend shows only the base letter.
corr$Base <- sub("num_", "", corr$Base)
ggplot(corr) +
  geom_line(aes(x = Cycle, y = Perc_Base, color = Base)) +
  ylab("All Bases % Base") +
  ggtitle("Data by Cycle - % Base")
ggsave(paste0(args[2], "/data_by_cycle_base.png"))


##
# Data By Lane
##

# Tile metrics from savR; also the input of the phasing/prephasing plots below.
tiles <- tileMetrics(project)

# Density, both surfaces: tile-metric codes 100 and 101, labelled here as raw
# and pass-filter (PF) cluster density respectively.
# pfBoxplot(project) # savR's built-in boxplot of clusters / PF clusters per tile and lane
dens <- tiles[tiles$code %in% c(100, 101), ]
# Vectorised relabel; unlike row-subset assignment this also works when one of
# the two codes has no rows.
dens$code <- ifelse(dens$code == 100, "Raw Clusters", "PF Clusters")
dens$value <- dens$value / 1000
# lane is a continuous x, so the grouping must be explicit: without it ggplot2
# groups only by the discrete fill and pools all lanes into one box per code.
ggplot(
  data = dens,
  aes(x = lane, y = value, fill = code, group = interaction(lane, code))
) +
  geom_boxplot() +
  ggtitle("Data By Lane - Cluster Density") +
  xlab("Lane") +
  ylab("Cluster Density (K/mm2)") +
  # One axis tick per lane, as in the phasing/prephasing plots.
  scale_x_continuous(breaks = unique(dens$lane))
ggsave(paste0(args[2], "/data_by_lane_cluster.png"))

# Phasing, both surfaces, all bases.
# One tile-metric code per read: 200 for read 1, 202 for read 2, and so on.
phasing_code <- 200 + 2 * (seq_along(project@reads) - 1)
phasing_labels <- paste("Read ", seq_along(phasing_code))
phasing <- tiles[tiles$code %in% phasing_code, ]
# Replace each numeric code with its read label in one vectorised lookup;
# this also copes with reads that have no rows in the tile metrics.
phasing$code <- phasing_labels[match(phasing$code, phasing_code)]
# Only positive values are plotted, scaled to percent. lane is a continuous x,
# so the grouping is explicit: without it ggplot2 groups only by the discrete
# fill and pools all lanes into one box per read.
ggplot(
  data = phasing[which(phasing$value > 0), ],
  aes(x = lane, y = value * 100, fill = code, group = interaction(lane, code))
) +
  geom_boxplot() +
  ggtitle("Data By Lane - Phasing") +
  xlab("Lane") +
  ylab("% Phasing") +
  scale_x_continuous(breaks = unique(phasing$lane))
ggsave(paste0(args[2], "/data_by_lane_phasing.png"))

# Pre-phasing, both surfaces, all bases.
# One tile-metric code per read: 201 for read 1, 203 for read 2, and so on.
prephasing_code <- 201 + 2 * (seq_along(project@reads) - 1)
prephasing_labels <- paste("Read ", seq_along(prephasing_code))
prephasing <- tiles[tiles$code %in% prephasing_code, ]
# Replace each numeric code with its read label in one vectorised lookup;
# this also copes with reads that have no rows in the tile metrics.
prephasing$code <- prephasing_labels[match(prephasing$code, prephasing_code)]
# Only positive values are plotted, scaled to percent. lane is a continuous x,
# so the grouping is explicit: without it ggplot2 groups only by the discrete
# fill and pools all lanes into one box per read.
ggplot(
  data = prephasing[which(prephasing$value > 0), ],
  aes(x = lane, y = value * 100, fill = code, group = interaction(lane, code))
) +
  geom_boxplot() +
  ggtitle("Data By Lane - Prephasing") +
  xlab("Lane") +
  ylab("% Prephasing") +
  scale_x_continuous(breaks = unique(prephasing$lane))
ggsave(paste0(args[2], "/data_by_lane_prephasing.png"))


##
# QScore Heatmap
##
# The heatmap is produced twice: once into a dedicated PNG device, and once
# more after that device is closed, so it also lands on whichever graphics
# device was active before png() was opened.
heatmap_lanes <- seq(1, project@layout@lanecount)
no_y_title <- theme(axis.title.y = element_blank())

png(
  paste0(args[2], "/qscore_heatmap.png"),
  height = 1025,
  width = 2571,
  res = 200
)
qualityHeatmap(project, lane = heatmap_lanes, read = c(1, 2)) + no_y_title
dev.off()
qualityHeatmap(project, lane = heatmap_lanes, read = c(1, 2)) + no_y_title

# Quality metrics from savR with every column coerced to numeric; also the
# input of the %>=Q30 plot below.
qualy <- qualityMetrics(project)
qualy <- data.frame(apply(qualy, 2, as.numeric))

# Total count per quality-score column (column 4 onwards), in millions.
qualy_all <- melt(qualy, measure.vars = colnames(qualy)[4:ncol(qualy)])
qualy_all <- aggregate(qualy_all$value, by = list(qualy_all$variable), FUN = sum)
colnames(qualy_all) <- c("QScore", "Total")
qualy_all$Total <- qualy_all$Total / 1000000
# Parse the score out of the column name ("Q30" -> 30). Calling as.numeric()
# on the factor itself returns level positions, which equal the scores only
# while the columns happen to be Q1, Q2, ... with no gaps.
qualy_all$QScore <- as.numeric(sub("^Q", "", as.character(qualy_all$QScore)))

# Summary label: yield at Q >= 30 (Total is in millions, so / 1000 is the
# figure shown with the "G" suffix) and its share of the overall total.
q30_total <- sum(qualy_all$Total[which(qualy_all$QScore >= 30)])
q30_label <- paste(
  "QScore >=30 \n",
  round(q30_total / 1000, 2),
  "G \n",
  round(q30_total / sum(qualy_all$Total) * 100, 2),
  "%"
)
label_y <- max(qualy_all$Total) - max(qualy_all$Total) * 0.1

ggplot(qualy_all, aes(x = QScore, y = Total)) +
  geom_bar(stat = "identity", aes(fill = QScore >= 30)) +
  ylab("Total (million)") +
  # Fixed reference line and a single annotation: mapping constants through
  # aes() would draw them once per data row, stacking identical copies.
  geom_vline(xintercept = 30, linetype = "dashed") +
  annotate("text", x = 35, y = label_y, label = q30_label) +
  ggtitle("QScore Distribution") +
  theme(legend.position = "none")
ggsave(paste0(args[2], "/qscore_distr.png"))

# Data by Cycle - % >= Q30: for every row at cycle 25 or later, the share of
# counts that fall in the Q30-and-above columns.
# NOTE(review): the cycle >= 25 cut-off is kept from the original script;
# presumably earlier cycles carry no usable quality scores -- confirm.

# Quality-score columns are the ones named Q<number>. Only these may enter the
# sums: including the identifier columns (e.g. cycle) would inflate the
# denominator and bias the percentage downwards.
q_cols <- grep("^Q[0-9]+$", colnames(qualy))
q30_start <- which(colnames(qualy) == "Q30")
stopifnot(length(q30_start) == 1L)
over_q30 <- q_cols[q_cols >= q30_start]

late_cycles <- qualy[which(qualy$cycle >= 25), ]

# Numerator: counts at Q30 or above per row.
qualy_q30 <- data.frame(
  cycle = late_cycles$cycle,
  sum = unname(rowSums(late_cycles[, over_q30, drop = FALSE]))
)

# Denominator: counts over all quality-score columns per row.
sum_per_cycle <- cbind(
  cycle = late_cycles$cycle,
  sum = unname(rowSums(late_cycles[, q_cols, drop = FALSE]))
)
qualy_q30$sum <- 100 * qualy_q30$sum / sum_per_cycle[, "sum"]

ggplot(qualy_q30, aes(x = cycle, y = sum)) +
  geom_point() +
  ylab("% >=Q30") +
  ggtitle("Data by Cycle - %>=Q30")

ggsave(paste0(args[2], "/qscore_q30.png"))

# Close the graphics device still open at this point (the PDF opened earlier
# in the script), flushing the collected plots to disk.
dev.off()
