# Load library for opening xlsx files
list.files(path = 'Coverage Results', pattern = "\\_consensus_seqs_kma.mapstat",
recursive = TRUE, include.dirs = FALSE)
samples = gsub(x = coverage_results,
pattern = "^(.+)_consensus_seqs_kma.mapstat", replacement = "\\1")
# Collate coverage results into a single file
combined_coverage_results = NULL
for (sampleID in 1:length(samples)) {
paste0("Coverage Results/", samples[sampleID], "_consensus_seqs_kma.mapstat")
coverage_file = readLines(con = coverage_filename)
coverage_file[[7]] = gsub(x = coverage_file[[7]], pattern = "# ", replacement = "")
noRecords = sum(!grepl(pattern = "##", x = coverage_file,fixed = TRUE)) - 1
coverage = read.table(file = textConnection(coverage_file),
header = TRUE, stringsAsFactors = FALSE, sep = "\t", skip = 6)
coverage$sample = samples[sampleID]
combined_coverage_results = rbind(combined_coverage_results, coverage)
# Import file linking reference sequences to genotype
taxa_results_field_names =
c('query','name','sequence','length','Genus','Genotype-B-region-score',
'Genotype-B-region-plot','Genotype','Genotype-C-region-plot',
'Genotype-C-region-score','Genotype-plot')
read.xlsx(xlsxFile = "calicivirus_typing_output.xlsx",
sheet = "sheet1", startRow = 2)
colnames(combined_taxa_results) = taxa_results_field_names
# Merge coverage results with genotype
combined_coverage_results2 =
merge(x = combined_coverage_results,
y = combined_taxa_results[,c('name','Genotype')],
by.x = c('refSequence'), by.y = c('name'), all.x = TRUE)
combined_coverage_results2[
is.na(combined_coverage_results2$Genotype),
aggregate(formula = readCount ~ Genotype + refSequence + sample,
data = combined_coverage_results2,
coverage_per_genotype_wide =
reshape(data = coverage_per_genotype,
idvar = c("Genotype","refSequence"),
timevar = "sample", direction = "wide")
coverage_per_genotype_wide[is.na(coverage_per_genotype_wide)] = 0
colnames(coverage_per_genotype_wide) =
gsub(x = colnames(coverage_per_genotype_wide),
pattern = "^readCount\\.(.+)$", replacement = "\\1")
# Save results as a csv file
write.csv(x = coverage_per_genotype_wide,
file = "coverage_per_genotype_wide.csv", row.names = FALSE)