#!/usr/bin/bash
infile=$1
export PATH=/mnt/raid8/sunchuqing/Softwares/blast2.2.26/ncbi-blast-2.2.26+/bin:/mnt/raid7/sunchuqing/Softwares/bin:$PATH:/home/sunchuqing/.local/bin:/mnt/raid6/sunchuqing/Softwares/VirSorter/VirSorter/bin:/mnt/raid6/sunchuqing/Softwares/VirSorter/VirSorter:/usr/bin:/mnt/raid6/sunchuqing/Softwares/miniconda3/envs/virsorter/bin:/mnt/raid6/sunchuqing/Softwares/PPR-Meta:/mnt/raid6/sunchuqing/Softwares/miniconda3/envs/virsorter/bin:/usr/local/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/mnt/raid6/sunchuqing/Softwares/MCR/v94/bin/glnxa64:/mnt/raid6/sunchuqing/Softwares/MCR/v94/runtime/glnxa64:/mnt/raid6/sunchuqing/Softwares/MCR/v94/sys/os/glnxa64:/mnt/raid6/sunchuqing/Softwares/MCR/v94/extern/glnxa64:/mnt/raid3/wchen/miniconda2/pkgs/libgcc-7.2.0-h69d50b8_2/lib
echo "Dealing with $infile"
export LD_LIBRARY_PATH=/mnt/raid8/sunchuqing/Softwares/lib/perllib:$LD_LIBRARY_PATH
export PERL5LIB=/mnt/raid8/sunchuqing/Softwares/lib/perllib:$PERL5LIB
fafile="00_CAT/${infile}_cdhit.len1.5k.fa"
seq="00_CAT/${infile}_cdhit.len1.5k.fa"
if [ ! -s $seq ];then
awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0" ":$0 }' 00_CAT/${infile}_cdhit.fa |awk 'length($NF)>=1500 {print $1"\n"$NF}' > 00_CAT/${infile}_cdhit.len1.5k.fa
fi
n1=`ls 01_Glimmer/${infile}/*.predict|wc -l`
n2=`grep -c ">" 00_CAT/${infile}_cdhit.len1.5k.fa `
if [ $n1 -ne $n2 ];then
#awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0" ":$0 }' 00_CAT/${infile}_cdhit.fa |awk 'length($NF)>=1500 {print $1"\n"$NF}'| > 00_CAT/${infile}_cdhit.len1.5k.fa
#awk '/^>/{s=++num}{print > "01_Glimmer/"file"/"file"_"s"";close("01_Glimmer/"file"/"file"_"s"")}' file="${infile}" $fafile
rm -rf 01_Glimmer/${infile}
mkdir -p 01_Glimmer/${infile}
#awk '/^>/{s=++num}{print > "01_Glimmer/"file"/"file"_"s""}' file="${infile}" $fafile
#awk '/^>/{s=++num}{print >> "01_Glimmer/"file"/"file"_"s"";close("01_Glimmer/"file"/"file"_"s"")}' file="${infile}" $fafile
awk '/^>/{s=++num}{print > "01_Glimmer/"file"/"file"_"s"";("01_Glimmer/"file"/"file"_"s"")}' file="${infile}" $fafile
cd 01_Glimmer/${infile}
ls | grep -v "\." | grep "." |awk 'BEGIN {FS="."} {print $1}'|sort|uniq > ../${infile}.list
while read -r line
do
/mnt/raid5/sunchuqing/Softwares/glimmer3.02/bin/long-orfs -n -t 1.15 ${line} ${line}.longorfs
/mnt/raid5/sunchuqing/Softwares/glimmer3.02/bin/extract -t ${line} ${line}.longorfs > ${line}.train
/mnt/raid5/sunchuqing/Softwares/glimmer3.02/bin/build-icm -r ${line}.icm < ${line}.train
/mnt/raid5/sunchuqing/Softwares/glimmer3.02/bin/glimmer3 -o50 -g100 -t30 ${line} ${line}.icm ${line}
/mnt/raid5/sunchuqing/Softwares/glimmer3.02/bin/extract -t ${line} ${line}.predict > ${line}_predict.fasta
done < "../${infile}.list"
cd ../..
fi
if [ ! -s "01_Glimmer/${infile}/${infile}.faa " ];then
rm 01_Glimmer/${infile}/${infile}.faa
while read -r line
do
declare -i len=0
declare -i aorf=0
declare -i orflen=0
len=`grep '>' 01_Glimmer/${infile}/${line}.predict |awk 'BEGIN {FS=" "} {print $2}' |awk 'BEGIN {FS="_"} {print $2}'`
orflen=`grep ">" 01_Glimmer/${infile}/${line}_predict.fasta | awk 'BEGIN {FS="="} {print $2}'| awk '{sum+=$1}END{print sum}'`
((aorf=$orflen*10000))
if [ $len -gt $aorf ]; then
echo ${line} >> 02_Annotate/${infile}/${infile}.no_assignmnt_under10k
continue
fi
sed "s/^>/>${line}_/" 01_Glimmer/${infile}/${line}_predict.fasta >> 01_Glimmer/${infile}/${infile}.faa
done < "01_Glimmer/${infile}.list"
fi
#VirSorter
mkdir -p 04_is_phage
cd 04_is_phage
if [ ! -s "04_Positive/${infile}.VirSorter.genome" ];then
echo "--Start virSorter--"
#rm -rf ./01_virsorter_result/${infile}
mkdir -p 01_virsorter_result
__conda_setup="$('/mnt/raid6/sunchuqing/Softwares/miniconda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
eval "$__conda_setup"
else
if [ -f "/mnt/raid6/sunchuqing/Softwares/miniconda/etc/profile.d/conda.sh" ]; then
. "/mnt/raid6/sunchuqing/Softwares/miniconda/etc/profile.d/conda.sh"
else
export PATH="/mnt/raid6/sunchuqing/Softwares/miniconda/bin:$PATH"
fi
fi
unset __conda_setup
# # <<< conda initialize <<<
# source activate virsorter
conda activate /mnt/raid6/sunchuqing/Softwares/miniconda3/envs/vs2
virsorter run \
-w ./01_virsorter_result/${infile} \
-i ../00_CAT/${infile}_cdhit.len1.5k.fa \
--db-dir /mnt/raid8/sunchuqing/Softwares/Virsorterdb\
-j 16 --rm-tmpdir\
--tmpdir ./01_virsorter_result/${infile}/temp --min-score 0.7
conda deactivate
# mkdir -p 02_vir_positive
# cat ./01_virsorter_result/${infile}/Predicted_viral_sequences/VIRSorter_cat-1.fasta ./01_virsorter_result/${infile}/Predicted_viral_sequences/VIRSorter_cat-2.fasta > 02_vir_positive/${infile}.vir.fasta
# #VirSprter Positive 基因组 02_vir_positive/${infile}.vir.fasta
mkdir -p 04_Positive
#grep '>' 02_vir_positive/${infile}.vir.fasta |sed 's/>VIRSorter_//g'|sed 's/-cat_1//g'|sed 's/-cat_2//g'|sed 's/-circular//g' > 04_Positive/${infile}.VirSorter.genome
cat ./01_virsorter_result/${infile}/final-viral-score.tsv | awk 'BEGIN {FS="|"} {print $1}'|sort|uniq > 04_Positive/${infile}.VirSorter.genome
echo "--End virSorter--"
fi
#Complete
if [ ! -s "04_Positive/${infile}.cir.genome" ];then
mkdir -p 03_circle/db 03_circle/${infile}
/mnt/raid8/sunchuqing/Softwares/blast2.2.26/ncbi-blast-2.2.26+/bin/makeblastdb -in ../00_CAT/${infile}_cdhit.len1.5k.fa -out 03_circle/db/${infile} -dbtype nucl
blastall -p blastn \
-i ../00_CAT/${infile}_cdhit.len1.5k.fa \
-d 03_circle/db/${infile} \
-o 03_circle/${infile}/${infile}.cir.tab \
-m 8 -e 1e-5 -a 16
awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0" ":$0 }' ../00_CAT/${infile}_cdhit.len1.5k.fa |awk '{print $1","length($NF)}'|sed 's/>//g' > 03_circle/${infile}/${infile}.len
awk 'BEGIN {FS="\t"} $1==$2 && $7==1 && $3==100.00 {print $0}' 03_circle/${infile}/${infile}.cir.tab > 03_circle/${infile}/${infile}.cir711.tab
rm 03_circle/${infile}/${infile}.cir.len.tab
while read -r line
do
contig=`echo $line |awk 'BEGIN {FS=","} {print $1}'`
sed "s/^$contig\t/$line\t/g" 03_circle/${infile}/${infile}.cir711.tab |grep "$line" >> 03_circle/${infile}/${infile}.cir.len.tab
done <"03_circle/${infile}/${infile}.len"
awk 'BEGIN {FS="[,\t]"} ( $2==$10 || $2==$11 ) && $2!=$9 {print $0}' 03_circle/${infile}/${infile}.cir.len.tab > 03_circle/${infile}/${infile}.circle.tab
awk 'BEGIN {FS=","} {print $1}' 03_circle/${infile}/${infile}.circle.tab |sed 's/^/>/g' > 03_circle/${infile}/${infile}.complete
awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0"\n":$0 }' ../00_CAT/${infile}_cdhit.len1.5k.fa > 03_circle/${infile}/${infile}.fna
#grep -w -A 1 -Ff 03_circle/${infile}/${infile}.complete 03_circle/${infile}/${infile}.fna |grep -v "-" > 03_circle/${infile}/${infile}.complete.fa
#mkdir -p 04_complete
#cp 03_circle/${infile}/${infile}.complete.fa 04_complete/
sed 's/>//g' 03_circle/${infile}/${infile}.complete > 04_Positive/${infile}.cir.genome
fi
#成环基因组 03_circle/${infile}/${infile}.complete
#02_vir_positive/${infile}.vir.fasta
#pip install numpy
#pip install h5py
#pip install tensorflow==1.4.1 #CPU version
#pip install keras==2.0.8
if [ ! -s "04_Positive/${infile}.ppr.genome" ];then
pathh=`pwd`
#PPR_Meta
# export PATH=/usr/bin:$PATH
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/mnt/raid6/sunchuqing/Softwares/miniconda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
eval "$__conda_setup"
else
if [ -f "/mnt/raid6/sunchuqing/Softwares/miniconda/etc/profile.d/conda.sh" ]; then
. "/mnt/raid6/sunchuqing/Softwares/miniconda/etc/profile.d/conda.sh"
else
export PATH="/mnt/raid6/sunchuqing/Softwares/miniconda/bin:$PATH"
fi
fi
unset __conda_setup
# <<< conda initialize <<<
conda activate /mnt/raid6/sunchuqing/Softwares/miniconda3/envs/tensorflow
pathh=`pwd`
mkdir -p 03_PPR_META
cd /mnt/raid6/sunchuqing/Softwares/PPR-Meta
./PPR_Meta $pathh/../00_CAT/${infile}_cdhit.len1.5k.fa $pathh/03_PPR_META/${infile}.csv
cd $pathh
awk 'BEGIN {FS=","} $3>0.7 {print $1}' 03_PPR_META/${infile}.csv |awk 'BEGIN {FS=" "} {print $1}' > 04_Positive/${infile}.ppr.genome
conda deactivate
fi
#VirFinder
#Bork
if [ ! -s "04_Positive/${infile}.virfiner.genome" ];then
mkdir -p 03_VirFinder
/usr/bin/Rscript /mnt/raid5/sunchuqing/Buffalo_gut/VirFinder.R ../00_CAT/${infile}_cdhit.len1.5k.fa 03_VirFinder/${infile}.csv
#VirFinder输出文件
awk 'BEGIN {FS=","} $3>0.6 {print $1}' 03_VirFinder/${infile}.csv|awk 'BEGIN {FS=" "} {print $1}' > 04_Positive/${infile}.virfiner.genome
fi
#blast Virus ref
if [ ! -s "04_Positive/${infile}.ref.genome" ];then
mkdir -p 03_Blast_m8/${infile}
blastall -p blastn \
-i ../00_CAT/${infile}_cdhit.len1.5k.fa \
-d /mnt/raid6/sunchuqing/Database/Virus/virus \
-o 03_Blast_m8/${infile}/${infile}.m8.tab \
-m 8 -e 1e-10 -a 16
grep -v -wFf /mnt/raid6/sunchuqing/Database/Virus/not.list 03_Blast_m8/${infile}/${infile}.m8.tab >tmp && mv tmp 03_Blast_m8/${infile}/${infile}.m8.tab
#echo "chrom start end" > 03_Blast_m8/${infile}/${infile}.bed
awk 'BEGIN {FS="\t"} $3>50 {print $1 "\t" $7 "\t" $8 }' 03_Blast_m8/${infile}/${infile}.m8.tab |sort -k 1 -t $'\t' |sed 's/[[:space:]]*$//'|tr -d " "|sort -k1,1 -k2,2n> 03_Blast_m8/${infile}/${infile}.bed.1
/mnt/raid6/sunchuqing/Softwares/miniconda3/bin/bedtools merge -i 03_Blast_m8/${infile}/${infile}.bed.1 >03_Blast_m8/${infile}/${infile}.bed
sed 's/,/\t/g' 03_circle/${infile}/${infile}.len > 03_Blast_m8/${infile}/${infile}.len
cut -f 1 03_Blast_m8/${infile}/${infile}.len > 03_Blast_m8/${infile}/${infile}.list
/mnt/raid6/sunchuqing/Softwares/miniconda3/bin/bedtools genomecov -i 03_Blast_m8/${infile}/${infile}.bed -g 03_Blast_m8/${infile}/${infile}.len |awk 'BEGIN {FS="\t"} $2>=1 {print $0}'|grep -v "genome" > 03_Blast_m8/${infile}/${infile}.bedtools
rm 03_Blast_m8/${infile}/${infile}.genomecov.csv
# while read -r line
# do
# grep "$line" 03_Blast_m8/${infile}/${infile}.bedtools | awk 'BEGIN {FS="\t"} {sum += $NF} {print $1 "," sum*100}'|tail -n 1 >> 03_Blast_m8/${infile}/${infile}.genomecov.csv
# name=`grep "$line" 03_Blast_m8/${infile}/${infile}.bedtools |wc -l`
# if [ $name -eq 0 ];then
# echo "$line,0" >> 03_Blast_m8/${infile}/${infile}.genomecov.csv
# fi
# done <"03_Blast_m8/${infile}/${infile}.list"
awk 'BEGIN {FS=","} $NF>0.9 {print $1}' 03_Blast_m8/${infile}/${infile}.genomecov.csv > 04_Positive/${infile}.ref.genome
# awk 'BEGIN {FS=","} $2>90 {print $0}' 03_Blast_m8/${infile}/${infile}.genomecov.csv > 03_Blast_m8/${infile}/${infile}.genomecov.over90.csv
# awk 'BEGIN {FS=","} {print $1}' 03_Blast_m8/${infile}/${infile}.genomecov.over90.csv > 04_Positive/${infile}.ref.genome
fi
#覆盖度超过90的基因组 03_Blast_m8/${infile}/${infile}.genomecov.over90.csv
pathh=`pwd`
#blast pVOGs
if [ ! -s "04_Positive/${infile}.pvog.genome" ];then
mkdir -p 03_Blast_pVOG/${infile}
rm 03_Blast_pVOG/${infile}/${infile}_cds_num.csv
cd ../01_Glimmer/${infile}
ls | grep -v "\." | grep "." |awk 'BEGIN {FS="."} {print $1}'|sort|uniq > ../${infile}.list
cd $pathh
blastall -p blastx -i ../01_Glimmer/${infile}/${infile}.faa -d /mnt/raid6/sunchuqing/Database/Virus/blastdb/POGseqs \
-o 03_Blast_pVOG/${infile}/${infile}.m8.tab \
-m 8 -e 1e-10 -a 16
while read -r line
do
file="../01_Glimmer/${infile}/${line}"
filename=`echo "$file" | awk 'BEGIN {FS="/"} {print $NF}'`
CDSnum=`grep '>' ${file}_predict.fasta |wc -l `
name=`grep '>' ../01_Glimmer/${infile}/${line} |head -n 1|sed 's/>//g' `
length=`grep -w "$name" 03_Blast_m8/${infile}/${infile}.len`
hitnum=`grep "${filename}_" 03_Blast_pVOG/${infile}/${infile}.m8.tab |awk 'BEGIN {FS="\t"} $3>50 {print $1} ' |sort |uniq |wc -l`
echo "$length,$CDSnum,$hitnum"|sed 's/\t/,/g'|awk 'BEGIN {FS=","} $3>3 && $2/5000<$3 && $2/5000<$4 {print $0}' >> 03_Blast_pVOG/${infile}/${infile}_cds_num.csv
done <"../01_Glimmer/${infile}.list"
awk 'BEGIN {FS=","} {print $1}' 03_Blast_pVOG/${infile}/${infile}_cds_num.csv > 04_Positive/${infile}.pvog.genome
fi
mkdir -p 05_Phage_Positive
awk 'BEGIN {FS=","} {print $1}' 03_Blast_pVOG/${infile}/${infile}_cds_num.csv > 04_Positive/${infile}.pvog.genome
awk 'BEGIN {FS=","} {print $1}' 03_Blast_m8/${infile}/${infile}.genomecov.over90.csv > 04_Positive/${infile}.ref.genome
sed 's/>//g' 03_circle/${infile}/${infile}.complete > 04_Positive/${infile}.cir.genome
awk 'BEGIN {FS=","} $3>0.6 {print $1}' 03_VirFinder/${infile}.csv|awk 'BEGIN {FS=" "} {print $1}' > 04_Positive/${infile}.virfiner.genome
awk 'BEGIN {FS=","} $3>0.7 {print $1}' 03_PPR_META/${infile}.csv |awk 'BEGIN {FS=" "} {print $1}' > 04_Positive/${infile}.ppr.genome
#grep '>' 02_vir_positive/${infile}.vir.fasta |sed 's/>VIRSorter_//g'|sed 's/-cat_1//g'|sed 's/-cat_2//g'|sed 's/-circular//g' > 04_Positive/${infile}.VirSorter.genome
#cat 04_Positive/${infile}*|sort |uniq -c|sort -n|awk 'BEGIN {FS=" "} $1>=1 {print $2}'|sed 's/_length/ length/g'|awk 'BEGIN {FS=" "} {print $1}' >05_Phage_Positive/${infile}.phage.genome
cat 04_Positive/${infile}*|sort |uniq -c|sort -n|awk 'BEGIN {FS=" "} $1>=2 {print $2}'|sed 's/_length/ length/g'|awk 'BEGIN {FS=" "} {print $1}' >05_Phage_Positive/${infile}.phage.genome2
cat 04_Positive/${infile}.cir.genome >> 05_Phage_Positive/${infile}.phage.genome2
sed 's/_length/ length/g' 03_circle/${infile}/${infile}.fna > 03_circle/${infile}/${infile}.fna2
#grep -w -A 1 -Ff 05_Phage_Positive/${infile}.phage.genome 03_circle/${infile}/${infile}.fna2 |grep -v -e "--" > 05_Phage_Positive/${infile}.phage.genome.fa
grep -w -A 1 -Ff 05_Phage_Positive/${infile}.phage.genome2 03_circle/${infile}/${infile}.fna2 |grep -v -e "--" |awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0" ":$0 }' |awk '{print $1","length($NF)}'|sed 's/>//g' |awk 'BEGIN {FS=","} $2>1500 {print $1}'|sort|uniq > 05_Phage_Positive/${infile}_fullfill2
grep -w -A 1 -Ff 05_Phage_Positive/${infile}.phage.genome2 03_circle/${infile}/${infile}.fna2 |grep -v -e "--" |awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0" ":$0 }' |awk '{print $1","length($NF)}'|sed 's/>//g' |awk 'BEGIN {FS=","} $2>1500 {print $0}'|sort|uniq > 05_Phage_Positive/${infile}_fullfill2.length.csv
grep -w -A 1 -Ff 05_Phage_Positive/${infile}_fullfill2 03_circle/${infile}/${infile}.fna2 |grep -v -e "--" > 05_Phage_Positive/${infile}.phage.1.5k.genome.fa