### Calling in parallel for chromosome 1
## Step 1: create regions files for each chromosome
RegSize=1000000 # set region size in bp
# List chromosome lengths using reference genome index
samtools faidx /path/reference.fa
cut -f1-2 /path/reference.fa.fai > /path/chrSize.txt
# Length of target chromosome
chrL=$(grep -w "$chr" /path/chrSize.txt | cut -f2)
# List regions (e.g., “1000001-2000000”)
<(seq 1 $RegSize $chrL) \
<(printf "$(seq $RegSize $RegSize $chrL)\n$chrL") \
# Add chromosome prefix (e.g., “chr1:1000001-2000000”)
sed -i -e 's/^/'"$chr"':/' /path/$chr_regions.txt
## Step 2: variant call each region
cat path/$chr_regions.txt | parallel -j20 ' # -j sets number of threads
bcftools mpileup -Ou -a FORMAT/AD,FORMAT/DP,FORMAT/SP,INFO/AD \
-b /path/list.of.bams.txt \
bcftools call -mOz -f GQ,GP -o /path/{}.vcf.gz'
# Note: output VCFs will be named by region (e.g., chr1:1-1000000.vcf.gz)
## Step 3: concatenate VCFs of each region
ls /path/$chr*.vcf.gz > /path/list.of.regions.vcfs.txt
bcftools concat -f /path/list.of.regions.vcfs.txt |\
bcftools sort -Oz - > /output/$chr.raw.vcf.gz
rm /path/$chr:*.vcf.gz /path/list.of.regions.vcf.txt /path/$chr_regions.txt