# MCScanX做共线性分析

## g. Collinear block analysis with MCScanX
# Create the working directory for this step and enter it. Stop here if the
# directory cannot be entered, so that nothing below writes into whatever
# directory happened to be current.
mkdir -p /home/train/14.genome_comparison/g.MCScanX
cd /home/train/14.genome_comparison/g.MCScanX || {
  echo "cannot cd to /home/train/14.genome_comparison/g.MCScanX" >&2
  exit 1
}

# Link the gene-model GFF3, protein FASTA and genome FASTA of both species
# (laame and plost) from the data-preparation directory into this directory.
# -f replaces a link left over from a previous run, so the step can be re-run.
for species in laame plost; do
  for suffix in geneModels.gff3 protein.fasta genome.fasta; do
    ln -sf "../a.preparing_data/${species}.${suffix}" ./
  done
done

# Merge the protein sets of both species into one FASTA file for an
# all-vs-all search.
cat laame.protein.fasta plost.protein.fasta > all.fasta
# Edit all.fasta in place: remove a '*' at the end of a line, then replace
# every remaining '*' with X.
perl -p -i -e 's/\*$//; s/\*/X/g;' all.fasta

# Build a DIAMOND database from the merged proteins and align the merged
# proteins against it. --outfmt 5 selects BLAST XML output.
diamond makedb --in all.fasta --db all
diamond blastp \
  --db all \
  --query all.fasta \
  --out diamond.out \
  --outfmt 5 \
  --sensitive \
  --max-target-seqs 100 \
  --evalue 1e-5 \
  --id 10 \
  --tmpdir /dev/shm \
  --threads 8

# -p: do not fail when data/ already exists from an earlier run.
mkdir -p data

# Filter the DIAMOND result and write it as data/input.blast.
# NOTE(review): parsing_blast_result.pl is an external helper; presumably it
# emits tabular hits that pass the e-value / identity / coverage cut-offs
# given here -- confirm against the script's usage text.
parsing_blast_result.pl \
  --no-header \
  --max-hit-num 100 \
  --evalue 1e-6 \
  --identity 0.5 \
  --subject-coverage 0.5 \
  --query-coverage 0.5 \
  diamond.out > data/input.blast

# Build data/input.gff from the "gene" features of both GFF3 files. Each
# output line is: sequence name, gene ID (taken from the ID= attribute),
# start, end -- tab separated.
perl -e '
  while (<>) {
    if (m/^(\S+)\t.*\tgene\t(\d+)\t(\d+).*ID=([^\s;]+)/) {
      print "$1\t$4\t$2\t$3\n";
    }
  }
' plost.geneModels.gff3 laame.geneModels.gff3 > data/input.gff

# Run MCScanX on the data/input prefix (data/input.blast + data/input.gff);
# the data/input.collinearity file it produces is used below.
MCScanX data/input

# Interspecific subset: keep lines on which "plost" occurs, but not twice.
# NOTE(review): this assumes the species name is part of the gene/sequence
# IDs, so such lines pair a plost entry with a non-plost one -- verify.
grep -v -P "plost.*plost" data/input.collinearity | grep -P "plost" > data/input.collinearity_interspecific
mcscanx_stats_blocks.pl data/input.collinearity_interspecific > data/input.collinearity_interspecific.stats

# Intraspecific subset: keep lines on which "plost" occurs at least twice.
# Only plost-vs-plost lines are collected here; laame-vs-laame lines are not.
grep -P "plost.*plost" data/input.collinearity > data/input.collinearity_intraspecific
mcscanx_stats_blocks.pl data/input.collinearity_intraspecific > data/input.collinearity_intraspecific.stats

# Generate Circos input from the MCScanX collinearity file, with plost as the
# reference (label PO) and laame as the query (label LA).
# NOTE(review): the commands below expect circos.conf in both ./out and
# ./out_WGD; out_WGD is named via --out-ref-WGD, while "out" is presumably the
# script's default output directory -- confirm against the script's usage.
circos_from_MCScanX_out.pl \
  --out-ref-WGD out_WGD \
  --ref-gff3 plost.geneModels.gff3 \
  --ref-fasta plost.genome.fasta \
  --query-gff3 laame.geneModels.gff3 \
  --query-fasta laame.genome.fasta \
  --ref-label PO \
  --query-label LA \
  --min-block-size 5 \
  data/input.collinearity

# Run circos inside each output directory. Each run uses a subshell, so the
# current directory of this shell is untouched, and circos is skipped when
# the directory cannot be entered instead of running in the wrong place.
(cd out && circos -conf circos.conf)
(cd out_WGD && circos -conf circos.conf)