test/lambda-phage/inversion.fasta created to test inversions

inversion.fasta made from reference.fasta using makeinversion.py.
In test the modified biodiff finds the inversions, and fixes them
in a temp file before passing to the rest of biodiff, resulting
in cleaner outout.
parent e67f3206
......@@ -12,6 +12,9 @@ unset tmpdir
tmpdir=$(mktemp -p /tmp -d biodiff.XXXXXX)
refdir="$tmpdir/ref"
qrydir="$tmpdir/qry"
#refdir=refdirect
#qrydir=querydirect
cleanup () {
rm -rf "$tmpdir"
......@@ -56,7 +59,7 @@ isolatenpath=${query%.fasta} # get isolate ID by stripping suffix
isolate=${isolatenpath##*/} # and stripping path
dnadiff -p $isolate $reference $query
echo "dnadiff finished"
python /home/dconkleg/workspace/biodiff/inversionfix.py -r $isolate.rdiff -q $isolate.qdiff -v $isolate-temp.vcf -f $query -t $isolate-temp.fasta
./inversionfix.py -r $isolate.rdiff -q $isolate.qdiff -v $isolate-temp.vcf -f $query -t $isolate-temp.fasta
# inversionfix.py will need to be made an exacutable in the path eventually
# call $isolate-temp.fasta instead of $query from now on below
......
File mode changed from 100644 to 100755
#!/usr/bin/env python
import os
import argparse
from Bio import SeqIO
def main():
parser = argparse.ArgumentParser(description='creates new fasta with inversions for testing')
parser.add_argument('-f', '--fasta', help='fasta file', default='bit-H37Rv.fasta')
parser.add_argument('-t', '--ifasta', help='inversion fasta file', default='ibit-H37Rv.fasta')
parser.add_argument('-c', '--cfasta', help='normal fasta file with same line output', default='cbit-H37Rv.fasta')
args = parser.parse_args()
record = SeqIO.parse(args.fasta, 'fasta').next() # read in the fasta file, should read only the first contig
SeqIO.write(record, args.cfasta, 'fasta') # write out same record in fasta with same bp per line for quick compare
seq = record.seq # get the sequence of the genome
inversion1 = seq[200:570].reverse_complement()
iseq = seq[0:200] + inversion1 + seq[570:]
inversion2 = seq[1140:1590].reverse_complement()
iseq2 = iseq[0:1140] + inversion2 + iseq[1590:]
irecord = record
irecord.seq = iseq2
SeqIO.write(irecord, args.ifasta, 'fasta')
if __name__ == '__main__':
main()
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment