Commit 68695d0d authored by Denis Jacob Machado

Upload new file

parent 2a139efe
import re,sys
# Returns reference head and uppercase sequence
def Reference(ref):
    """Read the first FASTA record of a file and return its cleaned sequence.

    Only the first record matched in the file is used; any further records
    are ignored.

    Args:
        ref: Path of the FASTA file to read.

    Returns:
        A ``(head, seq)`` tuple: ``head`` is the text of the header line
        (without the leading ``>``) and ``seq`` is the sequence in uppercase
        with whitespace and gap characters (``.`` and ``-``) removed and
        every run of ``X`` replaced by a single ``N``.

    Raises:
        SystemExit: With status 1, after writing a message to stderr, when
            the file contains no FASTA record or the sequence contains a
            character outside ``ACGTURYSWKMBDHVN``.
    """
    # Plain "r" mode: the "U" flag was removed in Python 3.11, and the
    # patterns below already treat both "\r" and "\n" as whitespace.
    with open(ref, "r") as handle:
        fasta = handle.read()

    records = re.compile(
        r">\s*([^>\n\r]+)\s+([^>]+)\s*", re.M | re.S
    ).findall(fasta)
    if not records:
        sys.stderr.write(
            "! ERROR: no FASTA record found in file {}\n".format(ref)
        )
        sys.exit(1)
    head, seq = records[0]

    seq = seq.upper()  # Transform all nucleotides into uppercase letters
    seq = re.sub(r"\s+", "", seq)  # Remove whitespace (line breaks, spaces)
    seq = re.sub(r"[\.\-]", "", seq)  # Remove gaps
    # Pre-existing X must be converted before X is reused below as the marker
    # for unexpected characters.
    # NOTE(review): a whole run of X collapses into ONE N, which shortens the
    # sequence -- confirm this is intended rather than a per-character swap.
    seq = re.sub(r"[X]+", "N", seq)
    # Flag anything that is not an accepted nucleotide code with an X.
    seq = re.sub(r"[^ACGTURYSWKMBDHVN]", "X", seq)
    if "X" in seq:
        sys.stderr.write(
            "! ERROR: found unexpected character(s) in sequence {} (file {})\n".format(head, ref)
        )
        # sys.exit(1) instead of the site-provided exit(): always available
        # and signals failure to the calling shell.
        sys.exit(1)
    return head, seq
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment