import re,sys
# Returns reference head and uppercase sequence
def Reference(ref):
    """Read a reference file and return its header and cleaned sequence.

    NOTE(review): the original body used ``seq`` and ``head`` without ever
    assigning them, so the file-reading step was missing. This version
    assumes ``ref`` is the path of a single-record FASTA file: the first
    line is the header and everything after it is sequence. Confirm this
    against the callers; in particular, whether they expect the leading
    ">" to be stripped from the header, as done here.

    Args:
        ref: Path of the reference file to read.

    Returns:
        A ``(head, seq)`` tuple. ``head`` is the first line without the
        leading ">" and surrounding whitespace. ``seq`` is the remaining
        text upper-cased, with whitespace and gap characters ("." and "-")
        removed and each run of "X" collapsed into a single "N". Any
        character outside the IUPAC nucleotide alphabet is replaced by "X".

    Side effects:
        Writes an error message to stderr when unexpected characters are
        found. The function still returns in that case, so callers can
        detect the problem by looking for "X" in the returned sequence.
    """
    with open(ref) as handle:
        head = handle.readline().strip()
        seq = handle.read()
    if head.startswith(">"):
        head = head[1:].strip()

    seq = seq.upper()  # Transform all nucleotides into uppercase letters
    seq = re.sub(r"\s+", "", seq)  # Remove whitespace, including line breaks
    seq = re.sub(r"[.\-]", "", seq)  # Remove gap characters
    # Each run of X collapses to ONE N, so the sequence may get shorter.
    # This must happen before X is reused as the "unexpected" marker below.
    seq = re.sub(r"X+", "N", seq)
    # Mark anything outside the IUPAC nucleotide alphabet with X ...
    seq = re.sub(r"[^ACGTURYSWKMBDHVN]", "X", seq)
    # ... and report it if at least one such character was found.
    if "X" in seq:
        sys.stderr.write("! ERROR: found unexpected character(s) in sequence {} (file {})\n".format(head, ref))
    return head, seq
