Commit 3361440c authored by Juris Laivins

Video 9. Final Video. DNA Toolkit V1.0

parent fea45de0
from bio_structs import DNA_Codons, DNA_Nucleotides
from bio_structs import DNA_Codons, RNA_Codons, NUCLEOTIDE_BASE
from collections import Counter
import random
......@@ -18,7 +18,7 @@ class bio_seq:
def __validate(self):
    """Check that the sequence only uses bases valid for its own type.

    The allowed alphabet is looked up in NUCLEOTIDE_BASE using
    self.seq_type, so an RNA sequence (which contains "U") is checked
    against the RNA bases instead of always being compared to DNA ones.

    Returns:
        True if every character of self.seq belongs to the alphabet for
        self.seq_type, otherwise False.

    Raises:
        KeyError: if self.seq_type is not a key of NUCLEOTIDE_BASE.
    """
    return set(NUCLEOTIDE_BASE[self.seq_type]).issuperset(self.seq)
def get_seq_biotype(self):
"""Returns sequence type"""
......@@ -30,7 +30,7 @@ class bio_seq:
def generate_rnd_seq(self, length=10, seq_type="DNA"):
    """Replace this object's contents with a random sequence.

    Args:
        length: number of bases to generate.
        seq_type: key into NUCLEOTIDE_BASE selecting the alphabet to draw
            from (e.g. "DNA" or "RNA").

    Raises:
        KeyError: if seq_type is not a key of NUCLEOTIDE_BASE.

    The object is re-initialised in place via __init__, so any sequence
    it previously held is discarded. Nothing is returned.
    """
    # Look the alphabet up once instead of on every draw.
    alphabet = NUCLEOTIDE_BASE[seq_type]
    seq = ''.join(random.choice(alphabet) for _ in range(length))
    self.__init__(seq, seq_type, "Randomly generated sequence")
......@@ -40,14 +40,19 @@ class bio_seq:
def transcription(self):
    """DNA -> RNA transcription: replace every thymine (T) with uracil (U).

    Returns:
        The transcribed string when self.seq_type is "DNA"; otherwise the
        literal message "Not a DNA sequence".
    """
    # Guard first: only a DNA sequence can be transcribed.
    if self.seq_type != "DNA":
        return "Not a DNA sequence"
    return self.seq.replace("T", "U")
def reverse_complement(self):
    """
    Return the reverse complement of the sequence.

    Each base is swapped with its pairing partner and the resulting
    string is reversed. For a "DNA" sequence adenine pairs with thymine
    (A<->T); for any other sequence type it pairs with uracil (A<->U).
    Guanine and cytosine are swapped in both cases. Characters outside
    the mapping are left unchanged.
    """
    if self.seq_type == "DNA":
        mapping = str.maketrans('ATCG', 'TAGC')
    else:
        mapping = str.maketrans('AUCG', 'UAGC')
    return self.seq.translate(mapping)[::-1]
def gc_content(self):
......@@ -65,14 +70,23 @@ class bio_seq:
def translate_seq(self, init_pos=0):
    """Translate the sequence into a list of amino acids.

    Args:
        init_pos: index of the first base of the first codon to read;
            codons are then taken every three bases. A trailing partial
            codon (fewer than three bases) is ignored.

    Returns:
        A list with one codon-table value per complete codon, using
        DNA_Codons for "DNA" sequences and RNA_Codons for "RNA"
        sequences. None for any other sequence type.

    Raises:
        KeyError: if a codon is not present in the selected table.
    """
    if self.seq_type == "DNA":
        codon_table = DNA_Codons
    elif self.seq_type == "RNA":
        codon_table = RNA_Codons
    else:
        # Unknown sequence types yield no translation.
        return None
    return [codon_table[self.seq[pos:pos + 3]]
            for pos in range(init_pos, len(self.seq) - 2, 3)]
def codon_usage(self, aminoacid):
"""Provides the frequency of each codon encoding a given aminoacid in a DNA sequence"""
tmpList = []
for i in range(0, len(self.seq) - 2, 3):
if DNA_Codons[self.seq[i:i + 3]] == aminoacid:
tmpList.append(self.seq[i:i + 3])
if self.seq_type == "DNA":
for i in range(0, len(self.seq) - 2, 3):
if DNA_Codons[self.seq[i:i + 3]] == aminoacid:
tmpList.append(self.seq[i:i + 3])
elif self.seq_type == "RNA":
for i in range(0, len(self.seq) - 2, 3):
if RNA_Codons[self.seq[i:i + 3]] == aminoacid:
tmpList.append(self.seq[i:i + 3])
freqDict = dict(Counter(tmpList))
totalWight = sum(freqDict.values())
......
# Flat list of the four DNA bases.
DNA_Nucleotides = list("ACGT")

# Allowed bases for each supported sequence type, keyed by type name.
NUCLEOTIDE_BASE = {
    "DNA": list("ATCG"),
    "RNA": list("AUCG"),
}
DNA_Codons = {
# 'M' - START, '_' - STOP
......@@ -24,3 +27,28 @@ DNA_Codons = {
"TAT": "Y", "TAC": "Y",
"TAA": "_", "TAG": "_", "TGA": "_"
}
# Lookup table from each three-letter RNA codon (written with U rather
# than T) to a one-letter amino-acid code. All 64 codons are present.
# Entries are grouped one amino acid per line, in alphabetical order of
# the one-letter code, with the stop codons last.
RNA_Codons = {
# 'M' - START, '_' - STOP
"GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
"UGU": "C", "UGC": "C",
"GAU": "D", "GAC": "D",
"GAA": "E", "GAG": "E",
"UUU": "F", "UUC": "F",
"GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
"CAU": "H", "CAC": "H",
"AUA": "I", "AUU": "I", "AUC": "I",
"AAA": "K", "AAG": "K",
"UUA": "L", "UUG": "L", "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
"AUG": "M",
"AAU": "N", "AAC": "N",
"CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
"CAA": "Q", "CAG": "Q",
"CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R", "AGA": "R", "AGG": "R",
"UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S", "AGU": "S", "AGC": "S",
"ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
"GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
"UGG": "W",
"UAU": "Y", "UAC": "Y",
"UAA": "_", "UAG": "_", "UGA": "_"
}
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT
# DNA Toolset/Code testing file
from bio_seq import bio_seq
from utilities import read_FASTA, readTextFile, writeTextFile
# Build a sequence object using bio_seq's default constructor arguments.
test_dna = bio_seq()
# NOTE(review): generate_rnd_seq re-initialises the object in place, so the
# DNA sequence generated here is immediately replaced by the RNA one on the
# next line — confirm whether both calls are intended.
test_dna.generate_rnd_seq(40, "DNA")
test_dna.generate_rnd_seq(40, "RNA")
# Print whatever get_seq_info() and nucleotide_frequency() report for the
# sequence currently held by the object.
print(test_dna.get_seq_info())
print(test_dna.nucleotide_frequency())
......
......@@ -17,3 +17,30 @@ def colored(seq):
tmpStr += bcolors['reset'] + nuc
return tmpStr + '\033[0;0m'
def readTextFile(filePath):
    """Read a text file and return its contents as a single string.

    Every line is stripped of leading and trailing whitespace (including
    the newline) and the stripped lines are concatenated with no
    separator, so a sequence wrapped over several lines comes back as one
    continuous string.

    Args:
        filePath: path of the file to read.

    Returns:
        The concatenated, whitespace-stripped lines.
    """
    with open(filePath, 'r') as f:
        # Stream the file line by line instead of loading it all with
        # readlines() and building an intermediate list.
        return "".join(line.strip() for line in f)
def writeTextFile(filePath, seq, mode='w'):
    """Write seq followed by a newline to the file at filePath.

    Args:
        filePath: path of the file to write.
        seq: text to write; a trailing newline is appended.
        mode: mode string passed to open(); the default 'w' overwrites.
    """
    with open(filePath, mode) as handle:
        record = seq + '\n'
        handle.write(record)
def read_FASTA(filePath):
    """Parse a FASTA file into a dict of label -> sequence.

    A line starting with '>' begins a new record; the whole stripped line
    (including the '>') is used as the dictionary key. The lines that
    follow, up to the next header, are stripped and concatenated to form
    that record's sequence. Blank lines are ignored. If the same header
    appears twice, the later record replaces the earlier one.

    Args:
        filePath: path of the FASTA file to read.

    Returns:
        A dict mapping each header line to its full sequence string, in
        file order.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    # Collect the pieces of each record in a list and join once at the
    # end, rather than growing a string with repeated +=.
    records = {}
    label = None
    with open(filePath, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            # Only a leading '>' marks a header; a '>' elsewhere in a
            # line must not start a new record.
            if line.startswith('>'):
                label = line
                records[label] = []
            elif label is None:
                raise ValueError(
                    "FASTA sequence data found before the first '>' header")
            else:
                records[label].append(line)
    return {name: "".join(parts) for name, parts in records.items()}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment