Loading gbprocess/operations/trimming.py +2 −2 Original line number Diff line number Diff line Loading @@ -177,7 +177,7 @@ class CutadaptPositionalTrimmer(CommandLineWrapper, Trimmer): # Will be needed if we handle spacers! ] if len(output_files) == 2: args.extend(["-U", len(self._common_side_cutsite_remnant) + 1]) args.extend(["-U", len(self._common_side_cutsite_remnant)]) args.extend(output_file_arg) args.extend(files) Loading Loading @@ -406,7 +406,7 @@ class CutadaptPatternTrimmer(CommandLineWrapper, Trimmer): def _build_patterns(self, barcode): result = [] result.append(self._reverse_complement(self._common_side_cutsite_remnant + self._common_adapter_sequence)) result.append(self._reverse_complement(self._common_adapter_sequence + self._common_side_cutsite_remnant)) if self._barcode_adapter_sequence: result.append(self._barcode_side_cutsite_remnant + \ self._reverse_complement(barcode) + \ Loading test/test_operations.py +89 −16 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from gbprocess.operations.filtering import (AverageQualityFilter, LengthFilter, MaxNFilter, SlidingWindowQualityFilter) from gbprocess.operations.merging import FastqJoinMerger from gbprocess.pipeline import SerialPipeline from gbprocess.operations.trimming import CutadaptPatternTrimmer from gbprocess.operations.trimming import CutadaptPatternTrimmer, CutadaptPositionalTrimmer from utils import CustomTestCase from data import barcodes, fastq_forward, fastq_reverse, fastq_forward_no_at, fastq_reverse_no_at, invalid_fastq_content Loading Loading @@ -624,13 +624,86 @@ class TestSlidingWindowQualityFilter(CustomOperationTestCase): extension = self.single_end_fastq.extension self.assertEqual(str(forward.name), f'{run}{extension}') class TestCutadaptPositionalTrimmer(CustomOperationTestCase): def setUp(self): super().setUp() barcode = dedent( """ >barcode1 ATCCCCGG """).strip() with open(self.tempdir.name + "/barcodes.fasta", 'w+') as barcodes_file: barcodes_file.write(barcode) barcodes_file.flush() self.barcodes = self.tempdir.name + "/barcodes.fasta" self.paired_fastq.sample_name = "barcode1" self.single_end_fastq.sample_name = "barcode1" self.forward_result = dedent(""" @EU861894-140/1 TCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATCACTGTACCTGCTCCACCGCTTGTGCGGGCCCTCGTCA + HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#C#G@#C=)I@#BHBBCDCD;ACDCB;??CD>#D#:DA @EU861894-138/1 TCCACATGTGGTCCCCCGCTTCGGGGCAGGTTGCCCACGTGTTACGCGACCGTTCGCCATTAACCAC + HHG?BAJJG0BII#GI8;FJIBFBHGJI>+EC=BECBDCHDEAAD#=E6DDFF=9CD#A#8H@@>#D """).strip() self.reverse_result = dedent(""" @EU861894-140/2 GAATTGACAGGGGCACGCATAAGCGGTGCGGTATGTGCATTAATTCGTCACTAACTGAAGAACCTCACCAGGCTTTGAAACCCACGGAGAGCGGGAG + F1FFEFFA#AGGJB!EHDI434:J?GJI##B#)BJIICJJGEBFIBJ>GGDJIGI#)II<H6=ID#E?CD4##CDEFB#C#CA#-<#?#FCE#!DC' @EU861894-138/2 CGGGGGTGGTTAGGCAACCCCCCCCGAAGCGGGGGACAACAGCCTTAAACGGTTCCTAATACCGCATGGTGA + FB2@FHB2HFJGFFHJ?8=##JDGHDEIBH?H#HI)EFFEF#C#B#HE?#D?#;#DDCA#:DD>BCB###D' """).strip() def test_trim_single(self): trim_op = CutadaptPositionalTrimmer(barcode_side_cutsite_remnant="", common_side_cutsite_remnant="GCG", barcodes=self.barcodes, output_file_name_template="{run}_{extension}", output_directory=self.tempdir.name) result = trim_op.perform(self.single_end_fastq) fastq_result = list(result) self.assertTrue(len(fastq_result) == 1) fastq, = fastq_result.pop().files self.assertFileContentEquals(fastq, self.forward_result) def test_trim_paired(self): trim_op = CutadaptPositionalTrimmer(barcode_side_cutsite_remnant="", common_side_cutsite_remnant="GCG", barcodes=self.barcodes, output_file_name_template="{run}_{orientation}{extension}", output_directory=self.tempdir.name) output_seq_data = trim_op.perform(self.paired_fastq) fastq, = output_seq_data forward, reverse = fastq.files # Check output file contents self.assertFileContentEquals(forward, self.forward_result) self.assertFileContentEquals(reverse, self.reverse_result) # Check output directory self.assertEqual(forward.parent, reverse.parent) self.assertEqual(self.tempdir.name, str(forward.parent)) # Check output name run = self.single_end_fastq.run extension = self.single_end_fastq.extension self.assertEqual(forward.name, f"{run}_1{extension}") self.assertEqual(reverse.name, f"{run}_2{extension}") class TestCutadaptPatternTrimmer(CustomOperationTestCase): def setUp(self): super().setUp() barcode = dedent( """ >barcode1 ATCCCCGGG CCGCTT """).strip() with open(self.tempdir.name + "/barcodes.fasta", 'w+') as barcodes_file: barcodes_file.write(barcode) Loading @@ -640,9 +713,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): self.single_end_fastq.sample_name = "barcode1" self.forward_result = dedent(""" @EU861894-140/1 CCGATCTCTCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATCACT CCGATCTCTCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATC + ??CFFF?;HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#C#G ??CFFF?;HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD# @EU861894-138/1 AGAGCGCATCCACATGTGGTCCCCCGCTTCGGGGCAGGTTGCCCACGTGTTACGCGACCGTTCGCCATTAACCAC + Loading @@ -651,9 +724,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): self.reverse_result = dedent(""" @EU861894-140/2 AAGGAATTGACAGGGGCACGCATAAGCGGTGCGGTATGTGCATTAATTCGTCACTAACTGAAGAACCTCACCAGGCTTTGAAACCCACGGAGAGCGGGAG AAGGAATTGACAGGGGCACG + @1@F1FFEFFA#AGGJB!EHDI434:J?GJI##B#)BJIICJJGEBFIBJ>GGDJIGI#)II<H6=ID#E?CD4##CDEFB#C#CA#-<#?#FCE#!DC' @1@F1FFEFFA#AGGJB!EH @EU861894-138/2 CTTCGGGGGTGGTTAGGCAACCCCCCCCGAAGCGGGGGACAACAGCCTTAAACGGTTCCTAATACCGCATGGTGA + Loading @@ -663,7 +736,7 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): def test_trim_single(self): trim_op = CutadaptPatternTrimmer(common_adapter_sequence="GTGGAGCAGGTAC", barcode_adapter_sequence="", common_side_cutsite_remnant="GCG", common_side_cutsite_remnant="AGT", barcode_side_cutsite_remnant="", barcodes=self.barcodes, minimum_length=1, Loading @@ -678,9 +751,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): def test_trim_paired(self): trim_op = CutadaptPatternTrimmer(common_adapter_sequence="GTGGAGCAGGTAC", barcode_adapter_sequence="", common_side_cutsite_remnant="GCG", barcode_side_cutsite_remnant="", barcode_adapter_sequence="ACATACCGCA", common_side_cutsite_remnant="AGT", barcode_side_cutsite_remnant="CAT", barcodes=self.barcodes, minimum_length=1, error_rate=0, Loading Loading
gbprocess/operations/trimming.py +2 −2 Original line number Diff line number Diff line Loading @@ -177,7 +177,7 @@ class CutadaptPositionalTrimmer(CommandLineWrapper, Trimmer): # Will be needed if we handle spacers! ] if len(output_files) == 2: args.extend(["-U", len(self._common_side_cutsite_remnant) + 1]) args.extend(["-U", len(self._common_side_cutsite_remnant)]) args.extend(output_file_arg) args.extend(files) Loading Loading @@ -406,7 +406,7 @@ class CutadaptPatternTrimmer(CommandLineWrapper, Trimmer): def _build_patterns(self, barcode): result = [] result.append(self._reverse_complement(self._common_side_cutsite_remnant + self._common_adapter_sequence)) result.append(self._reverse_complement(self._common_adapter_sequence + self._common_side_cutsite_remnant)) if self._barcode_adapter_sequence: result.append(self._barcode_side_cutsite_remnant + \ self._reverse_complement(barcode) + \ Loading
test/test_operations.py +89 −16 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from gbprocess.operations.filtering import (AverageQualityFilter, LengthFilter, MaxNFilter, SlidingWindowQualityFilter) from gbprocess.operations.merging import FastqJoinMerger from gbprocess.pipeline import SerialPipeline from gbprocess.operations.trimming import CutadaptPatternTrimmer from gbprocess.operations.trimming import CutadaptPatternTrimmer, CutadaptPositionalTrimmer from utils import CustomTestCase from data import barcodes, fastq_forward, fastq_reverse, fastq_forward_no_at, fastq_reverse_no_at, invalid_fastq_content Loading Loading @@ -624,13 +624,86 @@ class TestSlidingWindowQualityFilter(CustomOperationTestCase): extension = self.single_end_fastq.extension self.assertEqual(str(forward.name), f'{run}{extension}') class TestCutadaptPositionalTrimmer(CustomOperationTestCase): def setUp(self): super().setUp() barcode = dedent( """ >barcode1 ATCCCCGG """).strip() with open(self.tempdir.name + "/barcodes.fasta", 'w+') as barcodes_file: barcodes_file.write(barcode) barcodes_file.flush() self.barcodes = self.tempdir.name + "/barcodes.fasta" self.paired_fastq.sample_name = "barcode1" self.single_end_fastq.sample_name = "barcode1" self.forward_result = dedent(""" @EU861894-140/1 TCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATCACTGTACCTGCTCCACCGCTTGTGCGGGCCCTCGTCA + HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#C#G@#C=)I@#BHBBCDCD;ACDCB;??CD>#D#:DA @EU861894-138/1 TCCACATGTGGTCCCCCGCTTCGGGGCAGGTTGCCCACGTGTTACGCGACCGTTCGCCATTAACCAC + HHG?BAJJG0BII#GI8;FJIBFBHGJI>+EC=BECBDCHDEAAD#=E6DDFF=9CD#A#8H@@>#D """).strip() self.reverse_result = dedent(""" @EU861894-140/2 GAATTGACAGGGGCACGCATAAGCGGTGCGGTATGTGCATTAATTCGTCACTAACTGAAGAACCTCACCAGGCTTTGAAACCCACGGAGAGCGGGAG + F1FFEFFA#AGGJB!EHDI434:J?GJI##B#)BJIICJJGEBFIBJ>GGDJIGI#)II<H6=ID#E?CD4##CDEFB#C#CA#-<#?#FCE#!DC' @EU861894-138/2 CGGGGGTGGTTAGGCAACCCCCCCCGAAGCGGGGGACAACAGCCTTAAACGGTTCCTAATACCGCATGGTGA + FB2@FHB2HFJGFFHJ?8=##JDGHDEIBH?H#HI)EFFEF#C#B#HE?#D?#;#DDCA#:DD>BCB###D' """).strip() def test_trim_single(self): trim_op = CutadaptPositionalTrimmer(barcode_side_cutsite_remnant="", common_side_cutsite_remnant="GCG", barcodes=self.barcodes, output_file_name_template="{run}_{extension}", output_directory=self.tempdir.name) result = trim_op.perform(self.single_end_fastq) fastq_result = list(result) self.assertTrue(len(fastq_result) == 1) fastq, = fastq_result.pop().files self.assertFileContentEquals(fastq, self.forward_result) def test_trim_paired(self): trim_op = CutadaptPositionalTrimmer(barcode_side_cutsite_remnant="", common_side_cutsite_remnant="GCG", barcodes=self.barcodes, output_file_name_template="{run}_{orientation}{extension}", output_directory=self.tempdir.name) output_seq_data = trim_op.perform(self.paired_fastq) fastq, = output_seq_data forward, reverse = fastq.files # Check output file contents self.assertFileContentEquals(forward, self.forward_result) self.assertFileContentEquals(reverse, self.reverse_result) # Check output directory self.assertEqual(forward.parent, reverse.parent) self.assertEqual(self.tempdir.name, str(forward.parent)) # Check output name run = self.single_end_fastq.run extension = self.single_end_fastq.extension self.assertEqual(forward.name, f"{run}_1{extension}") self.assertEqual(reverse.name, f"{run}_2{extension}") class TestCutadaptPatternTrimmer(CustomOperationTestCase): def setUp(self): super().setUp() barcode = dedent( """ >barcode1 ATCCCCGGG CCGCTT """).strip() with open(self.tempdir.name + "/barcodes.fasta", 'w+') as barcodes_file: barcodes_file.write(barcode) Loading @@ -640,9 +713,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): self.single_end_fastq.sample_name = "barcode1" self.forward_result = dedent(""" @EU861894-140/1 CCGATCTCTCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATCACT CCGATCTCTCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATC + ??CFFF?;HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#C#G ??CFFF?;HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD# @EU861894-138/1 AGAGCGCATCCACATGTGGTCCCCCGCTTCGGGGCAGGTTGCCCACGTGTTACGCGACCGTTCGCCATTAACCAC + Loading @@ -651,9 +724,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): self.reverse_result = dedent(""" @EU861894-140/2 AAGGAATTGACAGGGGCACGCATAAGCGGTGCGGTATGTGCATTAATTCGTCACTAACTGAAGAACCTCACCAGGCTTTGAAACCCACGGAGAGCGGGAG AAGGAATTGACAGGGGCACG + @1@F1FFEFFA#AGGJB!EHDI434:J?GJI##B#)BJIICJJGEBFIBJ>GGDJIGI#)II<H6=ID#E?CD4##CDEFB#C#CA#-<#?#FCE#!DC' @1@F1FFEFFA#AGGJB!EH @EU861894-138/2 CTTCGGGGGTGGTTAGGCAACCCCCCCCGAAGCGGGGGACAACAGCCTTAAACGGTTCCTAATACCGCATGGTGA + Loading @@ -663,7 +736,7 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): def test_trim_single(self): trim_op = CutadaptPatternTrimmer(common_adapter_sequence="GTGGAGCAGGTAC", barcode_adapter_sequence="", common_side_cutsite_remnant="GCG", common_side_cutsite_remnant="AGT", barcode_side_cutsite_remnant="", barcodes=self.barcodes, minimum_length=1, Loading @@ -678,9 +751,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase): def test_trim_paired(self): trim_op = CutadaptPatternTrimmer(common_adapter_sequence="GTGGAGCAGGTAC", barcode_adapter_sequence="", common_side_cutsite_remnant="GCG", barcode_side_cutsite_remnant="", barcode_adapter_sequence="ACATACCGCA", common_side_cutsite_remnant="AGT", barcode_side_cutsite_remnant="CAT", barcodes=self.barcodes, minimum_length=1, error_rate=0, Loading