Commit 86399bbc authored by Dries Schaumont
Browse files

Fix incorrect trimming of reverse reads.

parent 5b3bb4fa
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -177,7 +177,7 @@ class CutadaptPositionalTrimmer(CommandLineWrapper, Trimmer):
            # Will be needed if we handle spacers! 
        ]
        if len(output_files) == 2:
            args.extend(["-U", len(self._common_side_cutsite_remnant) + 1])
            args.extend(["-U", len(self._common_side_cutsite_remnant)])

        args.extend(output_file_arg)
        args.extend(files)
@@ -406,7 +406,7 @@ class CutadaptPatternTrimmer(CommandLineWrapper, Trimmer):

    def _build_patterns(self, barcode):
        result = []
        result.append(self._reverse_complement(self._common_side_cutsite_remnant + self._common_adapter_sequence))
        result.append(self._reverse_complement(self._common_adapter_sequence + self._common_side_cutsite_remnant))
        if self._barcode_adapter_sequence:
            result.append(self._barcode_side_cutsite_remnant + \
                          self._reverse_complement(barcode) + \
+89 −16
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ from gbprocess.operations.filtering import (AverageQualityFilter, LengthFilter,
                                      MaxNFilter, SlidingWindowQualityFilter)
from gbprocess.operations.merging import FastqJoinMerger
from gbprocess.pipeline import SerialPipeline
from gbprocess.operations.trimming import CutadaptPatternTrimmer
from gbprocess.operations.trimming import CutadaptPatternTrimmer, CutadaptPositionalTrimmer
from utils import CustomTestCase

from data import barcodes, fastq_forward, fastq_reverse, fastq_forward_no_at, fastq_reverse_no_at, invalid_fastq_content
@@ -624,13 +624,86 @@ class TestSlidingWindowQualityFilter(CustomOperationTestCase):
        extension = self.single_end_fastq.extension
        self.assertEqual(str(forward.name), f'{run}{extension}')

class TestCutadaptPositionalTrimmer(CustomOperationTestCase):
    """Tests for ``CutadaptPositionalTrimmer`` on single-end and paired-end input.

    Each test runs the trimmer on a fixture provided by the base test case
    (``self.single_end_fastq`` / ``self.paired_fastq``) and compares the
    produced files against the expected FASTQ text defined in ``setUp``.
    """

    def setUp(self):
        """Write the barcode FASTA file and define the expected trimmed output."""
        super().setUp()
        barcode = dedent(
            """
            >barcode1
            ATCCCCGG
            """).strip()
        # Compute the path once; it is both written here and handed to the
        # trimmer through the ``barcodes`` argument in the tests.
        self.barcodes = self.tempdir.name + "/barcodes.fasta"
        # Leaving the ``with`` block closes the file, which also flushes it,
        # so no explicit flush is needed.
        with open(self.barcodes, 'w') as barcodes_file:
            barcodes_file.write(barcode)
        # The sample name matches the header of the single barcode entry.
        self.paired_fastq.sample_name = "barcode1"
        self.single_end_fastq.sample_name = "barcode1"

        # Expected content of the forward output file.
        self.forward_result = dedent("""
            @EU861894-140/1
            TCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATCACTGTACCTGCTCCACCGCTTGTGCGGGCCCTCGTCA
            +
            HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#C#G@#C=)I@#BHBBCDCD;ACDCB;??CD>#D#:DA
            @EU861894-138/1
            TCCACATGTGGTCCCCCGCTTCGGGGCAGGTTGCCCACGTGTTACGCGACCGTTCGCCATTAACCAC
            +
            HHG?BAJJG0BII#GI8;FJIBFBHGJI>+EC=BECBDCHDEAAD#=E6DDFF=9CD#A#8H@@>#D
            """).strip()

        # Expected content of the reverse output file.
        self.reverse_result = dedent("""
            @EU861894-140/2
            GAATTGACAGGGGCACGCATAAGCGGTGCGGTATGTGCATTAATTCGTCACTAACTGAAGAACCTCACCAGGCTTTGAAACCCACGGAGAGCGGGAG
            +
            F1FFEFFA#AGGJB!EHDI434:J?GJI##B#)BJIICJJGEBFIBJ>GGDJIGI#)II<H6=ID#E?CD4##CDEFB#C#CA#-<#?#FCE#!DC'
            @EU861894-138/2
            CGGGGGTGGTTAGGCAACCCCCCCCGAAGCGGGGGACAACAGCCTTAAACGGTTCCTAATACCGCATGGTGA
            +
            FB2@FHB2HFJGFFHJ?8=##JDGHDEIBH?H#HI)EFFEF#C#B#HE?#D?#;#DDCA#:DD>BCB###D'
            """).strip()

    def test_trim_single(self):
        """Trimming single-end input yields one result whose file matches the forward expectation."""
        trim_op = CutadaptPositionalTrimmer(barcode_side_cutsite_remnant="",
                                            common_side_cutsite_remnant="GCG",
                                            barcodes=self.barcodes,
                                            output_file_name_template="{run}_{extension}",
                                            output_directory=self.tempdir.name)
        result = trim_op.perform(self.single_end_fastq)
        fastq_result = list(result)
        # assertEqual reports the actual length on failure, unlike assertTrue.
        self.assertEqual(len(fastq_result), 1)
        fastq, = fastq_result.pop().files
        self.assertFileContentEquals(fastq, self.forward_result)

    def test_trim_paired(self):
        """Trimming paired-end input yields matching forward and reverse files with the templated names."""
        trim_op = CutadaptPositionalTrimmer(barcode_side_cutsite_remnant="",
                                            common_side_cutsite_remnant="GCG",
                                            barcodes=self.barcodes,
                                            output_file_name_template="{run}_{orientation}{extension}",
                                            output_directory=self.tempdir.name)
        output_seq_data = trim_op.perform(self.paired_fastq)
        # Single-element unpacking fails if more or fewer than one result is produced.
        fastq, = output_seq_data
        forward, reverse = fastq.files
        # Check output file contents
        self.assertFileContentEquals(forward, self.forward_result)
        self.assertFileContentEquals(reverse, self.reverse_result)

        # Check output directory
        self.assertEqual(forward.parent, reverse.parent)
        self.assertEqual(self.tempdir.name, str(forward.parent))

        # Check output name
        # NOTE(review): run/extension are read from the single-end fixture;
        # presumably both fixtures share them - confirm against the base class.
        run = self.single_end_fastq.run
        extension = self.single_end_fastq.extension
        self.assertEqual(forward.name, f"{run}_1{extension}")
        self.assertEqual(reverse.name, f"{run}_2{extension}")


class TestCutadaptPatternTrimmer(CustomOperationTestCase):
    def setUp(self):
        super().setUp()
        barcode = dedent(
            """
            >barcode1
            ATCCCCGGG
            CCGCTT
            """).strip()
        with open(self.tempdir.name + "/barcodes.fasta", 'w+') as barcodes_file:
            barcodes_file.write(barcode)
@@ -640,9 +713,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase):
        self.single_end_fastq.sample_name = "barcode1"
        self.forward_result = dedent("""
                             @EU861894-140/1
                              CCGATCTCTCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATCACT
                             CCGATCTCTCGGCCTGCCCGGGGATCTCAAACNCTGGTAAGCTTCTCCGGTTAGTGACGAATC
                             +
                              ??CFFF?;HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#C#G
                             ??CFFF?;HHAH#III#I:IHJIJG#JIJJI?3IIJ0JJ#G3JGHG#I90?HIJG9JEIIBD#
                             @EU861894-138/1
                             AGAGCGCATCCACATGTGGTCCCCCGCTTCGGGGCAGGTTGCCCACGTGTTACGCGACCGTTCGCCATTAACCAC
                             +
@@ -651,9 +724,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase):

        self.reverse_result = dedent("""
                                    @EU861894-140/2
                                    AAGGAATTGACAGGGGCACGCATAAGCGGTGCGGTATGTGCATTAATTCGTCACTAACTGAAGAACCTCACCAGGCTTTGAAACCCACGGAGAGCGGGAG
                                    AAGGAATTGACAGGGGCACG
                                    +
                                    @1@F1FFEFFA#AGGJB!EHDI434:J?GJI##B#)BJIICJJGEBFIBJ>GGDJIGI#)II<H6=ID#E?CD4##CDEFB#C#CA#-<#?#FCE#!DC'
                                    @1@F1FFEFFA#AGGJB!EH
                                    @EU861894-138/2
                                    CTTCGGGGGTGGTTAGGCAACCCCCCCCGAAGCGGGGGACAACAGCCTTAAACGGTTCCTAATACCGCATGGTGA
                                    +
@@ -663,7 +736,7 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase):
    def test_trim_single(self):
        trim_op = CutadaptPatternTrimmer(common_adapter_sequence="GTGGAGCAGGTAC", 
                                         barcode_adapter_sequence="", 
                                         common_side_cutsite_remnant="GCG", 
                                         common_side_cutsite_remnant="AGT", 
                                         barcode_side_cutsite_remnant="", 
                                         barcodes=self.barcodes, 
                                         minimum_length=1, 
@@ -678,9 +751,9 @@ class TestCutadaptPatternTrimmer(CustomOperationTestCase):

    def test_trim_paired(self):
        trim_op = CutadaptPatternTrimmer(common_adapter_sequence="GTGGAGCAGGTAC", 
                                    barcode_adapter_sequence="", 
                                    common_side_cutsite_remnant="GCG", 
                                    barcode_side_cutsite_remnant="", 
                                    barcode_adapter_sequence="ACATACCGCA", 
                                    common_side_cutsite_remnant="AGT", 
                                    barcode_side_cutsite_remnant="CAT", 
                                    barcodes=self.barcodes, 
                                    minimum_length=1, 
                                    error_rate=0,