download_vcf_portion.sh 2.12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash

set -e

# Positional arguments, in order:
#   1. S3 bucket holding the VCF file
#   2. S3 key of the VCF file
#   3. S3 bucket holding the CSI index
#   4. S3 key of the CSI index
#   5. Region to extract
VCF_BUCKET=$1
VCF_KEY=$2
CSI_BUCKET=$3
CSI_KEY=$4
REGION=$5

# Environment: when GENCOVE_AWS_NO_SIGN_REQUEST is set to a non-empty value,
# AWS_SIGN_REQUEST carries the "--no-sign-request" flag; otherwise it is the
# empty string. It is exported either way.
export AWS_SIGN_REQUEST="${GENCOVE_AWS_NO_SIGN_REQUEST:+--no-sign-request}"

# Name of this script without its directory part (shown in the usage text).
SCRIPT=$(basename "$0")

# Print the command-line synopsis (two lines) to stdout.
# Reads the global SCRIPT, which is shown as the command name.
print_usage () {
    printf '%s\n' \
        "Usage:" \
        "      $ $SCRIPT vcf_bucket vcf_key csi_bucket csi_key region"
}

# Validate the five required positional arguments, in order; the first
# missing one aborts the script with exit status 1.
#
# Diagnostics are written to stderr: stdout is this script's data channel
# (the VCF stream), so error text must never end up in it.

# Abort with a message and the usage text when a required value is empty.
#   $1 - value to check
#   $2 - message printed when the value is empty
require_arg () {
    if [ -z "$1" ]; then
        echo "$2" >&2
        print_usage >&2
        exit 1
    fi
}

require_arg "$VCF_BUCKET" "S3 bucket where VCF file is located not specified, please specify as the first argument"
require_arg "$VCF_KEY" "S3 key where VCF file is located not specified, please specify as the second argument"
require_arg "$CSI_BUCKET" "S3 bucket where CSI file is located not specified, please specify as the third argument"
require_arg "$CSI_KEY" "S3 key where CSI file is located not specified, please specify as the fourth argument"
require_arg "$REGION" "Region not specified, please specify as the fifth argument"


# S3 paths to vcf and csi
PATH_TO_VCF="s3://$VCF_BUCKET/$VCF_KEY"
PATH_TO_CSI="s3://$CSI_BUCKET/$CSI_KEY"

# Hold the optional signing flag in an array: an empty AWS_SIGN_REQUEST then
# contributes no argument at all, while every other expansion can be quoted.
SIGN_ARGS=()
if [ -n "$AWS_SIGN_REQUEST" ]; then
    SIGN_ARGS=("$AWS_SIGN_REQUEST")
fi


# Get byte offsets for VCF file
# The assignment is kept separate from "export": "export VAR=$(cmd)" returns
# the status of export itself, which would hide a failed index lookup and let
# the script continue with empty offsets.
if ! RESULT=$(aws s3 cp "${SIGN_ARGS[@]}" "$PATH_TO_CSI" - | htsutil csi offsets --region "$REGION"); then
    echo "Failed to look up byte offsets for region '$REGION' in $PATH_TO_CSI" >&2
    exit 1
fi

# RESULT is parsed as "<BEG>:<OFFSET_BEG>-<END>".
BEG_RESULT=$(printf '%s\n' "$RESULT" | cut -d '-' -f 1)
END=$(printf '%s\n' "$RESULT" | cut -d '-' -f 2)

BEG=$(printf '%s\n' "$BEG_RESULT" | cut -d ':' -f 1)
OFFSET_BEG=$(printf '%s\n' "$BEG_RESULT" | cut -d ':' -f 2)

export RESULT BEG_RESULT END BEG OFFSET_BEG

# BEG and END go into an HTTP Range header and OFFSET_BEG into shell
# arithmetic, so they must be plain non-negative integers. An empty END is
# tolerated because "bytes=N-" is a valid open-ended range.
# NOTE(review): confirm whether htsutil can actually emit an empty END.
if ! [[ "$BEG" =~ ^[0-9]+$ && "$OFFSET_BEG" =~ ^[0-9]+$ && "$END" =~ ^[0-9]*$ ]]; then
    echo "Unexpected byte offsets '$RESULT' for region '$REGION' in $PATH_TO_CSI" >&2
    exit 1
fi


# Final output
#   1. Download VCF header and print to stdout (ignore broken pipe error)
#   2. Download portion of VCF file and print to stdout
#   3. Output cleanup with bcftools
#
# Notes:
#   - stderr of the header download is discarded on purpose: bcftools stops
#     reading once it has the header, so "aws s3 cp" ends with a broken pipe.
#   - tail -c "+N" starts output at byte N (1-based), i.e. it skips the first
#     OFFSET_BEG bytes of the decompressed range.
#   - Failures inside the <( ... ) process substitutions do not affect this
#     script's exit status; that status is the one of the final bcftools.
#   - "htsutil misc contains-newline" is kept exactly as in the original
#     pipeline; its precise behaviour is not visible from this script.
cat \
    <(aws s3 cp "${SIGN_ARGS[@]}" "$PATH_TO_VCF" - 2>/dev/null | bcftools view --header-only --no-version -Oz -o - -) \
    <(aws s3api get-object "${SIGN_ARGS[@]}" --bucket "$VCF_BUCKET" --key "$VCF_KEY" --range "bytes=$BEG-$END" /dev/stdout | gzip -dcq | tail -c "+$((OFFSET_BEG + 1))" | htsutil misc contains-newline | bgzip -c) \
        | bcftools view - -Oz -o - -t "$REGION"