... | ... | @@ -420,12 +420,12 @@ UniProt amino acid sequences are in `20200311_ENSEMBL_IDs_to_UniProtKB_mapping.t |
|
|
awk -v q="\"" 'BEGIN {FS="\t"; OFS=","} {split($9,a,"[=;]")} \
|
|
|
{print "INSERT IGNORE INTO b10k_gff(unigene,seqid,source,type,start,end,score,strand,phase,attribute) \
|
|
|
VALUES(" q a[2] q, q $1 q, q $2 q, q $3 q, q $4 q, q $5 q, q $6 q, q $7 q, q $8 q, q $9 q ");"}' \
|
|
|
${i}.gff > ../../../import/insert_b10k_${i}.sql; \
|
|
|
${i}.gff > ../../../import/b10k_annotation/insert_b10k_${i}.sql; \
|
|
|
done
|
|
|
```
|
|
|
- Import data
|
|
|
```shell
|
|
|
cd ../../../import
|
|
|
cd ../../../import/b10k_annotation
|
|
|
# Feed each generated insert_b10k_*_immune.sql file to my_immunome on stdin.
# The -e test skips the unexpanded pattern when no file matches; quoting keeps
# file names with whitespace from breaking the redirection.
for i in insert_b10k_*_immune.sql; do [ -e "${i}" ] || continue; echo "processing ${i}"; my_immunome < "${i}"; done
|
|
|
```
|
|
|
|
... | ... | @@ -437,11 +437,12 @@ UniProt amino acid sequences are in `20200311_ENSEMBL_IDs_to_UniProtKB_mapping.t |
|
|
cd data/b10k/assembly/raw
|
|
|
IFS=$(echo -en "\n\b"); for i in $(cat sample_sheet.txt); do \
|
|
|
old=$(echo $i | awk '{print $1}'); new=$(echo $i | awk '{print $2}'); \
|
|
|
mv ${old}.genomic.fa.gz ${new}.fa.gz; \
|
|
|
mv ${old}.genomic.fa.gz ../${new}.fa.gz; \
|
|
|
done
|
|
|
```
|
|
|
- Decompress and index genomes (download and install samtools from https://htslib.org)
|
|
|
```shell
|
|
|
cd ..
|
|
|
pigz -d *.fa.gz && for i in $(ls *.fa); do \
|
|
|
echo "processing ${i}"; \
|
|
|
samtools faidx $i; \
|
... | ... | @@ -479,17 +480,17 @@ UniProt amino acid sequences are in `20200311_ENSEMBL_IDs_to_UniProtKB_mapping.t |
|
|
print "UPDATE b10k_gff SET nt_seq = " q $3 q " WHERE unigene LIKE", \
|
|
|
q s "_%" q " AND seqid = " q $1 q " AND start =", \
|
|
|
q a[1] q " AND end = " q a[2] q ";"}' $i \
|
|
|
> ../../import/`basename $i .fa`.sql; \
|
|
|
> ../../import/b10k_nt_seq/`basename $i .fa`.sql; \
|
|
|
done
|
|
|
```
|
|
|
- Import data (can take several hours)
|
|
|
```shell
|
|
|
cd ../../import
|
|
|
cd ../../import/b10k_nt_seq
|
|
|
# Feed every .sql file in this directory to my_immunome on stdin.
# Globbing directly (instead of parsing `ls` output) keeps file names intact;
# the -e test skips the unexpanded pattern when no file matches.
for i in *.sql; do [ -e "${i}" ] || continue; echo "processing ${i}"; my_immunome < "${i}"; done
|
|
|
```
|
|
|
- Compress genomes again
|
|
|
```shell
|
|
|
cd ../data/b10k/assembly/raw
|
|
|
cd ../../data/b10k/assembly/raw
|
|
|
pigz *.fa
|
|
|
```
|
|
|
|
... | ... | |