{
  "description": "Transcriptome index for salmon, produced with salmon index using partial selective alignment method. Preparation includes transcriptome mapping to the genome and extraction of the relevant portion out from the genome and indexing it along with the transcriptome. Recipe source -- https://github.com/COMBINE-lab/SalmonTools/blob/master/scripts/generateDecoyTranscriptome.sh",
  "required_files": [],
  "required_assets": [
    {
      "key": "fasta",
      "default": "fasta",
      "description": "fasta asset for genome"
    },
    {
      "key": "fasta_txome",
      "default": "fasta_txome",
      "description": "fasta asset for transcriptome"
    },
    {
      "key": "gtf",
      "default": "ensembl_gtf",
      "description": "GTF file for exonic features extraction"
    }
  ],
  "required_parameters": [
    {
      "key": "threads",
      "default": "8",
      "description": "Number of threads to use for parallel computing"
    },
    {
      "key": "kmer",
      "default": "31",
      "description": "The length of kmer to use to create the indices"
    }
  ],
  "container": "combinelab/salmon",
  "assets": {
    "salmon_partial_sa_index": "."
  },
  "command_list": [
    "gunzip -c {gtf} > {asset_outfolder}/{genome}.gtf",
    "awk -v OFS='\t' '{{if ($3==\"exon\") {{print $1,$4,$5}}}}' {asset_outfolder}/{genome}.gtf > {asset_outfolder}/exons.bed",
    "bedtools maskfasta -fi {fasta} -bed {asset_outfolder}/exons.bed -fo {asset_outfolder}/reference.masked.genome.fa",
    "mashmap -r {asset_outfolder}/reference.masked.genome.fa -q {fasta_txome} -t {threads} --pi 80 -s 500 -o {asset_outfolder}/mashmap.out",
    "awk -v OFS='\t' '{{print $6,$8,$9}}' {asset_outfolder}/mashmap.out | sort -k1,1 -k2,2n - > {asset_outfolder}/genome_found.sorted.bed",
    "bedtools merge -i {asset_outfolder}/genome_found.sorted.bed > {asset_outfolder}/genome_found_merged.bed",
    "bedtools getfasta -fi {asset_outfolder}/reference.masked.genome.fa -bed {asset_outfolder}/genome_found_merged.bed -fo {asset_outfolder}/genome_found.fa",
    "awk '{{a=$0; getline;split(a, b, \":\");  r[b[1]] = r[b[1]]\"\"$0}} END {{ for (k in r) {{ print k\"\\n\"r[k] }} }}' {asset_outfolder}/genome_found.fa > {asset_outfolder}/decoy.fa",
    "cat {fasta_txome} {asset_outfolder}/decoy.fa > {asset_outfolder}/gentrome.fa",
    "grep '>' {asset_outfolder}/decoy.fa | awk '{{print substr($1,2); }}' > {asset_outfolder}/decoys.txt",
    "rm {asset_outfolder}/exons.bed {asset_outfolder}/reference.masked.genome.fa {asset_outfolder}/mashmap.out {asset_outfolder}/genome_found.sorted.bed {asset_outfolder}/genome_found_merged.bed {asset_outfolder}/genome_found.fa {asset_outfolder}/decoy.fa {asset_outfolder}/reference.masked.genome.fa.fai",
    "salmon index -t {asset_outfolder}/gentrome.fa -d {asset_outfolder}/decoys.txt -i {asset_outfolder} -k {kmer} -p {threads}"
  ]
}