Added the wdl and input json files

2019-08-27 14:25:21 -04:00 · 2019-08-27 14:25:21 -04:00 · b950ebd65d
--- a/gatk4-rna-best-practices.wdl
+++ b/gatk4-rna-best-practices.wdl
@ -0,0 +1,818 @@
+## Copyright Broad Institute, 2019
+##
+## Workflows for processing RNA data for germline short variant discovery with GATK (v4) and related tools
+##
+## Requirements/expectations :
+## - BAM 
+##
+## Output :
+## - A BAM file and its index.
+## - A VCF file and its index. 
+## - A Filtered VCF file and its index. 
+##
+## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
+## For program versions, see docker containers.
+##
+## LICENSING :
+## This script is released under the WDL source code license (BSD-3) (see LICENSE in
+## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
+## be subject to different licenses. Users are responsible for checking that they are
+## authorized to run all programs before running this script. Please see the docker
+## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
+## licensing information pertaining to the included programs. 
+ 
+ workflow RNAseq {
+
+	File inputBam
+	String sampleName = basename(inputBam,".bam")
+
+	File refFasta
+	File refFastaIndex
+	File refDict
+
+	String? gatk4_docker_override
+	String gatk4_docker = select_first([gatk4_docker_override, "broadinstitute/gatk:latest"])
+	String? gatk_path_override
+	String gatk_path = select_first([gatk_path_override, "/gatk/gatk"])
+	String? star_docker_override
+	String star_docker = select_first([star_docker_override, "quay.io/humancellatlas/secondary-analysis-star:v0.2.2-2.5.3a-40ead6e"])
+
+	Array[File] knownVcfs
+	Array[File] knownVcfsIndices
+
+	File dbSnpVcf
+	File dbSnpVcfIndex
+
+	Int? minConfidenceForVariantCalling
+
+	## Inputs for STAR
+	Int? readLength
+	File? zippedStarReferences
+	File annotationsGTF
+  
+	## Optional user optimizations
+	Int? haplotypeScatterCount
+	Int scatterCount = select_first([haplotypeScatterCount, 6])
+
+	Int? preemptible_tries
+	Int preemptible_count = select_first([preemptible_tries, 3])
+
+	call gtfToCallingIntervals {
+	    input:
+	        gtf = annotationsGTF,
+	        ref_dict = refDict,
+	        preemptible_count = preemptible_count,
+	        gatk_path = gatk_path,
+	        docker = gatk4_docker
+	}
+
+	call RevertSam {
+		input:
+			input_bam = inputBam,
+			base_name = sampleName + ".reverted",
+			sort_order = "queryname",
+			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+	call SamToFastq {
+		input:
+			unmapped_bam = RevertSam.output_bam,
+			base_name = sampleName,
+			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+	if (!defined(zippedStarReferences)) {
+
+		call StarGenerateReferences { 
+			input:
+				ref_fasta = refFasta,
+				ref_fasta_index = refFastaIndex,
+				annotations_gtf = annotationsGTF,
+				read_length = readLength,
+				preemptible_count = preemptible_count,
+				docker = star_docker
+		}
+	}
+
+	File starReferences = select_first([zippedStarReferences,StarGenerateReferences.star_genome_refs_zipped,""])
+
+	call StarAlign { 
+		input: 
+			star_genome_refs_zipped = starReferences,
+			fastq1 = SamToFastq.fastq1,
+			fastq2 = SamToFastq.fastq2,
+			base_name = sampleName + ".star",
+			read_length = readLength,
+			preemptible_count = preemptible_count,
+			docker = star_docker
+	}
+
+	call MergeBamAlignment {
+		input: 
+			unaligned_bam = RevertSam.output_bam,
+			star_bam = StarAlign.output_bam,
+			base_name = ".merged",
+			ref_fasta = refFasta,
+			ref_dict = refDict,
+			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+	call MarkDuplicates {
+		input:
+			input_bam = MergeBamAlignment.output_bam,
+			base_name = sampleName + ".dedupped",
+			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+
+    call SplitNCigarReads {
+        input:
+            input_bam = MarkDuplicates.output_bam,
+            input_bam_index = MarkDuplicates.output_bam_index,
+            base_name = sampleName + ".split",
+            ref_fasta = refFasta,
+            ref_fasta_index = refFastaIndex,
+            ref_dict = refDict,
+            interval_list = gtfToCallingIntervals.interval_list,
+            preemptible_count = preemptible_count,
+            docker = gatk4_docker,
+            gatk_path = gatk_path
+    }
+
+
+	call BaseRecalibrator {
+		input:
+			input_bam = SplitNCigarReads.output_bam,
+			input_bam_index = SplitNCigarReads.output_bam_index,
+			recal_output_file = sampleName + ".recal_data.csv",
+  			dbSNP_vcf = dbSnpVcf,
+  			dbSNP_vcf_index = dbSnpVcfIndex,
+  			known_indels_sites_VCFs = knownVcfs,
+  			known_indels_sites_indices = knownVcfsIndices,
+  			ref_dict = refDict,
+  			ref_fasta = refFasta,
+  			ref_fasta_index = refFastaIndex,
+  			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+	call ApplyBQSR {
+		input:
+			input_bam =  SplitNCigarReads.output_bam,
+			input_bam_index = SplitNCigarReads.output_bam_index,
+			base_name = sampleName + ".aligned.duplicates_marked.recalibrated",
+			ref_fasta = refFasta,
+			ref_fasta_index = refFastaIndex,
+			ref_dict = refDict,
+			recalibration_report = BaseRecalibrator.recalibration_report,
+			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+
+    call ScatterIntervalList {
+        input:
+            interval_list = gtfToCallingIntervals.interval_list,
+            scatter_count = scatterCount,
+            preemptible_count = preemptible_count,
+            docker = gatk4_docker,
+            gatk_path = gatk_path
+    }
+
+
+	scatter (interval in ScatterIntervalList.out) {
+        call HaplotypeCaller {
+            input:
+                input_bam = ApplyBQSR.output_bam,
+                input_bam_index = ApplyBQSR.output_bam_index,
+                base_name = sampleName + ".hc",
+                interval_list = interval,
+                ref_fasta = refFasta,
+                ref_fasta_index = refFastaIndex,
+                ref_dict = refDict,
+                dbSNP_vcf = dbSnpVcf,
+                dbSNP_vcf_index = dbSnpVcfIndex,
+                stand_call_conf = minConfidenceForVariantCalling,
+                preemptible_count = preemptible_count,
+                docker = gatk4_docker,
+                gatk_path = gatk_path
+        }
+
+		File HaplotypeCallerOutputVcf = HaplotypeCaller.output_vcf
+		File HaplotypeCallerOutputVcfIndex = HaplotypeCaller.output_vcf_index
+	}
+
+    call MergeVCFs {
+        input:
+            input_vcfs = HaplotypeCallerOutputVcf,
+            input_vcfs_indexes =  HaplotypeCallerOutputVcfIndex,
+            output_vcf_name = sampleName + ".g.vcf.gz",
+            preemptible_count = preemptible_count,
+            docker = gatk4_docker,
+            gatk_path = gatk_path
+    }
+	
+	call VariantFiltration {
+		input:
+			input_vcf = MergeVCFs.output_vcf,
+			input_vcf_index = MergeVCFs.output_vcf_index,
+			base_name = sampleName + ".variant_filtered.vcf.gz",
+			ref_fasta = refFasta,
+			ref_fasta_index = refFastaIndex,
+			ref_dict = refDict,
+			preemptible_count = preemptible_count,
+			docker = gatk4_docker,
+			gatk_path = gatk_path
+	}
+
+	output {
+		File recalibrated_bam = ApplyBQSR.output_bam
+		File recalibrated_bam_index = ApplyBQSR.output_bam_index
+		File merged_vcf = MergeVCFs.output_vcf
+		File merged_vcf_index = MergeVCFs.output_vcf_index
+		File variant_filtered_vcf = VariantFiltration.output_vcf
+		File variant_filtered_vcf_index = VariantFiltration.output_vcf_index
+	}
+}
+
+task gtfToCallingIntervals {
+    File gtf
+    File ref_dict
+
+    String output_name = basename(gtf, ".gtf") + ".exons.interval_list"
+
+    String docker
+    String gatk_path
+    Int preemptible_count
+
+    command <<<
+        Rscript --no-save -<<'RCODE'
+            gtf = read.table("${gtf}", sep="\t")
+            gtf = subset(gtf, V3 == "exon")
+            write.table(data.frame(chrom=gtf[,'V1'], start=gtf[,'V4'], end=gtf[,'V5']), "exome.bed", quote = F, sep="\t", col.names = F, row.names = F)
+        RCODE
+
+        awk '{print $1 "\t" ($2 - 1) "\t" $3}' exome.bed > exome.fixed.bed
+
+        ${gatk_path} \
+            BedToIntervalList \
+            -I=exome.fixed.bed \
+            -O=${output_name} \
+            -SD=${ref_dict}
+    >>>
+
+    output {
+        File interval_list = "${output_name}"
+    }
+
+    runtime {
+        docker: docker
+        preemptible: preemptible_count
+    }
+}
+
+#NOTE: assuming aggregated bams & paired end fastqs
+task SamToFastq {
+    File unmapped_bam
+    String base_name
+
+    String gatk_path
+
+    String docker
+	Int preemptible_count
+
+	command <<<
+	 	${gatk_path} \
+	 	    SamToFastq \
+	 	    --INPUT ${unmapped_bam} \
+	 	    --VALIDATION_STRINGENCY SILENT \
+	 	    --FASTQ ${base_name}.1.fastq.gz \
+	 	    --SECOND_END_FASTQ ${base_name}.2.fastq.gz
+	>>>
+
+	output {
+		File fastq1 = "${base_name}.1.fastq.gz"
+        File fastq2 = "${base_name}.2.fastq.gz"
+	}
+
+	runtime {
+		docker: docker
+		memory: "4 GB"
+		disks: "local-disk " + sub(((size(unmapped_bam,"GB")+1)*5),"\\..*","") + " HDD"
+		preemptible: preemptible_count
+	}
+}
+
+task StarGenerateReferences {
+	File ref_fasta
+	File ref_fasta_index
+	File annotations_gtf
+	Int? read_length  ## Should this be an input, or should this always be determined by reading the first line of a fastq input
+
+	Int? num_threads
+	Int threads = select_first([num_threads, 8])
+    
+	Int? additional_disk
+        Int add_to_disk = select_first([additional_disk, 0])
+	Int disk_size = select_first([100 + add_to_disk, 100])
+	Int? mem_gb
+	Int mem = select_first([100, mem_gb])
+	String docker
+	Int preemptible_count
+
+	command <<<
+		set -e
+		mkdir STAR2_5
+
+		STAR \
+		--runMode genomeGenerate \
+		--genomeDir STAR2_5 \
+		--genomeFastaFiles ${ref_fasta} \
+		--sjdbGTFfile ${annotations_gtf} \
+		${"--sjdbOverhang "+(read_length-1)} \
+		--runThreadN ${threads}
+
+		ls STAR2_5
+
+		tar -zcvf star-HUMAN-refs.tar.gz STAR2_5
+	>>>
+
+	output {
+		Array[File] star_logs = glob("*.out")
+		File star_genome_refs_zipped = "star-HUMAN-refs.tar.gz"
+	}
+
+	runtime {
+		docker: docker
+		disks: "local-disk " + disk_size + " HDD"
+		cpu: threads
+		memory: mem +" GB"
+		preemptible: preemptible_count
+	}
+}
+
+
+task StarAlign {
+	File star_genome_refs_zipped
+	File fastq1
+	File fastq2
+	String base_name
+	Int? read_length
+
+	Int? num_threads
+	Int threads = select_first([num_threads, 8])
+	Int? star_mem_max_gb
+	Int star_mem = select_first([star_mem_max_gb, 45])
+	#Is there an appropriate default for this?
+	Int? star_limitOutSJcollapsed
+
+	Int? additional_disk
+	Int add_to_disk = select_first([additional_disk, 0])
+	String docker
+	Int preemptible_count
+
+	command <<<
+		set -e
+
+		tar -xvzf ${star_genome_refs_zipped}
+
+		STAR \
+		--genomeDir STAR2_5 \
+		--runThreadN ${threads} \
+		--readFilesIn ${fastq1} ${fastq2} \
+		--readFilesCommand "gunzip -c" \
+		${"--sjdbOverhang "+(read_length-1)} \
+		--outSAMtype BAM SortedByCoordinate \
+		--twopassMode Basic \
+		--limitBAMsortRAM ${star_mem+"000000000"} \
+		--limitOutSJcollapsed ${default=1000000 star_limitOutSJcollapsed} \
+		--outFileNamePrefix ${base_name}.
+	>>>
+
+	output {
+		File output_bam = "${base_name}.Aligned.sortedByCoord.out.bam"
+		File output_log_final = "${base_name}.Log.final.out"
+		File output_log = "${base_name}.Log.out"
+		File output_log_progress = "${base_name}.Log.progress.out"
+		File output_SJ = "${base_name}.SJ.out.tab"
+	}
+
+	runtime {
+		docker: docker
+		disks: "local-disk " + sub(((size(fastq1,"GB")+size(fastq2,"GB")*10)+30+add_to_disk),"\\..*","") + " HDD"
+		memory: (star_mem+1) + " GB"
+		cpu: threads
+		preemptible: preemptible_count
+	}
+}
+
+task MergeBamAlignment {
+
+    File ref_fasta
+    File ref_dict
+
+    File unaligned_bam
+    File star_bam
+    String base_name
+
+    String gatk_path
+
+    String docker
+    Int preemptible_count
+    #Using default for max_records_in_ram
+ 
+    command <<<
+        ${gatk_path} \
+            MergeBamAlignment \
+            --REFERENCE_SEQUENCE ${ref_fasta} \
+            --UNMAPPED_BAM ${unaligned_bam} \
+            --ALIGNED_BAM ${star_bam} \
+            --OUTPUT ${base_name}.bam \
+            --INCLUDE_SECONDARY_ALIGNMENTS false \
+            --PAIRED_RUN False \
+            --VALIDATION_STRINGENCY SILENT
+    >>>
+ 
+    output {
+        File output_bam="${base_name}.bam"
+    }
+
+    runtime {
+        docker: docker
+        disks: "local-disk " + sub(((size(unaligned_bam,"GB")+size(star_bam,"GB")+1)*5),"\\..*","") + " HDD"
+        memory: "4 GB"
+        preemptible: preemptible_count
+    }
+}
+
+task MarkDuplicates {
+
+ 	File input_bam
+ 	String base_name
+
+  String gatk_path
+
+  String docker
+ 	Int preemptible_count
+
+ 	command <<<
+ 	    ${gatk_path} \
+ 	        MarkDuplicates \
+ 	        --INPUT ${input_bam} \
+ 	        --OUTPUT ${base_name}.bam  \
+ 	        --CREATE_INDEX true \
+ 	        --VALIDATION_STRINGENCY SILENT \
+ 	        --METRICS_FILE ${base_name}.metrics
+ 	>>>
+
+ 	output {
+ 		File output_bam = "${base_name}.bam"
+ 		File output_bam_index = "${base_name}.bai"
+ 		File metrics_file = "${base_name}.metrics"
+ 	}
+
+	runtime {
+		disks: "local-disk " + sub(((size(input_bam,"GB")+1)*3),"\\..*","") + " HDD"
+		docker: docker
+		memory: "4 GB"
+		preemptible: preemptible_count
+	}
+}
+
+task SplitNCigarReads {
+
+  File input_bam
+  File input_bam_index
+  String base_name
+  File interval_list
+
+  File ref_fasta
+  File ref_fasta_index
+  File ref_dict
+
+	String gatk_path
+	String docker
+        Int preemptible_count
+
+    command <<<
+        ${gatk_path} \
+                SplitNCigarReads \
+                -R ${ref_fasta} \
+                -I ${input_bam} \
+                -O ${base_name}.bam 
+    >>>
+
+        output {
+                File output_bam = "${base_name}.bam"
+                File output_bam_index = "${base_name}.bai"
+        }
+
+    runtime {
+        disks: "local-disk " + sub(((size(input_bam,"GB")+1)*5 + size(ref_fasta,"GB")),"\\..*","") + " HDD"
+        docker: docker
+        memory: "4 GB"
+        preemptible: preemptible_count
+    }
+}
+
+task BaseRecalibrator {
+
+    File input_bam
+    File input_bam_index
+    String recal_output_file
+
+    File dbSNP_vcf
+    File dbSNP_vcf_index
+    Array[File] known_indels_sites_VCFs
+    Array[File] known_indels_sites_indices
+
+    File ref_dict
+    File ref_fasta
+    File ref_fasta_index
+
+    String gatk_path
+
+    String docker
+    Int preemptible_count
+
+    command <<<
+        ${gatk_path} --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -XX:+PrintFlagsFinal \
+            -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintGCDetails \
+            -Xloggc:gc_log.log -Xms4000m" \
+            BaseRecalibrator \
+            -R ${ref_fasta} \
+            -I ${input_bam} \
+            --use-original-qualities \
+            -O ${recal_output_file} \
+            -known-sites ${dbSNP_vcf} \
+            -known-sites ${sep=" --known-sites " known_indels_sites_VCFs}
+    >>>
+
+    output {
+        File recalibration_report = recal_output_file
+    }
+
+    runtime {
+        memory: "6 GB"
+        disks: "local-disk " + sub((size(input_bam,"GB")*3)+30, "\\..*", "") + " HDD"
+        docker: docker
+        preemptible: preemptible_count
+    }
+}
+
+
+task ApplyBQSR {
+
+    File input_bam
+    File input_bam_index
+    String base_name
+    File recalibration_report
+
+    File ref_dict
+    File ref_fasta
+    File ref_fasta_index
+
+    String gatk_path
+
+    String docker
+    Int preemptible_count
+
+    command <<<
+        ${gatk_path} \
+            --java-options "-XX:+PrintFlagsFinal -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps \
+            -XX:+PrintGCDetails -Xloggc:gc_log.log \
+            -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m" \
+            ApplyBQSR \
+            --add-output-sam-program-record \
+            -R ${ref_fasta} \
+            -I ${input_bam} \
+            --use-original-qualities \
+            -O ${base_name}.bam \
+            --bqsr-recal-file ${recalibration_report}
+    >>>
+
+    output {
+        File output_bam = "${base_name}.bam"
+        File output_bam_index = "${base_name}.bai"
+    }
+
+    runtime {
+        memory: "3500 MB"
+        disks: "local-disk " + sub((size(input_bam,"GB")*4)+30, "\\..*", "") + " HDD"
+        preemptible: preemptible_count
+        docker: docker
+    }
+}
+
+task HaplotypeCaller {
+
+	File input_bam
+	File input_bam_index
+	String base_name
+
+	File interval_list
+
+	File ref_dict
+	File ref_fasta
+	File ref_fasta_index
+
+	File dbSNP_vcf
+	File dbSNP_vcf_index
+
+	String gatk_path
+	String docker
+	Int preemptible_count
+
+	Int? stand_call_conf
+
+	command <<<
+		${gatk_path} --java-options "-Xms6000m -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \
+		HaplotypeCaller \
+		-R ${ref_fasta} \
+		-I ${input_bam} \
+		-L ${interval_list} \
+		-O ${base_name}.vcf.gz \
+		-dont-use-soft-clipped-bases \
+		--standard-min-confidence-threshold-for-calling ${default=20 stand_call_conf} \
+		--dbsnp ${dbSNP_vcf}
+	>>>
+
+	output {
+		File output_vcf = "${base_name}.vcf.gz"
+		File output_vcf_index = "${base_name}.vcf.gz.tbi"
+	}
+
+	runtime {
+		docker: docker
+		memory: "6.5 GB"
+		disks: "local-disk " + sub((size(input_bam,"GB")*2)+30, "\\..*", "") + " HDD"
+		preemptible: preemptible_count
+	}
+}
+
+task VariantFiltration {
+
+	File input_vcf
+	File input_vcf_index
+	String base_name
+
+ 	File ref_dict
+ 	File ref_fasta
+ 	File ref_fasta_index
+
+	String gatk_path
+	String docker
+ 	Int preemptible_count
+
+	command <<<
+		 ${gatk_path} \
+		    VariantFiltration \
+			--R ${ref_fasta} \
+			--V ${input_vcf} \
+			--window 35 \
+			--cluster 3 \
+			--filter-name "FS" \
+			--filter "FS > 30.0" \
+			--filter-name "QD" \
+			--filter "QD < 2.0" \
+			-O ${base_name}
+	>>>
+
+	output {
+    	File output_vcf = "${base_name}"
+    	File output_vcf_index = "${base_name}.tbi"
+	}
+
+	runtime {
+		docker: docker
+		memory: "3 GB"
+		disks: "local-disk " + sub((size(input_vcf,"GB")*2)+30, "\\..*", "") + " HDD"
+		preemptible: preemptible_count
+	}
+}
+
+task MergeVCFs {
+    Array[File] input_vcfs
+    Array[File] input_vcfs_indexes
+    String output_vcf_name
+
+    Int? disk_size = 5
+
+    String gatk_path
+
+    String docker
+    Int preemptible_count
+
+    # Using MergeVcfs instead of GatherVcfs so we can create indices
+    # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
+    command <<<
+        ${gatk_path} --java-options "-Xms2000m"  \
+            MergeVcfs \
+            --INPUT ${sep=' --INPUT=' input_vcfs} \
+            --OUTPUT ${output_vcf_name}
+    >>>
+
+    output {
+        File output_vcf = output_vcf_name
+        File output_vcf_index = "${output_vcf_name}.tbi"
+    }
+
+    runtime {
+        memory: "3 GB"
+        disks: "local-disk " + disk_size + " HDD"
+        docker: docker
+        preemptible: preemptible_count
+    }
+}
+
+task ScatterIntervalList {
+
+	File interval_list
+	Int scatter_count
+	String gatk_path
+	String docker
+	Int preemptible_count
+
+    command <<<
+        set -e
+        mkdir out
+        ${gatk_path} --java-options "-Xms1g" \
+            IntervalListTools \
+            --SCATTER_COUNT=${scatter_count} \
+            --SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
+            --UNIQUE=true \
+            --SORT=true \
+            --INPUT=${interval_list} \
+            --OUTPUT=out
+	
+        python3 <<CODE
+        import glob, os
+        # Works around a JES limitation where multiples files with the same name overwrite each other when globbed
+        intervals = sorted(glob.glob("out/*/*.interval_list"))
+        for i, interval in enumerate(intervals):
+          (directory, filename) = os.path.split(interval)
+          newName = os.path.join(directory, str(i + 1) + filename)
+          os.rename(interval, newName)
+        print(len(intervals))
+        f = open("interval_count.txt", "w+")
+        f.write(str(len(intervals)))
+        f.close()
+        CODE
+    >>>
+
+    output {
+        Array[File] out = glob("out/*/*.interval_list")
+        Int interval_count = read_int("interval_count.txt")
+    }
+
+    runtime {
+        disks: "local-disk 1 HDD"
+        memory: "2 GB"
+        docker: docker
+        preemptible: preemptible_count
+    }
+}
+
+task RevertSam {
+    File input_bam
+    String base_name
+    String sort_order
+
+    String gatk_path
+
+    String docker
+    Int preemptible_count
+
+    command <<<
+        ${gatk_path} \
+        	RevertSam \
+        	--INPUT ${input_bam} \
+        	--OUTPUT ${base_name}.bam \
+            --VALIDATION_STRINGENCY SILENT \
+        	--ATTRIBUTE_TO_CLEAR FT \
+        	--ATTRIBUTE_TO_CLEAR CO \
+        	--SORT_ORDER ${sort_order}
+    >>>
+
+    output {
+        File output_bam = "${base_name}.bam"
+    }
+
+    runtime {
+        docker: docker
+        disks: "local-disk " + sub(((size(input_bam,"GB")+1)*5),"\\..*","") + " HDD"
+        memory: "4 GB"
+        preemptible: preemptible_count
+    }
+}
+
--- a/gatk4-rna-germline-variant-calling.inputs.json
+++ b/gatk4-rna-germline-variant-calling.inputs.json
@ -0,0 +1,47 @@
+{
+  "##_COMMENT1": "Input",
+  "RNAseq.inputBam": "gs://gatk-test-data/rna_bam/NA12878_b37/NA12878.bam",
+  
+  "##_COMMENT2": "REFERENCE FILES",
+  "RNAseq.refFasta": "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.fasta",
+  "RNAseq.refFastaIndex": "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.fasta.fai",
+  "RNAseq.refDict": "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.dict",
+
+  "##_COMMENT4": "RESOURCE FILES",
+  "RNAseq.dbSnpVcf": "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.dbsnp138.vcf",
+  "RNAseq.dbSnpVcfIndex": "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.dbsnp138.vcf.idx",
+  "RNAseq.knownVcfs": [
+    "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Mills_and_1000G_gold_standard.indels.b37.sites.vcf",
+    "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.known_indels.vcf"
+  ],
+  "RNAseq.knownVcfsIndices": [
+    "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Mills_and_1000G_gold_standard.indels.b37.sites.vcf.idx",
+    "gs://broad-references/Homo_sapiens_assembly19_1000genomes_decoy/Homo_sapiens_assembly19_1000genomes_decoy.known_indels.vcf.idx"
+  ],
+  "RNAseq.annotationsGTF": "gs://gatk-test-data/intervals/star.gencode.v19.transcripts.patched_contigs.gtf",
+  
+  "##_COMMENT4": "DOCKERS",
+  "#RNAseq.gatk4_docker_override": "String? (optional)",
+  "#RNAseq.star_docker_override": "String? (optional)",
+  "#RNAseq.gitc_docker_override": "String? (optional)",
+  
+  "##_COMMENT5": "PATHS",
+  "#RNAseq.gatk_path_override": "/gatk/gatk",
+
+  "##_COMMENT6": "PREEMPTIBLES",
+  "##RNAseq.preemptible_tries": "(optional) Int?",
+  
+  "##_COMMENT7": "Misc",
+  "#RNAseq.StarAlign.num_threads": "(optional) Int?",
+  "#RNAseq.StarAlign.star_limitOutSJcollapsed": "(optional) Int?",
+  "RNAseq.StarAlign.additional_disk": "50",
+  "#RNAseq.StarAlign.star_mem_max_gb": "(optional) Int?",
+  "RNAseq.StarGenerateReferences.addtional_disk": 50,
+  "#RNAseq.StarGenerateReferences.num_threads": "(optional) Int?",
+  "#RNAseq.StarGenerateReferences.mem_gb": "(optional) Int?",
+  "#RNAseq.haplotypeScatterCount": "(optional) Int?",
+  "#RNAseq.use_gatk4_for_all_tools": "(optional) Boolean",
+  "#RNAseq.minConfidenceForVariantCalling": "(optional) Int?",
+  "#RNAseq.zippedStarReferences": "(optional) File?",
+  "#RNAseq.readLength": "(optional) Int?"
+}