#!/bin/bash
#SBATCH -n 1
#SBATCH -N 1
#SBATCH -o fastqCountBarcodes-%A_%a.out
#SBATCH -e fastqCountBarcodes-%A_%a.err
#SBATCH --mem-per-cpu=8G
#SBATCH --mail-type=END,FAIL
#
# fastqCountBarcodes.sh - given a FASTQ file, count barcodes.
#
# Two calling conventions:
#   1. From another script, with exactly 5 arguments:
#        fastqPrefix p1AndSpacer bcSize bcToCrs.txt path/to/this/script/
#   2. As a SLURM job array wrapper, with 2 arguments:
#        argumentFile.txt path/to/this/script/
#      Line number ${SLURM_ARRAY_TASK_ID} of argumentFile.txt supplies the
#      four tab-separated fields fastqPrefix, p1AndSpacer, bcSize, bcToCrs.txt.
#
# Reads:   <fastqPrefix>.fastq
# Writes:  <fastqPrefix>.tmp (lines of "count<TAB>barcode"), removed on exit,
#          plus whatever fastq_count_barcodes.py produces.
# Exit status: 1 on usage or input errors; otherwise the exit status of
#          fastq_count_barcodes.py, so a failed run is reported as failed.

ml anaconda3/4.1.1
unset PYTHONPATH
source activate bclab

# Print an error message to stderr and abort.
die() {
  printf 'fastqCountBarcodes.sh: %s\n' "$*" >&2
  exit 1
}

# Print the usage text to stderr and abort.
# printf is used because bash's echo does not expand \t and \n without -e.
usage() {
  {
    printf 'Usage: fastqCountBarcodes.sh fastqPrefix p1AndSpacer bcSize bcToCrs.txt path/to/this/script/\n'
    printf '\t- fastqPrefix is both the prefix for a file called fastqPrefix.fastq and the prefix for output files.\n'
    printf '\t- p1AndSpacer is the concatenation of the p1 adapter and the spacer before the barcode.\n'
    printf '\t- bcSize is the expected barcode size.\n'
    printf '\t- bcToCrs.txt is a file that contains barcodes in the first field and CRS identifiers in the second field.\n'
    printf '\nOR\n'
    printf 'sbatch --array=1-arraySize fastqCountBarcodes.sh argumentFile.txt path/to/this/script/\n'
    printf '\tWhere argumentFile.txt has 4 fields per line, corresponding to fastqPrefix, p1AndSpacer, bcSize, bcToCrs.txt\n'
    printf '\tWhen called this way, the script runs as a SLURM job array.\n'
  } >&2
  exit 1
}

if (( $# == 5 )); then
  # Calling from another script
  prefix=$1
  p1AndSpacer=$2
  bcSize=$3
  bcToCrsFile=$4
  scriptPath=$5
elif [[ -n "${SLURM_ARRAY_TASK_ID:-}" ]]; then
  # Calling from SLURM
  (( $# >= 2 )) || usage
  argfile=$1
  scriptPath=$2
  [[ -r "${argfile}" ]] || die "cannot read argument file '${argfile}'"

  # Print only line number SLURM_ARRAY_TASK_ID of the argument file.
  line=$(sed "${SLURM_ARRAY_TASK_ID}q;d" "${argfile}")
  [[ -n "${line}" ]] \
    || die "no line ${SLURM_ARRAY_TASK_ID} in argument file '${argfile}'"

  prefix=$(printf '%s\n' "${line}" | cut -f1)
  p1AndSpacer=$(printf '%s\n' "${line}" | cut -f2)
  bcSize=$(printf '%s\n' "${line}" | cut -f3)
  bcToCrsFile=$(printf '%s\n' "${line}" | cut -f4)

  [[ -n "${prefix}" && -n "${p1AndSpacer}" \
    && -n "${bcSize}" && -n "${bcToCrsFile}" ]] \
    || die "line ${SLURM_ARRAY_TASK_ID} of '${argfile}' needs 4 tab-separated fields"
else
  # Else error
  usage
fi

# Fail early instead of silently producing an empty count table.
[[ -r "${prefix}.fastq" ]] || die "cannot read FASTQ file '${prefix}.fastq'"

# Remove the intermediate file on every exit path of this script.
trap 'rm -f -- "${prefix}.tmp"' EXIT

# NOTE: p1AndSpacer and bcSize are interpolated verbatim into the sed regex
# below, so they must not contain '/' or unintended regex metacharacters.

# Pull lines from the FASTQ that contain at least 30 consecutive letters in
# the set [ACGTN], then get the ${bcSize} [ACGT] letters immediately after the
# p1 and spacer (i.e. the cBC), sort and count the barcodes, and finally strip
# the leading spaces uniq -c emits and turn the first space into a tab.
grep -e "[ACGTN]\{30,\}" -- "${prefix}.fastq" \
  | sed -n 's/.*'"${p1AndSpacer}"'\([ACGT]\{'"${bcSize}"'\}\).*/\1/p' \
  | sort \
  | uniq -c \
  | sed -e 's/^ *//' -e $'s/ /\t/' \
  > "${prefix}.tmp"

# Kept as the last command so its exit status becomes the script's status
# (the EXIT trap above does not alter it).
# NOTE(review): presumably the Python script locates ${prefix}.tmp from the
# prefix argument - confirm against fastq_count_barcodes.py.
python3 "${scriptPath}/fastq_count_barcodes.py" "${prefix}" "${bcToCrsFile}"