-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhisat2build.py
31 lines (26 loc) · 1.63 KB
/
hisat2build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#hisat2build.py
#Russell O. Stewart
#7/27/2017
#Builds hisat2 indexes needed to run hisat2 mapping. These index files also
#incorporate haplotypes and SNPs.
#Make sure you have converted your dbSNP file into hisat's snp format using
#hisat2_extract_snps_haplotypes_UCSC.py (part of HISAT2) BEFORE running this!
#note: index files will appear in the directory in which you run this script!
#preferably, run this inside of the hisat2 directory itself
#if you plan to use SNP/haplotype files, make sure to use at least 200 Gb of
#memory! If not, 8 Gb will suffice.
#hisat2 is available from:
#https://github.com/infphilo/hisat2
import os
import subprocess
email = 'russells98@gmail.com'#email for openlava output
pathToHisat2 = '/Seibold/proj/Russell_playground/hisat2/hisat2-2.1.0'#where your hisat2 dierectory is stored
threads = 12#how many threads are available to run this on?
genomeFastaFile = '/data/reference/iGenomes/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa'#path to the fasta file
baseName = 'Homo_sapiens_index_with_SNP'#what do you want to name the index files?
snpFile = '/Seibold/proj/Russell_playground/hisat2/hisat2SNPFiles/SNP144_common.snp'#path to SNPs, in HISAT2's format, .snp
haplotypeFile = '/Seibold/proj/Russell_playground/hisat2/hisat2SNPFiles/SNP144_common.haplotype'#same but .haplotype for haplotype
indexBuildCommand = "bsub -u %s -R \"rusage[mem=250000]\" -m f01 '%s/hisat2-build' -f -p %d --snp %s --haplotype %s %s %s" % (email , pathToHisat2 , threads , snpFile , haplotypeFile , genomeFastaFile , baseName)
coreID = subprocess.check_output(indexBuildCommand , shell = True)
print indexBuildCommand
print coreID