author     Glenn Johnson <glenn-johnson@uiowa.edu>  2021-06-18 21:34:50 -0500
committer  GitHub <noreply@github.com>              2021-06-18 21:34:50 -0500
commit     91ef60eb0ecc090c272d77bf1c67cae9e5f4bee3 (patch)
tree       2d4d46c34786faefe2f7fe1a41be21b9ee11e6d0 /var
parent     383d4cc84c03b23df2bca49442d9904022576656 (diff)
download   spack-91ef60eb0ecc090c272d77bf1c67cae9e5f4bee3.tar.gz
           spack-91ef60eb0ecc090c272d77bf1c67cae9e5f4bee3.tar.bz2
           spack-91ef60eb0ecc090c272d77bf1c67cae9e5f4bee3.tar.xz
           spack-91ef60eb0ecc090c272d77bf1c67cae9e5f4bee3.zip
reditools: update and add features (#24370)
This PR does the following:

- adds a version corresponding to the commit from 2020-08-03
- adds the missing get_DE_events.py script
- adds the dependencies needed by get_DE_events.py
- removes REDItoolDenovo.py.patch and python2to3.patch in favor of running 2to3 and reindent pre-build
- adds batch_sort.patch to handle differences in string/char handling between python2 and python3
- adds a variant for the Nature Protocol
- adds dependencies for the nature_protocol variant
- adds myself as maintainer

This PR adds a new version of reditools from git.
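The core of batch_sort.patch is switching the sort/merge helpers from binary ('rb'/'wb') to text ('r'/'w') file modes. Below is a minimal sketch of why that matters under Python 3; it is not part of the REDItools sources, and the sample rows and key function are made up for illustration only.

import io

# Two tab-delimited rows standing in for a REDItools table.
lines = ["chr2\t10\tA\n", "chr1\t5\tG\n"]

def key(line):
    # Hypothetical sort key: chromosome name, then integer coordinate.
    fields = line.split('\t')
    return (fields[0], int(fields[1]))

text_handle = io.StringIO("".join(lines))            # behaves like open(path, 'r')
bytes_handle = io.BytesIO("".join(lines).encode())   # behaves like open(path, 'rb')

print(sorted(text_handle, key=key))   # str lines: the python2-era key code still works

try:
    sorted(bytes_handle, key=key)     # bytes lines vs. the str separator '\t'
except TypeError as err:
    print("binary mode breaks under python3:", err)

Switching to text mode keeps the existing str-based keys and writelines() calls working, since the tables being sorted are plain tab-delimited text.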
Diffstat (limited to 'var')
-rw-r--r--  var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch   11
-rw-r--r--  var/spack/repos/builtin/packages/reditools/batch_sort.patch         168
-rw-r--r--  var/spack/repos/builtin/packages/reditools/package.py                59
-rw-r--r--  var/spack/repos/builtin/packages/reditools/python2to3.patch         926
-rw-r--r--  var/spack/repos/builtin/packages/reditools/setup.py.patch            16
5 files changed, 236 insertions, 944 deletions
diff --git a/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch b/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch
deleted file mode 100644
index a88358829c..0000000000
--- a/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- a/main/REDItoolDenovo.py 2020-07-16 17:35:46.008030346 -0500
-+++ b/main/REDItoolDenovo.py 2020-07-16 17:38:39.700035490 -0500
-@@ -768,7 +768,7 @@
- for j in ridxinfo.split('\n'): #MOD
- l=(j.strip()).split('\t')
- if l[0] in ['*', '']: continue #MOD
-- if int(l[2])+int(l[3]) > 0: rrefs[l[0]]=int(l[1])
-+ if int(l[2])+int(l[3]) > 0: rrefs[l[0]]=int(l[1])
- frefs=[]
- fidxinfo=open(fastafile+'.fai')
- for j in fidxinfo:
diff --git a/var/spack/repos/builtin/packages/reditools/batch_sort.patch b/var/spack/repos/builtin/packages/reditools/batch_sort.patch
new file mode 100644
index 0000000000..a319d9619e
--- /dev/null
+++ b/var/spack/repos/builtin/packages/reditools/batch_sort.patch
@@ -0,0 +1,168 @@
+diff -ru a/accessory/GFFtoTabix.py b/accessory/GFFtoTabix.py
+--- a/accessory/GFFtoTabix.py 2021-05-19 15:28:02.000000000 -0500
++++ b/accessory/GFFtoTabix.py 2021-05-20 16:06:25.187316573 -0500
+@@ -102,7 +102,7 @@
+ chunks = []
+ xx=0
+ try:
+- with open(input,'rb',64*1024) as input_file:
++ with open(input,'r',64*1024) as input_file:
+ input_iterator = iter(input_file)
+ for tempdir in cycle(tempdirs):
+ current_chunk2=[]
+@@ -119,13 +119,13 @@
+ xx+=len(current_chunk3)
+ if not current_chunk3: break
+ sys.stdout.write("Loaded and sorted %i lines.\n"%(xx))
+- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024)
++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024)
+ chunks.append(output_chunk)
+ output_chunk.writelines(current_chunk3)
+ output_chunk.flush()
+ output_chunk.seek(0)
+ sys.stdout.write("Merging from %i files.\n"%(len(chunks)))
+- with open(output,'wb',64*1024) as output_file:
++ with open(output,'w',64*1024) as output_file:
+ output_file.writelines(merge(key, *chunks))
+ finally:
+ for chunk in chunks:
+@@ -150,4 +150,4 @@
+ sys.stdout.write("Tabix file saved on %s.\n" %(GFFfile))
+ sys.stdout.write("Indices saved on %s.tbi.\n" %(GFFfile))
+ script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time()))
+-sys.stdout.write("Script time --> END: %s\n"%(script_time))
+\ No newline at end of file
++sys.stdout.write("Script time --> END: %s\n"%(script_time))
+diff -ru a/accessory/SortGFF.py b/accessory/SortGFF.py
+--- a/accessory/SortGFF.py 2021-05-19 15:28:02.000000000 -0500
++++ b/accessory/SortGFF.py 2021-05-20 16:06:01.023238792 -0500
+@@ -102,7 +102,7 @@
+ chunks = []
+ xx=0
+ try:
+- with open(input,'rb',64*1024) as input_file:
++ with open(input,'r',64*1024) as input_file:
+ input_iterator = iter(input_file)
+ for tempdir in cycle(tempdirs):
+ current_chunk2=[]
+@@ -119,13 +119,13 @@
+ xx+=len(current_chunk3)
+ if not current_chunk3: break
+ sys.stdout.write("Loaded and sorted %i lines.\n"%(xx))
+- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024)
++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024)
+ chunks.append(output_chunk)
+ output_chunk.writelines(current_chunk3)
+ output_chunk.flush()
+ output_chunk.seek(0)
+ sys.stdout.write("Merging from %i files.\n"%(len(chunks)))
+- with open(output,'wb',64*1024) as output_file:
++ with open(output,'w',64*1024) as output_file:
+ output_file.writelines(merge(key, *chunks))
+ finally:
+ for chunk in chunks:
+@@ -140,4 +140,4 @@
+ batch_sort(GFFfile,outfile,key_,buffer_size,tempdirs)
+ sys.stdout.write("Sorted GFF saved on %s\n"%(outfile))
+ script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time()))
+-sys.stdout.write("Script time --> END: %s\n"%(script_time))
+\ No newline at end of file
++sys.stdout.write("Script time --> END: %s\n"%(script_time))
+diff -ru a/accessory/SortTable.py b/accessory/SortTable.py
+--- a/accessory/SortTable.py 2021-05-19 15:28:02.000000000 -0500
++++ b/accessory/SortTable.py 2021-05-20 16:05:35.857157751 -0500
+@@ -122,7 +122,7 @@
+ chunks = []
+ xx=0
+ try:
+- with open(input,'rb',64*1024) as input_file:
++ with open(input,'r',64*1024) as input_file:
+ input_iterator = iter(input_file)
+ for tempdir in cycle(tempdirs):
+ current_chunk2=[]
+@@ -142,13 +142,13 @@
+ xx+=len(current_chunk3)
+ if not current_chunk3: break
+ sys.stdout.write("Loaded and sorted %i lines.\n"%(xx))
+- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024)
++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024)
+ chunks.append(output_chunk)
+ output_chunk.writelines(current_chunk3)
+ output_chunk.flush()
+ output_chunk.seek(0)
+ sys.stdout.write("Merging from %i files.\n"%(len(chunks)))
+- with open(output,'wb',64*1024) as output_file:
++ with open(output,'w',64*1024) as output_file:
+ output_file.writelines(merge(key, *chunks))
+ finally:
+ for chunk in chunks:
+@@ -163,4 +163,4 @@
+ batch_sort(GFFfile,outfile,key_,buffer_size,tempdirs)
+ sys.stdout.write("Sorted GFF saved on %s\n"%(outfile))
+ script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time()))
+-sys.stdout.write("Script time --> END: %s\n"%(script_time))
+\ No newline at end of file
++sys.stdout.write("Script time --> END: %s\n"%(script_time))
+diff -ru a/accessory/TableToGFF.py b/accessory/TableToGFF.py
+--- a/accessory/TableToGFF.py 2021-05-19 15:28:02.000000000 -0500
++++ b/accessory/TableToGFF.py 2021-05-20 16:05:11.309078667 -0500
+@@ -104,7 +104,7 @@
+ chunks = []
+ xx=0
+ try:
+- with open(input,'rb',64*1024) as input_file:
++ with open(input,'r',64*1024) as input_file:
+ input_iterator = iter(input_file)
+ for tempdir in cycle(tempdirs):
+ current_chunk2=[]
+@@ -121,13 +121,13 @@
+ xx+=len(current_chunk3)
+ if not current_chunk3: break
+ sys.stdout.write("Loaded and sorted %i lines.\n"%(xx))
+- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024)
++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024)
+ chunks.append(output_chunk)
+ output_chunk.writelines(current_chunk3)
+ output_chunk.flush()
+ output_chunk.seek(0)
+ sys.stdout.write("Merging from %i files.\n"%(len(chunks)))
+- with open(output,'wb',64*1024) as output_file:
++ with open(output,'w',64*1024) as output_file:
+ output_file.writelines(merge(key, *chunks))
+ finally:
+ for chunk in chunks:
+diff -ru a/accessory/tableToTabix.py b/accessory/tableToTabix.py
+--- a/accessory/tableToTabix.py 2021-05-19 15:28:02.000000000 -0500
++++ b/accessory/tableToTabix.py 2021-05-20 16:04:45.468995382 -0500
+@@ -117,7 +117,7 @@
+ chunks = []
+ xx=0
+ try:
+- with open(input,'rb',64*1024) as input_file:
++ with open(input,'r',64*1024) as input_file:
+ input_iterator = iter(input_file)
+ for tempdir in cycle(tempdirs):
+ current_chunk2=[]
+@@ -136,13 +136,13 @@
+ xx+=len(current_chunk3)
+ if not current_chunk3: break
+ sys.stdout.write("Loaded and sorted %i lines.\n"%(xx))
+- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024)
++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024)
+ chunks.append(output_chunk)
+ output_chunk.writelines(current_chunk3)
+ output_chunk.flush()
+ output_chunk.seek(0)
+ sys.stdout.write("Merging from %i files.\n"%(len(chunks)))
+- with open(output,'wb',64*1024) as output_file:
++ with open(output,'w',64*1024) as output_file:
+ output_file.writelines(merge(key, *chunks))
+ finally:
+ for chunk in chunks:
+@@ -167,4 +167,4 @@
+ sys.stdout.write("Tabix file saved on %s.\n" %(GFFfile))
+ sys.stdout.write("Indices saved on %s.tbi.\n" %(GFFfile))
+ script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time()))
+-sys.stdout.write("Script time --> END: %s\n"%(script_time))
+\ No newline at end of file
++sys.stdout.write("Script time --> END: %s\n"%(script_time))
diff --git a/var/spack/repos/builtin/packages/reditools/package.py b/var/spack/repos/builtin/packages/reditools/package.py
index 256ec5cb0f..64df6ffce8 100644
--- a/var/spack/repos/builtin/packages/reditools/package.py
+++ b/var/spack/repos/builtin/packages/reditools/package.py
@@ -19,14 +19,65 @@ class Reditools(PythonPackage):
homepage = "https://github.com/BioinfoUNIBA/REDItools"
git = "https://github.com/BioinfoUNIBA/REDItools.git"
+ maintainers = ['glennpj']
+
+ version('1.3_2020-08-03', commit='2dc71277a25e667797c363d1fca22726249774a3')
version('1.3_2020-03-20', commit='cf47f3d54f324aeb9650bcf8bfacf5a967762a55')
- depends_on('py-pysam', type=('build', 'run'))
- depends_on('py-fisher', type=('build', 'run'))
+ variant('nature_protocol', default=False,
+ description='Install the Nature Protocol scripts and files')
+
+ depends_on('py-reindent', type='build', when='^python@3:')
depends_on('blat', type='run')
+ depends_on('py-fisher', type='run')
+ depends_on('py-numpy', type='run')
+ depends_on('py-pandas', type='run')
+ depends_on('py-pysam', type='run')
+ depends_on('py-scipy', type='run')
depends_on('tabix', type='run')
- patch('REDItoolDenovo.py.patch')
+ # Nature Protocol
+ depends_on('bcftools', type='run', when='+nature_protocol')
+ depends_on('bedtools2', type='run', when='+nature_protocol')
+ depends_on('bwa', type='run', when='+nature_protocol')
+ depends_on('bzip2', type='run', when='+nature_protocol')
+ depends_on('fastp', type='run', when='+nature_protocol')
+ depends_on('fastqc', type='run', when='+nature_protocol')
+ depends_on('git', type='run', when='+nature_protocol')
+ depends_on('gmap-gsnap', type='run', when='+nature_protocol')
+ depends_on('htslib', type='run', when='+nature_protocol')
+ depends_on('libdeflate', type='run', when='+nature_protocol')
+ depends_on('py-bx-python', type='run', when='+nature_protocol')
+ depends_on('py-rseqc', type='run', when='+nature_protocol')
+ depends_on('samtools', type='run', when='+nature_protocol')
+ depends_on('star', type='run', when='+nature_protocol')
+ depends_on('wget', type='run', when='+nature_protocol')
+
patch('interpreter.patch')
patch('setup.py.patch')
- patch('python2to3.patch', when='^python@3:')
+ patch('batch_sort.patch', when='^python@3:')
+
+ @run_before('build')
+ def p2_to_p3(self):
+ if '^python@3:' in self.spec:
+ # clean up space/tab mixing
+ reindent = which('reindent')
+ reindent('--nobackup', '--recurse', '.')
+
+ # convert to be python3 compatible
+ p2_to_p3 = which('2to3')
+ p2_to_p3('--nobackups', '--write', '.')
+
+ @run_after('install')
+ def nature_protocol(self):
+ if '+nature_protocol' in self.spec:
+ mkdirp(prefix.NPfiles)
+ install_tree('NPfiles', prefix.NPfiles)
+
+ ignore_files = [
+ 'conda_pckg_installer_docker.py',
+ 'conda_pckgs_installer.py',
+ 'download-prepare-data-NP_docker.py',
+ ]
+ docker_conda = lambda p: p in ignore_files
+ install_tree('NPscripts', prefix.bin, ignore=docker_conda)
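With the new variant in place, the Nature Protocol tooling is only pulled in when requested. An illustrative invocation (an assumption about how a user would request it, not something added by this commit) would be:

    spack install reditools@1.3_2020-08-03 +nature_protocol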
diff --git a/var/spack/repos/builtin/packages/reditools/python2to3.patch b/var/spack/repos/builtin/packages/reditools/python2to3.patch
deleted file mode 100644
index 94937d175d..0000000000
--- a/var/spack/repos/builtin/packages/reditools/python2to3.patch
+++ /dev/null
@@ -1,926 +0,0 @@
-diff -ru a/accessory/AnnotateTable.py b/accessory/AnnotateTable.py
---- a/accessory/AnnotateTable.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/AnnotateTable.py 2020-07-16 16:17:22.159013630 -0500
-@@ -28,7 +28,7 @@
- sys.stderr.write('Pysam version used: %s\n' %(pysamVersion))
-
- def usage():
-- print """
-+ print("""
- USAGE: python AnnotateTable.py [options]
- Options:
- -a Sorted Annotation file
-@@ -44,12 +44,12 @@
- -C Columns with base distribution [7,12] (in combination with -S)
- -o Save lines to a file
- -h Print this help
--"""
-+""")
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'i:a:o:hs:c:n:SC:uk:r:',["help"])
--except getopt.GetoptError, err:
-- print str(err)
-+except getopt.GetoptError as err:
-+ print(str(err))
- usage()
- sys.exit()
-
-@@ -101,7 +101,7 @@
- a={'A':'T','T':'A','C':'G','G':'C'}
- ss=''
- for i in s.upper():
-- if a.has_key(i): ss+=a[i]
-+ if i in a: ss+=a[i]
- elif i==' ': ss+=' '
- elif i=='-': ss+='-'
- else: ss+='N'
-@@ -125,23 +125,23 @@
- anns='+'
- for i in res:
- if i[3]=='+':
-- if d['+'].has_key(i[1]):
-+ if i[1] in d['+']:
- if i[0] not in d['+'][i[1]][0]: d['+'][i[1]][0]=d['+'][i[1]][0]+','+i[0]
- if i[2]+'-'+i[0] not in d['+'][i[1]][1]: d['+'][i[1]][1]=d['+'][i[1]][1]+','+i[2]+'-'+i[0]
- else:
- d['+'][i[1]]=[i[0],i[2]+'-'+i[0]]
- elif i[3]=='-':
-- if d['-'].has_key(i[1]):
-+ if i[1] in d['-']:
- if i[0] not in d['-'][i[1]][0]: d['-'][i[1]][0]=d['-'][i[1]][0]+','+i[0]
- if i[2]+'-'+i[0] not in d['-'][i[1]][1]: d['-'][i[1]][1]=d['-'][i[1]][1]+','+i[2]+'-'+i[0]
- else:
- d['-'][i[1]]=[i[0],i[2]+'-'+i[0]]
-- gip='$'.join(d['+'].keys())
-- featp='$'.join([d['+'][x][0] for x in d['+'].keys()])
-- tip='$'.join([d['+'][x][1] for x in d['+'].keys()])
-- gim='$'.join(d['-'].keys())
-- featm='$'.join([d['-'][x][0] for x in d['-'].keys()])
-- tim='$'.join([d['-'][x][1] for x in d['-'].keys()])
-+ gip='$'.join(list(d['+'].keys()))
-+ featp='$'.join([d['+'][x][0] for x in list(d['+'].keys())])
-+ tip='$'.join([d['+'][x][1] for x in list(d['+'].keys())])
-+ gim='$'.join(list(d['-'].keys()))
-+ featm='$'.join([d['-'][x][0] for x in list(d['-'].keys())])
-+ tim='$'.join([d['-'][x][1] for x in list(d['-'].keys())])
- p=[featp,gip,tip]
- m=[featm,gim,tim]
- pm=[(featp+'&'+featm).strip('&'),(gip+'&'+gim).strip('&'),(tip+'&'+tim).strip('&')]
-@@ -187,7 +187,7 @@
- elif strand=='-': res=ann[1]
- else: res=ann[2]
- for i in addc:
-- print prinfo[i]+ res[i]
-+ print(prinfo[i]+ res[i])
- except: sys.exit('Error: not correct position.')
-
- if af:
-@@ -200,7 +200,7 @@
- h=[i.strip()]
- for k in addc: h.append(hinfo[k])
- if save: o.write('\t'.join(h)+'\n')
-- else: print '\t'.join(h)
-+ else: print('\t'.join(h))
- continue
- if i.startswith(skip): continue
- l=(i.strip()).split('\t')
-@@ -230,7 +230,7 @@
- else: res=ann[2]
- for j in addc: l.append(res[j])
- if save: o.write('\t'.join(l)+'\n')
-- else: print '\t'.join(l)
-+ else: print('\t'.join(l))
- tabix.close()
- if save:
- o.close()
-diff -ru a/accessory/FilterTable.py b/accessory/FilterTable.py
---- a/accessory/FilterTable.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/FilterTable.py 2020-07-16 16:17:22.209013627 -0500
-@@ -30,7 +30,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python FilterTable.py [options]
- Options:
- -i Table file
-@@ -43,12 +43,12 @@
- -p Print simple statistics
- -h Print this help
-
--"""
-+""")
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'i:o:f:hs:F:S:Ep',["help"])
--except getopt.GetoptError, err:
-- print str(err)
-+except getopt.GetoptError as err:
-+ print(str(err))
- usage()
- sys.exit()
-
-diff -ru a/accessory/get_DE_events.py b/accessory/get_DE_events.py
---- a/accessory/get_DE_events.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/get_DE_events.py 2020-07-16 16:17:22.646013607 -0500
-@@ -104,7 +104,7 @@
- #WARNING those are np arrays.
-
- for i in range(0,edit_level_table.shape[0]):
-- print i #keep track of progress
-+ print(i) #keep track of progress
- disease_edit_row = edit_level_table.loc[i, disease_people]
- control_edit_row = edit_level_table.loc[i, control_people]
- disease_cov_row = cov_table.loc[i, disease_people]
-@@ -206,7 +206,7 @@
- #write the full_table to output
- full_table.to_csv(output_file, sep='\t', index=False)
-
-- print "job completed\n"
-+ print("job completed\n")
-
-
-
-@@ -225,12 +225,12 @@
-
- def Sample_percentage(row):
- """Percentage of samples from each type"""
-- percentage = (len(filter(lambda x: x!= '-', row))/float(len(row)))*100
-+ percentage = (len([x for x in row if x!= '-'])/float(len(row)))*100
- return round(percentage)
-
- def Sample_count(row):
- """Number of samples from each type"""
-- count = len(filter(lambda x: x!= '-', row))
-+ count = len([x for x in row if x!= '-'])
- return count
-
- def get_bh(pvalue,siglevel):
-@@ -269,7 +269,7 @@
- if(row_a[-1] != '-' and row_a[-1] != 0.0 and row_a[-1] <= 0.05):
- row = row[0].split('_') + row[2:]
- row.insert(2, 'A.to.G')
-- print '\t'.join(map(str,row))
-+ print('\t'.join(map(str,row)))
-
- def tuple_replace(i):
- if type(i) == tuple:
-@@ -292,11 +292,11 @@
- with open(samples_informations_file, 'r') as f:
- for line in f:
- if line.startswith('SRR'):
-- line = map(str.strip, line.split(','))
-+ line = list(map(str.strip, line.split(',')))
- sample_informations.setdefault(line[0], line[1])
-
-
--cwd = filter(os.path.isdir, os.listdir(os.getcwd()))
-+cwd = list(filter(os.path.isdir, os.listdir(os.getcwd())))
- all_available_sites = []
- sample_edited_sites = {}
- for directory in cwd:
-@@ -306,7 +306,7 @@
- with open(table,'r') as a:
- for line in a:
- if line.startswith('chr'):
-- s = map(str.strip, line.split("\t"))
-+ s = list(map(str.strip, line.split("\t")))
- if s[7] == 'AG':
- site, freq, coverage = s[0] + "_" + s[1], s[8], s[4]
- freq_gnum_cov = '%s^%s^%s' %(s[8],eval(s[6])[2],s[4])
-@@ -314,14 +314,14 @@
- if (int(coverage) >= min_coverage) and (float(freq) >= min_edit_frequency):
- sample_edited_sites.setdefault((directory, site), []).append((freq, freq_gnum_cov))
-
--table_columns = map(lambda x: x + '_' + sample_informations[x], sorted(sample_informations.keys()))
-+table_columns = [x + '_' + sample_informations[x] for x in sorted(sample_informations.keys())]
-
- disease = [i for i in table_columns if i.upper().find('DIS') != -1]
- controls = [i for i in table_columns if i.upper().find('CTRL') != -1]
-
- if enable_linear_model:
- outtable=''
-- header = ['chromosome', 'position', 'type_editing'] + map(remove_underscore, controls) + map(remove_underscore, disease)
-+ header = ['chromosome', 'position', 'type_editing'] + list(map(remove_underscore, controls)) + list(map(remove_underscore, disease))
- outtable += '\t'.join(header)
- outtable += '\n'
- #print '\t'.join(header)
-@@ -329,13 +329,13 @@
- row = [chrom]
- for col in header[2:]:#header.index('[num_controls/num_disease]')]:
- row.append(sample_edited_sites.get((col.split('_')[0],chrom), ['-'])[0])
-- ctrls = zip(*(zip(controls,row[1:])))[1]
-- dss = zip(*(zip(disease,row[len(ctrls)+1:])))[1]
-- ctrls_freq = map(tuple_replace, ctrls)
-- dss_freq = map(tuple_replace, dss)
-+ ctrls = list(zip(*(list(zip(controls,row[1:])))))[1]
-+ dss = list(zip(*(list(zip(disease,row[len(ctrls)+1:])))))[1]
-+ ctrls_freq = list(map(tuple_replace, ctrls))
-+ dss_freq = list(map(tuple_replace, dss))
- row.append(str([Sample_count(ctrls), Sample_count(dss)]))
-
-- row_b = map(tuple_replace_bis, row)
-+ row_b = list(map(tuple_replace_bis, row))
- row_b = row_b[0].split('_') + row_b[2:]
- row_b.insert(2, 'A.to.G')
- final_list = row_b[:-1]
-@@ -359,20 +359,20 @@
- if pvalue_correction == 2:
- header += ['pvalue BH corrected']
-
-- print '\t'.join(header)
-+ print('\t'.join(header))
-
- for chrom in sorted(all_available_sites, key = lambda x: Set_Chr_Nr(x)):
- row = [chrom]
- for col in header[3:header.index('[num_controls/num_disease]')]:
- row.append(sample_edited_sites.get((col.split('_')[0],chrom), ['-'])[0])
-- ctrls = zip(*(zip(controls,row[1:])))[1]
-- dss = zip(*(zip(disease,row[len(ctrls)+1:])))[1]
-- ctrls_freq = map(tuple_replace, ctrls)
-- dss_freq = map(tuple_replace, dss)
-+ ctrls = list(zip(*(list(zip(controls,row[1:])))))[1]
-+ dss = list(zip(*(list(zip(disease,row[len(ctrls)+1:])))))[1]
-+ ctrls_freq = list(map(tuple_replace, ctrls))
-+ dss_freq = list(map(tuple_replace, dss))
- row.append(str([Sample_count(ctrls), Sample_count(dss)]))
- if (Sample_percentage(ctrls) >= min_sample_testing) and (Sample_percentage(dss) >= min_sample_testing):
-- ctrls_mean = sum(map(float, filter(lambda x: x!= '-', ctrls_freq)))/len(filter(lambda x: x!= '-', ctrls_freq))
-- dss_mean = sum(map(float, filter(lambda x: x!= '-', dss_freq)))/len(filter(lambda x : x!= '-', dss_freq))
-+ ctrls_mean = sum(map(float, [x for x in ctrls_freq if x!= '-']))/len([x for x in ctrls_freq if x!= '-'])
-+ dss_mean = sum(map(float, [x for x in dss_freq if x!= '-']))/len([x for x in dss_freq if x!= '-'])
- delta_diff = abs(ctrls_mean - dss_mean)
- pvalue=stats.mannwhitneyu(ctrls_freq, dss_freq, alternative='two-sided')
- row.append(round(delta_diff, 3))
-@@ -388,12 +388,12 @@
- row += ['-', '-']
- else:
- row += ['-', '-', '-']
-- row_a = map(tuple_replace, row)
-- row_b = map(tuple_replace_bis, row)
-+ row_a = list(map(tuple_replace, row))
-+ row_b = list(map(tuple_replace_bis, row))
- if pvalue_correction != 0 and only_significants == 'yes':
- only_sig(row_a,row_b)
- else:
- row_b = row_b[0].split('_') + row_b[2:]
- row_b.insert(2, 'A.to.G')
-- print '\t'.join(map(str,row_b))
-+ print('\t'.join(map(str,row_b)))
-
-diff -ru a/accessory/GFFtoTabix.py b/accessory/GFFtoTabix.py
---- a/accessory/GFFtoTabix.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/GFFtoTabix.py 2020-07-16 16:17:22.264013625 -0500
-@@ -31,7 +31,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python GFFtoTabix.py [options]
- Options:
- -i GFF file
-@@ -41,7 +41,7 @@
- -u Save an uncompressed GFF copy (add _copy suffix)
- -h Print this help
-
--"""
-+""")
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:Sb:t:hu",["help"])
-@@ -49,7 +49,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/accessory/rediportal2recoding.py b/accessory/rediportal2recoding.py
---- a/accessory/rediportal2recoding.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/rediportal2recoding.py 2020-07-16 16:17:22.709013604 -0500
-@@ -41,4 +41,4 @@
- gff_row = line[0] + '\t'+ valore + '\t' + 'ed' + '\t' + line[1] + \
- '\t' + line[1] + '\t' + '.' + '\t' + line[4] + '\t' + '.' + '\t' + \
- 'gene_id' + ' ' + '"ed_%s";' %(i) + ' ' + 'transcript_id' + ' ' + '"ed_%s";' %(i)
-- print gff_row
-+ print(gff_row)
-diff -ru a/accessory/SearchInTable.py b/accessory/SearchInTable.py
---- a/accessory/SearchInTable.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/SearchInTable.py 2020-07-16 16:17:22.309013623 -0500
-@@ -25,7 +25,7 @@
- #pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python SearchInTable.py [options]
- Options:
- -i Sorted table file (first col=reference; second col=coordinate 1 based)
-@@ -42,7 +42,7 @@
- -o Save found/not found positions on file
- -h Print this help
-
--"""
-+""")
- #-k skip first INT lines [0]
-
- try:
-@@ -51,7 +51,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/accessory/selectPositions.py b/accessory/selectPositions.py
---- a/accessory/selectPositions.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/selectPositions.py 2020-07-16 16:17:22.833013598 -0500
-@@ -25,7 +25,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python selectPositions.py [options]
- Options:
- -i Table file from REDItools
-@@ -44,7 +44,7 @@
- -o Save selected positions on outTable_%s
- -h Print this help
-
--"""%(pid)
-+"""%(pid))
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:c:C:v:s:f:F:euo:hrd:RV:",["help"])
-@@ -52,7 +52,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/accessory/SortGFF.py b/accessory/SortGFF.py
---- a/accessory/SortGFF.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/SortGFF.py 2020-07-16 16:17:22.357013620 -0500
-@@ -35,7 +35,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python SortGFF.py [options]
- Options:
- -i GFF file
-@@ -44,7 +44,7 @@
- -t Temporary directory to use (multiple -t may be used)
- -h Print this help
-
--"""%(pid)
-+"""%(pid))
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:o:b:t:h",["help"])
-@@ -52,7 +52,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/accessory/SortTable.py b/accessory/SortTable.py
---- a/accessory/SortTable.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/SortTable.py 2020-07-16 16:17:22.411013618 -0500
-@@ -35,7 +35,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python SortTable.py [options]
- Options:
- -i Table file
-@@ -50,7 +50,7 @@
- -t Temporary directory to use (multiple -t may be used)
- -h Print this help
-
--"""%(pid)
-+"""%(pid))
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:o:b:t:hd:s:c:e:m:O",["help"])
-@@ -58,7 +58,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/accessory/subCount2.py b/accessory/subCount2.py
---- a/accessory/subCount2.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/subCount2.py 2020-07-16 16:17:22.854013597 -0500
-@@ -28,5 +28,5 @@
- for i in s:
- try: v=(s[i]/float(all))*100
- except: v=0.0
-- print i,s[i],all,v
-+ print(i,s[i],all,v)
-
-diff -ru a/accessory/subCount.py b/accessory/subCount.py
---- a/accessory/subCount.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/subCount.py 2020-07-16 16:17:22.844013597 -0500
-@@ -30,5 +30,5 @@
- for i in s:
- try: v=(s[i]/float(all))*100
- except: v=0.0
-- print i,s[i],all,v
-+ print(i,s[i],all,v)
-
-diff -ru a/accessory/TableToGFF.py b/accessory/TableToGFF.py
---- a/accessory/TableToGFF.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/TableToGFF.py 2020-07-16 16:17:22.474013615 -0500
-@@ -29,7 +29,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python TableToGFF.py [options]
- Options:
- -i Table file from REDItools
-@@ -40,7 +40,7 @@
- -o Outfile [outTable_%s.gff]
- -h Print this help
-
--"""%(pid)
-+"""%(pid))
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:o:sthT:b:",["help"])
-@@ -48,7 +48,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/accessory/tableToTabix.py b/accessory/tableToTabix.py
---- a/accessory/tableToTabix.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/accessory/tableToTabix.py 2020-07-16 16:17:22.913013594 -0500
-@@ -31,7 +31,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python tableToTabix.py [options]
- Options:
- -i TAB-delimited file
-@@ -46,7 +46,7 @@
- -u Save an uncompressed GFF copy (add _copy suffix)
- -h Print this help
-
--"""
-+""")
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:Sb:t:hus:c:e:m:0",["help"])
-@@ -54,7 +54,7 @@
- usage()
- sys.exit(2)
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-diff -ru a/main/REDItoolDenovo.py b/main/REDItoolDenovo.py
---- a/main/REDItoolDenovo.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/main/REDItoolDenovo.py 2020-07-16 16:17:21.265013672 -0500
-@@ -75,7 +75,7 @@
- return 1
- numerator = 1
- denominator = 1
-- for i in xrange(s+1, population + 1):
-+ for i in range(s+1, population + 1):
- numerator *= i
- denominator *= (i - s)
- return numerator/denominator
-@@ -275,7 +275,7 @@
- try: import pysam
- except: sys.exit('Pysam module not found.')
- from multiprocessing import Process, Queue
--from Queue import Empty
-+from queue import Empty
- try:
- from fisher import pvalue
- exfisher=1
-@@ -292,7 +292,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python REDItoolDenovo.py [options]
- Options:
- -i BAM file
-@@ -343,12 +343,12 @@
- - For Tophat2 use 50
- - For GSNAP use 30
-
--"""%(pid)
-+"""%(pid))
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "b:f:k:t:o:c:q:m:O:s:edpuT:B:v:n:EP:r:hHa:i:lIU:V:w:XG:K:F:g:x:W")
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-@@ -474,7 +474,7 @@
- annfile=a
- uann=1
- else:
-- print o
-+ print(o)
- assert False, "Unhandled Option"
-
- #######
-@@ -532,7 +532,7 @@
- return False
- try:
- os.kill(pid, 0)
-- except OSError, e:
-+ except OSError as e:
- return e.errno == errno.EPERM
- else:
- return True
-@@ -637,9 +637,9 @@
- subs=[]
- subv=[]
- for i in seq.upper():
-- if b.has_key(i): b[i]+=1
-+ if i in b: b[i]+=1
- for i in b:
-- if not b.has_key(ref): continue
-+ if ref not in b: continue
- if b[i]!=0 and i!=ref:
- vv=float(b[i])/(b[i]+b[ref])
- subv.append((b[i],vv,ref+i))
-@@ -690,7 +690,7 @@
- a={'A':'T','T':'A','C':'G','G':'C'}
- ss=''
- for i in s.upper():
-- if a.has_key(i): ss+=a[i]
-+ if i in a: ss+=a[i]
- else: ss+='N'
- return ss
-
-@@ -777,7 +777,7 @@
- frefs.append(l[0])
- fidxinfo.close()
- rnof=[]
--for i in rrefs.keys():
-+for i in list(rrefs.keys()):
- if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i))
- #####################
-
-@@ -817,8 +817,8 @@
- #mainbam=pysam.Samfile(bamfile,"rb")
- #regions=mainbam.references
- #mainbam.close()
--dicregions=dict(rrefs.items())
--chrs=[x for x in dicregions.keys() if x not in nochrs]
-+dicregions=dict(list(rrefs.items()))
-+chrs=[x for x in list(dicregions.keys()) if x not in nochrs]
- sys.stderr.write('Analysis on %i regions.\n' %(len(chrs)))
-
- if infolder!='': outfolder=os.path.join(outfolder_,'denovo_%s_%s' %(infolder,pid))
-@@ -906,7 +906,7 @@
- if pileupread.alignment.has_tag('SA'): continue
- #s,q,t,qq=pileupread.alignment.seq[pileupread.qpos].upper(),ord(pileupread.alignment.qual[pileupread.qpos])-QVAL,'*',pileupread.alignment.qual[pileupread.qpos]
- # escludi posizioni introniche nei pressi di splice sites
-- if exss and di.has_key(pileupcolumn.reference_pos+1): continue #MOD
-+ if exss and pileupcolumn.reference_pos+1 in di: continue #MOD
- # multiple hit
- if exh: #MOD
- if pileupread.alignment.is_secondary: continue #MOD
-@@ -998,7 +998,7 @@
- elif pileupread.alignment.is_read2: rt=2
- else: rt=0
- rname=pileupread.alignment.query_name+'_%i'%(rt) #MOD
-- if d.has_key(rname): blatc+='0' #continue
-+ if rname in d: blatc+='0' #continue
- else: blatc+='1'
- # se la base e' diversa dal reference
- # se in regione omopolimerica scarta
-@@ -1032,7 +1032,7 @@
- if not custsub:
- ni='ACGT'
- for b in range(4):
-- if dsubs.has_key(ref.upper()+ni[b]):
-+ if ref.upper()+ni[b] in dsubs:
- dsubs[ref.upper()+ni[b]]+=bcomp[b]
- if expos:
- if chr in extabix.contigs:
-diff -ru a/main/REDItoolDnaRna.py b/main/REDItoolDnaRna.py
---- a/main/REDItoolDnaRna.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/main/REDItoolDnaRna.py 2020-07-16 16:17:21.757013649 -0500
-@@ -23,7 +23,7 @@
- try: import pysam
- except: sys.exit('Pysam module not found.')
- from multiprocessing import Process, Queue
--from Queue import Empty
-+from queue import Empty
- import gzip
-
- pysamVersion=pysam.__version__
-@@ -34,7 +34,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python REDItoolDnaRNA.py [options]
- Options:
- -i RNA-Seq BAM file
-@@ -103,7 +103,7 @@
- - For Tophat2 use 50
- - For GSNAP use 30
-
--"""%(pid)
-+"""%(pid))
-
- #option --fastq Fastq to get reads [requires --reads], separated by comma [if paired] NOT IMPLEMENTED
- #option --rmOver Remove overlapping reads NOT IMPLEMENTED
-@@ -111,7 +111,7 @@
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:f:k:t:o:c:q:m:O:s:edpuA:a:B:b:lLv:n:EPr:hHIXG:K:j:C:JDUzw:N:ZW:RVMT:F:x:g:SY:",["help","gzip","reads","addP","rmIndels"])
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-@@ -325,7 +325,7 @@
- params.append('Analysis ID: %s\n' %(pid))
- params.append('Analysis time: %s\n' %(script_time))
- params.append('-i --> RNA-Seq BAM file: %s\n' %(bamfile))
--params.append('-j --> DNA-Seq BAM file(s): %s\n' %(','.join(dgbamfile.keys())))
-+params.append('-j --> DNA-Seq BAM file(s): %s\n' %(','.join(list(dgbamfile.keys()))))
- params.append('-I --> Sort RNA-Seq BAM file: %i\n' %(sortbam))
- params.append('-J --> Sort DNA-Seq BAM file: %i\n' %(sortgbam))
- params.append('-f --> Reference file: %s\n' %(fastafile))
-@@ -483,9 +483,9 @@
- subs=[]
- subv=[]
- for i in seq.upper():
-- if b.has_key(i): b[i]+=1
-+ if i in b: b[i]+=1
- for i in b:
-- if not b.has_key(ref): continue
-+ if ref not in b: continue
- if b[i]!=0 and i!=ref:
- vv=float(b[i])/(b[i]+b[ref])
- subv.append((b[i],vv,ref+i))
-@@ -537,7 +537,7 @@
- a={'A':'T','T':'A','C':'G','G':'C'}
- ss=''
- for i in s.upper():
-- if a.has_key(i): ss+=a[i]
-+ if i in a: ss+=a[i]
- elif i==' ': ss+=' '
- elif i=='-': ss+='-'
- else: ss+='N'
-@@ -682,7 +682,7 @@
- if l.count(i[0])==2:
- s='='
- if i[1]!=i[2]: s='!'
-- if us.has_key(i[0]): us[i[0]].append((x,s))
-+ if i[0] in us: us[i[0]].append((x,s))
- else: us[i[0]]=[(x,s)]
- x+=1
- for i in us:
-@@ -753,7 +753,7 @@
- pysam.faidx(fastafile)
- ###########################################################
- # Check reference for name consistency
--grefs=dgdic.keys()
-+grefs=list(dgdic.keys())
- rrefs={}
- ridxinfo=pysam.idxstats(bamfile)
- for j in ridxinfo.split('\n'): #MOD
-@@ -769,7 +769,7 @@
- fidxinfo.close()
- # in rna-seq
- rnof=[]
--for i in rrefs.keys():
-+for i in list(rrefs.keys()):
- if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i))
- if len(gbamfile)!=0:
- for i in grefs:
-@@ -833,9 +833,9 @@
- #regions=mainbam.references
- #regionslens=mainbam.lengths
- #mainbam.close()
--dicregions=dict(rrefs.items())
-+dicregions=dict(list(rrefs.items()))
- #dicregions=dict([(regions[x],regionslens[x]) for x in range(len(regions))])
--chrs=[x for x in dicregions.keys() if x not in nochrs]
-+chrs=[x for x in list(dicregions.keys()) if x not in nochrs]
- if fworkR: sys.stderr.write('Analysis on region %s:%i-%i.\n' %(workR[0],workR[1][0],workR[1][1]))
- else: sys.stderr.write('Analysis on %i regions.\n' %(len(chrs)))
- ###########################################################
-@@ -883,7 +883,7 @@
- isgbam=1
- inputs=myinput.split('$')
- chr,bamfile,start_region,lenregion,suff_=inputs[0],inputs[1],int(inputs[2]),int(inputs[3]),inputs[4]
-- if not dgdic.has_key(chr): isgbam=0
-+ if chr not in dgdic: isgbam=0
- outfile=os.path.join(outfolder,'table_%s'%(suff_))
- if slist:
- if gziptab: outrna=gzip.open(os.path.join(outfolder,'pileupRNA_%s.gz'%(suff)),'wb')
-@@ -1024,7 +1024,7 @@
- if pileupread.alignment.is_read1: rt=1
- elif pileupread.alignment.is_read2: rt=2
- rname=pileupread.alignment.query_name+'_%i'%(rt)
-- if gd.has_key(rname): gblatc+='0' #continue
-+ if rname in gd: gblatc+='0' #continue
- else: gblatc+='1'
- # se la base e' diversa dal reference
- # se in regione omopolimerica scarta
-@@ -1079,7 +1079,7 @@
- #s,q,t,qq=pileupread.alignment.query_sequence[pileupread.query_position].upper(),pileupread.alignment.query_qualities[pileupread.query_position],'*',pileupread.alignment.qual[pileupread.query_position]
- #s,q,t,qq=pileupread.alignment.seq[pileupread.qpos].upper(),ord(pileupread.alignment.qual[pileupread.qpos])-QVAL,'*',pileupread.alignment.qual[pileupread.qpos]
- # escludi posizioni introniche nei pressi di splice sites
-- if exss and di.has_key(pileupcolumn.reference_pos+1): continue
-+ if exss and pileupcolumn.reference_pos+1 in di: continue
- # multiple hit
- if exh:
- if pileupread.alignment.is_secondary: continue
-@@ -1179,7 +1179,7 @@
- elif pileupread.alignment.is_read2: rt=2
- else: rt=0
- rname=pileupread.alignment.query_name+'_%i'%(rt)
-- if d.has_key(rname): blatc+='0' #continue
-+ if rname in d: blatc+='0' #continue
- else: blatc+='1'
- # se la base e' diversa dal reference
- # se in regione omopolimerica scarta
-@@ -1213,7 +1213,7 @@
- else: addpos=(pileupread.alignment.query_name,'-',pileupread.alignment.reference_name,'-',pileupread.alignment.reference_start,pileupread.alignment.reference_end,0 , 0)
- rqname_comp=rqname+'$'+pileupread.alignment.reference_name+'$'+str(pileupcolumn.reference_pos+1)
- #addpos=(chr+'_'+str(pileupcolumn.reference_pos+1),pileupcolumn.reference_pos+1)
-- if not grdb.has_key(rqname):
-+ if rqname not in grdb:
- #print rqname reference_start
- outreads.write('>'+rqname_comp+'\n'+rseqname+'\n')
- #grdb[rqname]=[addpos]
-@@ -1221,7 +1221,7 @@
- # if addpos not in grdb[rqname]:
- # grdb[rqname].append(addpos)
- if addP:
-- if not grdb2.has_key(rname): grdb2[rname]=addpos
-+ if rname not in grdb2: grdb2[rname]=addpos
- if seq.strip()!='':
- #print seq,qual,squal
- if rmIndel:
-@@ -1268,7 +1268,7 @@
- if exinv and subs=='-': continue
- if not checkSubs(subs): continue
- #print out rna-seq info + dna-seq
-- if gdic.has_key(pileupcolumn.reference_pos): # abbiamo l'informazione genomica
-+ if pileupcolumn.reference_pos in gdic: # abbiamo l'informazione genomica
- if exnonh and not gdic[pileupcolumn.reference_pos][1]: continue
- if mystrand=='0':
- gdic[pileupcolumn.reference_pos][0][2]=comp2(eval(gdic[pileupcolumn.reference_pos][0][2]))
-diff -ru a/main/REDItoolKnown.py b/main/REDItoolKnown.py
---- a/main/REDItoolKnown.py 2020-07-16 16:16:59.360014702 -0500
-+++ b/main/REDItoolKnown.py 2020-07-16 16:17:22.063013634 -0500
-@@ -23,7 +23,7 @@
- try: import pysam
- except: sys.exit('Pysam module not found.')
- from multiprocessing import Process, Queue
--from Queue import Empty
-+from queue import Empty
-
- pysamVersion=pysam.__version__
-
-@@ -34,7 +34,7 @@
- pid=str(os.getpid()+random.randint(0,999999999))
-
- def usage():
-- print """
-+ print("""
- USAGE: python REDItoolKnown.py [options]
- Options:
- -i BAM file
-@@ -82,12 +82,12 @@
- - For Tophat2 use 50
- - For GSNAP use 30
-
--"""%(pid)
-+"""%(pid))
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "i:f:k:t:o:c:q:m:O:s:edpuT:B:Sv:n:EP:r:hHIXG:K:l:C:F:x:g:U")
- except getopt.GetoptError as err:
-- print str(err) # will print something like "option -a not recognized"
-+ print(str(err)) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
-@@ -257,7 +257,7 @@
- return False
- try:
- os.kill(pid, 0)
-- except OSError, e:
-+ except OSError as e:
- return e.errno == errno.EPERM
- else:
- return True
-@@ -361,9 +361,9 @@
- subs=[]
- subv=[]
- for i in seq.upper():
-- if b.has_key(i): b[i]+=1
-+ if i in b: b[i]+=1
- for i in b:
-- if not b.has_key(ref): continue
-+ if ref not in b: continue
- if b[i]!=0 and i!=ref:
- vv=float(b[i])/(b[i]+b[ref])
- subv.append((b[i],vv,ref+i))
-@@ -414,7 +414,7 @@
- a={'A':'T','T':'A','C':'G','G':'C'}
- ss=''
- for i in s.upper():
-- if a.has_key(i): ss+=a[i]
-+ if i in a: ss+=a[i]
- else: ss+='N'
- return ss
-
-@@ -524,7 +524,7 @@
- fidxinfo.close()
- # in rna-seq
- rnof=[]
--for i in rrefs.keys():
-+for i in list(rrefs.keys()):
- if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i))
- ##################################
-
-@@ -568,8 +568,8 @@
- #mainbam.close()
- #dicregions=dict([(regions[x],regionslens[x]) for x in range(len(regions))])
- #chrs=[x for x in regions if x not in nochrs]
--dicregions=dict(rrefs.items())
--chrs=[x for x in dicregions.keys() if x not in nochrs]
-+dicregions=dict(list(rrefs.items()))
-+chrs=[x for x in list(dicregions.keys()) if x not in nochrs]
- sys.stderr.write('Analysis on %i regions.\n' %(len(chrs)))
-
- if infolder!='': outfolder=os.path.join(outfolder_,'known_%s_%s' %(infolder,pid))
-@@ -654,7 +654,7 @@
- # else explore bam to find exact positions
- for pileupcolumn in bam.pileup(chr,startk,endk,stepper='nofilter', max_depth=MAX_DEPTH):
- if not startk<=pileupcolumn.reference_pos<=endk: continue
-- if not kdic.has_key(pileupcolumn.reference_pos+1): continue
-+ if pileupcolumn.reference_pos+1 not in kdic: continue
- ref=fasta.fetch(chr,pileupcolumn.reference_pos,pileupcolumn.reference_pos+1).upper()
- seq,qual,strand,squal,blatc='',0,'',[],''
- if rmsh:
-@@ -668,7 +668,7 @@
- if pileupread.alignment.is_supplementary: continue
- if pileupread.alignment.has_tag('SA'): continue
- # escludi posizioni introniche nei pressi di splice sites
-- if exss and di.has_key(pileupcolumn.reference_pos+1): continue
-+ if exss and pileupcolumn.reference_pos+1 in di: continue
- # multiple hit
- if exh:
- if pileupread.alignment.is_secondary: continue
-@@ -754,7 +754,7 @@
- elif pileupread.alignment.is_read2: rt=2
- else: rt=0
- rname=pileupread.alignment.query_name+'_%i'%(rt)
-- if d.has_key(rname): blatc+='0' #continue
-+ if rname in d: blatc+='0' #continue
- else: blatc+='1'
- # se la base e' diversa dal reference
- # se in regione omopolimerica scarta
diff --git a/var/spack/repos/builtin/packages/reditools/setup.py.patch b/var/spack/repos/builtin/packages/reditools/setup.py.patch
index f65345ad92..bde2bb579c 100644
--- a/var/spack/repos/builtin/packages/reditools/setup.py.patch
+++ b/var/spack/repos/builtin/packages/reditools/setup.py.patch
@@ -1,6 +1,16 @@
---- a/setup.py 2020-07-16 14:01:48.601449013 -0500
-+++ b/setup.py 2020-07-16 14:02:31.547441668 -0500
-@@ -33,7 +33,7 @@
+--- a/setup.py 2021-05-20 16:22:44.832444514 -0500
++++ b/setup.py 2021-05-20 16:23:43.127629214 -0500
+@@ -23,7 +23,8 @@
+ 'accessory/readPsl.py',
+ 'accessory/subCount.py',
+ 'accessory/subCount2.py',
+- 'accessory/rediportal2recoding.py'
++ 'accessory/rediportal2recoding.py',
++ 'accessory/get_DE_events.py'
+ ],
+ license='LICENSE.txt',
+ classifiers=[
+@@ -33,7 +34,7 @@
'Operating System :: POSIX',
'Programming Language :: Python',
],