From 91ef60eb0ecc090c272d77bf1c67cae9e5f4bee3 Mon Sep 17 00:00:00 2001 From: Glenn Johnson Date: Fri, 18 Jun 2021 21:34:50 -0500 Subject: reditools: update and add features (#24370) This PR does the following: - adds version corresponding to commit at 08/03/2020 - adds missing get_DE_events.py script - adds dependencies needed by get_DE_events.py - removes REDItoolDenovo.py.patch and python2to3.patch in favor of running 2to3 and reindent pre-build - add batch_sort.patch to handle differences in string/char handling betweeen python2 and python3 - adds a variant for the Nature Protocol - adds dependencies for the nature_protocol variant - added myself as maintainer This PR adds a new version of reditools from git. --- .../packages/reditools/REDItoolDenovo.py.patch | 11 - .../builtin/packages/reditools/batch_sort.patch | 168 ++++ .../repos/builtin/packages/reditools/package.py | 59 +- .../builtin/packages/reditools/python2to3.patch | 926 --------------------- .../builtin/packages/reditools/setup.py.patch | 16 +- 5 files changed, 236 insertions(+), 944 deletions(-) delete mode 100644 var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch create mode 100644 var/spack/repos/builtin/packages/reditools/batch_sort.patch delete mode 100644 var/spack/repos/builtin/packages/reditools/python2to3.patch (limited to 'var') diff --git a/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch b/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch deleted file mode 100644 index a88358829c..0000000000 --- a/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- a/main/REDItoolDenovo.py 2020-07-16 17:35:46.008030346 -0500 -+++ b/main/REDItoolDenovo.py 2020-07-16 17:38:39.700035490 -0500 -@@ -768,7 +768,7 @@ - for j in ridxinfo.split('\n'): #MOD - l=(j.strip()).split('\t') - if l[0] in ['*', '']: continue #MOD -- if int(l[2])+int(l[3]) > 0: rrefs[l[0]]=int(l[1]) -+ if int(l[2])+int(l[3]) > 0: rrefs[l[0]]=int(l[1]) - frefs=[] - fidxinfo=open(fastafile+'.fai') - for j in fidxinfo: diff --git a/var/spack/repos/builtin/packages/reditools/batch_sort.patch b/var/spack/repos/builtin/packages/reditools/batch_sort.patch new file mode 100644 index 0000000000..a319d9619e --- /dev/null +++ b/var/spack/repos/builtin/packages/reditools/batch_sort.patch @@ -0,0 +1,168 @@ +diff -ru a/accessory/GFFtoTabix.py b/accessory/GFFtoTabix.py +--- a/accessory/GFFtoTabix.py 2021-05-19 15:28:02.000000000 -0500 ++++ b/accessory/GFFtoTabix.py 2021-05-20 16:06:25.187316573 -0500 +@@ -102,7 +102,7 @@ + chunks = [] + xx=0 + try: +- with open(input,'rb',64*1024) as input_file: ++ with open(input,'r',64*1024) as input_file: + input_iterator = iter(input_file) + for tempdir in cycle(tempdirs): + current_chunk2=[] +@@ -119,13 +119,13 @@ + xx+=len(current_chunk3) + if not current_chunk3: break + sys.stdout.write("Loaded and sorted %i lines.\n"%(xx)) +- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024) ++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024) + chunks.append(output_chunk) + output_chunk.writelines(current_chunk3) + output_chunk.flush() + output_chunk.seek(0) + sys.stdout.write("Merging from %i files.\n"%(len(chunks))) +- with open(output,'wb',64*1024) as output_file: ++ with open(output,'w',64*1024) as output_file: + output_file.writelines(merge(key, *chunks)) + finally: + for chunk in chunks: +@@ -150,4 +150,4 @@ + sys.stdout.write("Tabix file saved on %s.\n" %(GFFfile)) + sys.stdout.write("Indices saved on %s.tbi.\n" %(GFFfile)) + script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time())) +-sys.stdout.write("Script time --> END: %s\n"%(script_time)) +\ No newline at end of file ++sys.stdout.write("Script time --> END: %s\n"%(script_time)) +diff -ru a/accessory/SortGFF.py b/accessory/SortGFF.py +--- a/accessory/SortGFF.py 2021-05-19 15:28:02.000000000 -0500 ++++ b/accessory/SortGFF.py 2021-05-20 16:06:01.023238792 -0500 +@@ -102,7 +102,7 @@ + chunks = [] + xx=0 + try: +- with open(input,'rb',64*1024) as input_file: ++ with open(input,'r',64*1024) as input_file: + input_iterator = iter(input_file) + for tempdir in cycle(tempdirs): + current_chunk2=[] +@@ -119,13 +119,13 @@ + xx+=len(current_chunk3) + if not current_chunk3: break + sys.stdout.write("Loaded and sorted %i lines.\n"%(xx)) +- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024) ++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024) + chunks.append(output_chunk) + output_chunk.writelines(current_chunk3) + output_chunk.flush() + output_chunk.seek(0) + sys.stdout.write("Merging from %i files.\n"%(len(chunks))) +- with open(output,'wb',64*1024) as output_file: ++ with open(output,'w',64*1024) as output_file: + output_file.writelines(merge(key, *chunks)) + finally: + for chunk in chunks: +@@ -140,4 +140,4 @@ + batch_sort(GFFfile,outfile,key_,buffer_size,tempdirs) + sys.stdout.write("Sorted GFF saved on %s\n"%(outfile)) + script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time())) +-sys.stdout.write("Script time --> END: %s\n"%(script_time)) +\ No newline at end of file ++sys.stdout.write("Script time --> END: %s\n"%(script_time)) +diff -ru a/accessory/SortTable.py b/accessory/SortTable.py +--- a/accessory/SortTable.py 2021-05-19 15:28:02.000000000 -0500 ++++ b/accessory/SortTable.py 2021-05-20 16:05:35.857157751 -0500 +@@ -122,7 +122,7 @@ + chunks = [] + xx=0 + try: +- with open(input,'rb',64*1024) as input_file: ++ with open(input,'r',64*1024) as input_file: + input_iterator = iter(input_file) + for tempdir in cycle(tempdirs): + current_chunk2=[] +@@ -142,13 +142,13 @@ + xx+=len(current_chunk3) + if not current_chunk3: break + sys.stdout.write("Loaded and sorted %i lines.\n"%(xx)) +- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024) ++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024) + chunks.append(output_chunk) + output_chunk.writelines(current_chunk3) + output_chunk.flush() + output_chunk.seek(0) + sys.stdout.write("Merging from %i files.\n"%(len(chunks))) +- with open(output,'wb',64*1024) as output_file: ++ with open(output,'w',64*1024) as output_file: + output_file.writelines(merge(key, *chunks)) + finally: + for chunk in chunks: +@@ -163,4 +163,4 @@ + batch_sort(GFFfile,outfile,key_,buffer_size,tempdirs) + sys.stdout.write("Sorted GFF saved on %s\n"%(outfile)) + script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time())) +-sys.stdout.write("Script time --> END: %s\n"%(script_time)) +\ No newline at end of file ++sys.stdout.write("Script time --> END: %s\n"%(script_time)) +diff -ru a/accessory/TableToGFF.py b/accessory/TableToGFF.py +--- a/accessory/TableToGFF.py 2021-05-19 15:28:02.000000000 -0500 ++++ b/accessory/TableToGFF.py 2021-05-20 16:05:11.309078667 -0500 +@@ -104,7 +104,7 @@ + chunks = [] + xx=0 + try: +- with open(input,'rb',64*1024) as input_file: ++ with open(input,'r',64*1024) as input_file: + input_iterator = iter(input_file) + for tempdir in cycle(tempdirs): + current_chunk2=[] +@@ -121,13 +121,13 @@ + xx+=len(current_chunk3) + if not current_chunk3: break + sys.stdout.write("Loaded and sorted %i lines.\n"%(xx)) +- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024) ++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024) + chunks.append(output_chunk) + output_chunk.writelines(current_chunk3) + output_chunk.flush() + output_chunk.seek(0) + sys.stdout.write("Merging from %i files.\n"%(len(chunks))) +- with open(output,'wb',64*1024) as output_file: ++ with open(output,'w',64*1024) as output_file: + output_file.writelines(merge(key, *chunks)) + finally: + for chunk in chunks: +diff -ru a/accessory/tableToTabix.py b/accessory/tableToTabix.py +--- a/accessory/tableToTabix.py 2021-05-19 15:28:02.000000000 -0500 ++++ b/accessory/tableToTabix.py 2021-05-20 16:04:45.468995382 -0500 +@@ -117,7 +117,7 @@ + chunks = [] + xx=0 + try: +- with open(input,'rb',64*1024) as input_file: ++ with open(input,'r',64*1024) as input_file: + input_iterator = iter(input_file) + for tempdir in cycle(tempdirs): + current_chunk2=[] +@@ -136,13 +136,13 @@ + xx+=len(current_chunk3) + if not current_chunk3: break + sys.stdout.write("Loaded and sorted %i lines.\n"%(xx)) +- output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+b',64*1024) ++ output_chunk = open(os.path.join(tempdir,'%06i_%s'%(len(chunks),pid)),'w+',64*1024) + chunks.append(output_chunk) + output_chunk.writelines(current_chunk3) + output_chunk.flush() + output_chunk.seek(0) + sys.stdout.write("Merging from %i files.\n"%(len(chunks))) +- with open(output,'wb',64*1024) as output_file: ++ with open(output,'w',64*1024) as output_file: + output_file.writelines(merge(key, *chunks)) + finally: + for chunk in chunks: +@@ -167,4 +167,4 @@ + sys.stdout.write("Tabix file saved on %s.\n" %(GFFfile)) + sys.stdout.write("Indices saved on %s.tbi.\n" %(GFFfile)) + script_time=time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(time.time())) +-sys.stdout.write("Script time --> END: %s\n"%(script_time)) +\ No newline at end of file ++sys.stdout.write("Script time --> END: %s\n"%(script_time)) diff --git a/var/spack/repos/builtin/packages/reditools/package.py b/var/spack/repos/builtin/packages/reditools/package.py index 256ec5cb0f..64df6ffce8 100644 --- a/var/spack/repos/builtin/packages/reditools/package.py +++ b/var/spack/repos/builtin/packages/reditools/package.py @@ -19,14 +19,65 @@ class Reditools(PythonPackage): homepage = "https://github.com/BioinfoUNIBA/REDItools" git = "https://github.com/BioinfoUNIBA/REDItools.git" + maintainers = ['glennpj'] + + version('1.3_2020-08-03', commit='2dc71277a25e667797c363d1fca22726249774a3') version('1.3_2020-03-20', commit='cf47f3d54f324aeb9650bcf8bfacf5a967762a55') - depends_on('py-pysam', type=('build', 'run')) - depends_on('py-fisher', type=('build', 'run')) + variant('nature_protocol', default=False, + description='Install the Nature Protocol scripts and files') + + depends_on('py-reindent', type='build', when='^python@3:') depends_on('blat', type='run') + depends_on('py-fisher', type='run') + depends_on('py-numpy', type='run') + depends_on('py-pandas', type='run') + depends_on('py-pysam', type='run') + depends_on('py-scipy', type='run') depends_on('tabix', type='run') - patch('REDItoolDenovo.py.patch') + # Nature Protocol + depends_on('bcftools', type='run', when='+nature_protocol') + depends_on('bedtools2', type='run', when='+nature_protocol') + depends_on('bwa', type='run', when='+nature_protocol') + depends_on('bzip2', type='run', when='+nature_protocol') + depends_on('fastp', type='run', when='+nature_protocol') + depends_on('fastqc', type='run', when='+nature_protocol') + depends_on('git', type='run', when='+nature_protocol') + depends_on('gmap-gsnap', type='run', when='+nature_protocol') + depends_on('htslib', type='run', when='+nature_protocol') + depends_on('libdeflate', type='run', when='+nature_protocol') + depends_on('py-bx-python', type='run', when='+nature_protocol') + depends_on('py-rseqc', type='run', when='+nature_protocol') + depends_on('samtools', type='run', when='+nature_protocol') + depends_on('star', type='run', when='+nature_protocol') + depends_on('wget', type='run', when='+nature_protocol') + patch('interpreter.patch') patch('setup.py.patch') - patch('python2to3.patch', when='^python@3:') + patch('batch_sort.patch', when='^python@3:') + + @run_before('build') + def p2_to_p3(self): + if '^python@3:' in self.spec: + # clean up space/tab mixing + reindent = which('reindent') + reindent('--nobackup', '--recurse', '.') + + # convert to be python3 compatible + p2_to_p3 = which('2to3') + p2_to_p3('--nobackups', '--write', '.') + + @run_after('install') + def nature_protocol(self): + if '+nature_protocol' in self.spec: + mkdirp(prefix.NPfiles) + install_tree('NPfiles', prefix.NPfiles) + + ignore_files = [ + 'conda_pckg_installer_docker.py', + 'conda_pckgs_installer.py', + 'download-prepare-data-NP_docker.py', + ] + docker_conda = lambda p: p in ignore_files + install_tree('NPscripts', prefix.bin, ignore=docker_conda) diff --git a/var/spack/repos/builtin/packages/reditools/python2to3.patch b/var/spack/repos/builtin/packages/reditools/python2to3.patch deleted file mode 100644 index 94937d175d..0000000000 --- a/var/spack/repos/builtin/packages/reditools/python2to3.patch +++ /dev/null @@ -1,926 +0,0 @@ -diff -ru a/accessory/AnnotateTable.py b/accessory/AnnotateTable.py ---- a/accessory/AnnotateTable.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/AnnotateTable.py 2020-07-16 16:17:22.159013630 -0500 -@@ -28,7 +28,7 @@ - sys.stderr.write('Pysam version used: %s\n' %(pysamVersion)) - - def usage(): -- print """ -+ print(""" - USAGE: python AnnotateTable.py [options] - Options: - -a Sorted Annotation file -@@ -44,12 +44,12 @@ - -C Columns with base distribution [7,12] (in combination with -S) - -o Save lines to a file - -h Print this help --""" -+""") - - try: - opts, args = getopt.getopt(sys.argv[1:], 'i:a:o:hs:c:n:SC:uk:r:',["help"]) --except getopt.GetoptError, err: -- print str(err) -+except getopt.GetoptError as err: -+ print(str(err)) - usage() - sys.exit() - -@@ -101,7 +101,7 @@ - a={'A':'T','T':'A','C':'G','G':'C'} - ss='' - for i in s.upper(): -- if a.has_key(i): ss+=a[i] -+ if i in a: ss+=a[i] - elif i==' ': ss+=' ' - elif i=='-': ss+='-' - else: ss+='N' -@@ -125,23 +125,23 @@ - anns='+' - for i in res: - if i[3]=='+': -- if d['+'].has_key(i[1]): -+ if i[1] in d['+']: - if i[0] not in d['+'][i[1]][0]: d['+'][i[1]][0]=d['+'][i[1]][0]+','+i[0] - if i[2]+'-'+i[0] not in d['+'][i[1]][1]: d['+'][i[1]][1]=d['+'][i[1]][1]+','+i[2]+'-'+i[0] - else: - d['+'][i[1]]=[i[0],i[2]+'-'+i[0]] - elif i[3]=='-': -- if d['-'].has_key(i[1]): -+ if i[1] in d['-']: - if i[0] not in d['-'][i[1]][0]: d['-'][i[1]][0]=d['-'][i[1]][0]+','+i[0] - if i[2]+'-'+i[0] not in d['-'][i[1]][1]: d['-'][i[1]][1]=d['-'][i[1]][1]+','+i[2]+'-'+i[0] - else: - d['-'][i[1]]=[i[0],i[2]+'-'+i[0]] -- gip='$'.join(d['+'].keys()) -- featp='$'.join([d['+'][x][0] for x in d['+'].keys()]) -- tip='$'.join([d['+'][x][1] for x in d['+'].keys()]) -- gim='$'.join(d['-'].keys()) -- featm='$'.join([d['-'][x][0] for x in d['-'].keys()]) -- tim='$'.join([d['-'][x][1] for x in d['-'].keys()]) -+ gip='$'.join(list(d['+'].keys())) -+ featp='$'.join([d['+'][x][0] for x in list(d['+'].keys())]) -+ tip='$'.join([d['+'][x][1] for x in list(d['+'].keys())]) -+ gim='$'.join(list(d['-'].keys())) -+ featm='$'.join([d['-'][x][0] for x in list(d['-'].keys())]) -+ tim='$'.join([d['-'][x][1] for x in list(d['-'].keys())]) - p=[featp,gip,tip] - m=[featm,gim,tim] - pm=[(featp+'&'+featm).strip('&'),(gip+'&'+gim).strip('&'),(tip+'&'+tim).strip('&')] -@@ -187,7 +187,7 @@ - elif strand=='-': res=ann[1] - else: res=ann[2] - for i in addc: -- print prinfo[i]+ res[i] -+ print(prinfo[i]+ res[i]) - except: sys.exit('Error: not correct position.') - - if af: -@@ -200,7 +200,7 @@ - h=[i.strip()] - for k in addc: h.append(hinfo[k]) - if save: o.write('\t'.join(h)+'\n') -- else: print '\t'.join(h) -+ else: print('\t'.join(h)) - continue - if i.startswith(skip): continue - l=(i.strip()).split('\t') -@@ -230,7 +230,7 @@ - else: res=ann[2] - for j in addc: l.append(res[j]) - if save: o.write('\t'.join(l)+'\n') -- else: print '\t'.join(l) -+ else: print('\t'.join(l)) - tabix.close() - if save: - o.close() -diff -ru a/accessory/FilterTable.py b/accessory/FilterTable.py ---- a/accessory/FilterTable.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/FilterTable.py 2020-07-16 16:17:22.209013627 -0500 -@@ -30,7 +30,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python FilterTable.py [options] - Options: - -i Table file -@@ -43,12 +43,12 @@ - -p Print simple statistics - -h Print this help - --""" -+""") - - try: - opts, args = getopt.getopt(sys.argv[1:], 'i:o:f:hs:F:S:Ep',["help"]) --except getopt.GetoptError, err: -- print str(err) -+except getopt.GetoptError as err: -+ print(str(err)) - usage() - sys.exit() - -diff -ru a/accessory/get_DE_events.py b/accessory/get_DE_events.py ---- a/accessory/get_DE_events.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/get_DE_events.py 2020-07-16 16:17:22.646013607 -0500 -@@ -104,7 +104,7 @@ - #WARNING those are np arrays. - - for i in range(0,edit_level_table.shape[0]): -- print i #keep track of progress -+ print(i) #keep track of progress - disease_edit_row = edit_level_table.loc[i, disease_people] - control_edit_row = edit_level_table.loc[i, control_people] - disease_cov_row = cov_table.loc[i, disease_people] -@@ -206,7 +206,7 @@ - #write the full_table to output - full_table.to_csv(output_file, sep='\t', index=False) - -- print "job completed\n" -+ print("job completed\n") - - - -@@ -225,12 +225,12 @@ - - def Sample_percentage(row): - """Percentage of samples from each type""" -- percentage = (len(filter(lambda x: x!= '-', row))/float(len(row)))*100 -+ percentage = (len([x for x in row if x!= '-'])/float(len(row)))*100 - return round(percentage) - - def Sample_count(row): - """Number of samples from each type""" -- count = len(filter(lambda x: x!= '-', row)) -+ count = len([x for x in row if x!= '-']) - return count - - def get_bh(pvalue,siglevel): -@@ -269,7 +269,7 @@ - if(row_a[-1] != '-' and row_a[-1] != 0.0 and row_a[-1] <= 0.05): - row = row[0].split('_') + row[2:] - row.insert(2, 'A.to.G') -- print '\t'.join(map(str,row)) -+ print('\t'.join(map(str,row))) - - def tuple_replace(i): - if type(i) == tuple: -@@ -292,11 +292,11 @@ - with open(samples_informations_file, 'r') as f: - for line in f: - if line.startswith('SRR'): -- line = map(str.strip, line.split(',')) -+ line = list(map(str.strip, line.split(','))) - sample_informations.setdefault(line[0], line[1]) - - --cwd = filter(os.path.isdir, os.listdir(os.getcwd())) -+cwd = list(filter(os.path.isdir, os.listdir(os.getcwd()))) - all_available_sites = [] - sample_edited_sites = {} - for directory in cwd: -@@ -306,7 +306,7 @@ - with open(table,'r') as a: - for line in a: - if line.startswith('chr'): -- s = map(str.strip, line.split("\t")) -+ s = list(map(str.strip, line.split("\t"))) - if s[7] == 'AG': - site, freq, coverage = s[0] + "_" + s[1], s[8], s[4] - freq_gnum_cov = '%s^%s^%s' %(s[8],eval(s[6])[2],s[4]) -@@ -314,14 +314,14 @@ - if (int(coverage) >= min_coverage) and (float(freq) >= min_edit_frequency): - sample_edited_sites.setdefault((directory, site), []).append((freq, freq_gnum_cov)) - --table_columns = map(lambda x: x + '_' + sample_informations[x], sorted(sample_informations.keys())) -+table_columns = [x + '_' + sample_informations[x] for x in sorted(sample_informations.keys())] - - disease = [i for i in table_columns if i.upper().find('DIS') != -1] - controls = [i for i in table_columns if i.upper().find('CTRL') != -1] - - if enable_linear_model: - outtable='' -- header = ['chromosome', 'position', 'type_editing'] + map(remove_underscore, controls) + map(remove_underscore, disease) -+ header = ['chromosome', 'position', 'type_editing'] + list(map(remove_underscore, controls)) + list(map(remove_underscore, disease)) - outtable += '\t'.join(header) - outtable += '\n' - #print '\t'.join(header) -@@ -329,13 +329,13 @@ - row = [chrom] - for col in header[2:]:#header.index('[num_controls/num_disease]')]: - row.append(sample_edited_sites.get((col.split('_')[0],chrom), ['-'])[0]) -- ctrls = zip(*(zip(controls,row[1:])))[1] -- dss = zip(*(zip(disease,row[len(ctrls)+1:])))[1] -- ctrls_freq = map(tuple_replace, ctrls) -- dss_freq = map(tuple_replace, dss) -+ ctrls = list(zip(*(list(zip(controls,row[1:])))))[1] -+ dss = list(zip(*(list(zip(disease,row[len(ctrls)+1:])))))[1] -+ ctrls_freq = list(map(tuple_replace, ctrls)) -+ dss_freq = list(map(tuple_replace, dss)) - row.append(str([Sample_count(ctrls), Sample_count(dss)])) - -- row_b = map(tuple_replace_bis, row) -+ row_b = list(map(tuple_replace_bis, row)) - row_b = row_b[0].split('_') + row_b[2:] - row_b.insert(2, 'A.to.G') - final_list = row_b[:-1] -@@ -359,20 +359,20 @@ - if pvalue_correction == 2: - header += ['pvalue BH corrected'] - -- print '\t'.join(header) -+ print('\t'.join(header)) - - for chrom in sorted(all_available_sites, key = lambda x: Set_Chr_Nr(x)): - row = [chrom] - for col in header[3:header.index('[num_controls/num_disease]')]: - row.append(sample_edited_sites.get((col.split('_')[0],chrom), ['-'])[0]) -- ctrls = zip(*(zip(controls,row[1:])))[1] -- dss = zip(*(zip(disease,row[len(ctrls)+1:])))[1] -- ctrls_freq = map(tuple_replace, ctrls) -- dss_freq = map(tuple_replace, dss) -+ ctrls = list(zip(*(list(zip(controls,row[1:])))))[1] -+ dss = list(zip(*(list(zip(disease,row[len(ctrls)+1:])))))[1] -+ ctrls_freq = list(map(tuple_replace, ctrls)) -+ dss_freq = list(map(tuple_replace, dss)) - row.append(str([Sample_count(ctrls), Sample_count(dss)])) - if (Sample_percentage(ctrls) >= min_sample_testing) and (Sample_percentage(dss) >= min_sample_testing): -- ctrls_mean = sum(map(float, filter(lambda x: x!= '-', ctrls_freq)))/len(filter(lambda x: x!= '-', ctrls_freq)) -- dss_mean = sum(map(float, filter(lambda x: x!= '-', dss_freq)))/len(filter(lambda x : x!= '-', dss_freq)) -+ ctrls_mean = sum(map(float, [x for x in ctrls_freq if x!= '-']))/len([x for x in ctrls_freq if x!= '-']) -+ dss_mean = sum(map(float, [x for x in dss_freq if x!= '-']))/len([x for x in dss_freq if x!= '-']) - delta_diff = abs(ctrls_mean - dss_mean) - pvalue=stats.mannwhitneyu(ctrls_freq, dss_freq, alternative='two-sided') - row.append(round(delta_diff, 3)) -@@ -388,12 +388,12 @@ - row += ['-', '-'] - else: - row += ['-', '-', '-'] -- row_a = map(tuple_replace, row) -- row_b = map(tuple_replace_bis, row) -+ row_a = list(map(tuple_replace, row)) -+ row_b = list(map(tuple_replace_bis, row)) - if pvalue_correction != 0 and only_significants == 'yes': - only_sig(row_a,row_b) - else: - row_b = row_b[0].split('_') + row_b[2:] - row_b.insert(2, 'A.to.G') -- print '\t'.join(map(str,row_b)) -+ print('\t'.join(map(str,row_b))) - -diff -ru a/accessory/GFFtoTabix.py b/accessory/GFFtoTabix.py ---- a/accessory/GFFtoTabix.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/GFFtoTabix.py 2020-07-16 16:17:22.264013625 -0500 -@@ -31,7 +31,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python GFFtoTabix.py [options] - Options: - -i GFF file -@@ -41,7 +41,7 @@ - -u Save an uncompressed GFF copy (add _copy suffix) - -h Print this help - --""" -+""") - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:Sb:t:hu",["help"]) -@@ -49,7 +49,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/accessory/rediportal2recoding.py b/accessory/rediportal2recoding.py ---- a/accessory/rediportal2recoding.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/rediportal2recoding.py 2020-07-16 16:17:22.709013604 -0500 -@@ -41,4 +41,4 @@ - gff_row = line[0] + '\t'+ valore + '\t' + 'ed' + '\t' + line[1] + \ - '\t' + line[1] + '\t' + '.' + '\t' + line[4] + '\t' + '.' + '\t' + \ - 'gene_id' + ' ' + '"ed_%s";' %(i) + ' ' + 'transcript_id' + ' ' + '"ed_%s";' %(i) -- print gff_row -+ print(gff_row) -diff -ru a/accessory/SearchInTable.py b/accessory/SearchInTable.py ---- a/accessory/SearchInTable.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/SearchInTable.py 2020-07-16 16:17:22.309013623 -0500 -@@ -25,7 +25,7 @@ - #pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python SearchInTable.py [options] - Options: - -i Sorted table file (first col=reference; second col=coordinate 1 based) -@@ -42,7 +42,7 @@ - -o Save found/not found positions on file - -h Print this help - --""" -+""") - #-k skip first INT lines [0] - - try: -@@ -51,7 +51,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/accessory/selectPositions.py b/accessory/selectPositions.py ---- a/accessory/selectPositions.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/selectPositions.py 2020-07-16 16:17:22.833013598 -0500 -@@ -25,7 +25,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python selectPositions.py [options] - Options: - -i Table file from REDItools -@@ -44,7 +44,7 @@ - -o Save selected positions on outTable_%s - -h Print this help - --"""%(pid) -+"""%(pid)) - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:c:C:v:s:f:F:euo:hrd:RV:",["help"]) -@@ -52,7 +52,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/accessory/SortGFF.py b/accessory/SortGFF.py ---- a/accessory/SortGFF.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/SortGFF.py 2020-07-16 16:17:22.357013620 -0500 -@@ -35,7 +35,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python SortGFF.py [options] - Options: - -i GFF file -@@ -44,7 +44,7 @@ - -t Temporary directory to use (multiple -t may be used) - -h Print this help - --"""%(pid) -+"""%(pid)) - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:o:b:t:h",["help"]) -@@ -52,7 +52,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/accessory/SortTable.py b/accessory/SortTable.py ---- a/accessory/SortTable.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/SortTable.py 2020-07-16 16:17:22.411013618 -0500 -@@ -35,7 +35,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python SortTable.py [options] - Options: - -i Table file -@@ -50,7 +50,7 @@ - -t Temporary directory to use (multiple -t may be used) - -h Print this help - --"""%(pid) -+"""%(pid)) - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:o:b:t:hd:s:c:e:m:O",["help"]) -@@ -58,7 +58,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/accessory/subCount2.py b/accessory/subCount2.py ---- a/accessory/subCount2.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/subCount2.py 2020-07-16 16:17:22.854013597 -0500 -@@ -28,5 +28,5 @@ - for i in s: - try: v=(s[i]/float(all))*100 - except: v=0.0 -- print i,s[i],all,v -+ print(i,s[i],all,v) - -diff -ru a/accessory/subCount.py b/accessory/subCount.py ---- a/accessory/subCount.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/subCount.py 2020-07-16 16:17:22.844013597 -0500 -@@ -30,5 +30,5 @@ - for i in s: - try: v=(s[i]/float(all))*100 - except: v=0.0 -- print i,s[i],all,v -+ print(i,s[i],all,v) - -diff -ru a/accessory/TableToGFF.py b/accessory/TableToGFF.py ---- a/accessory/TableToGFF.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/TableToGFF.py 2020-07-16 16:17:22.474013615 -0500 -@@ -29,7 +29,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python TableToGFF.py [options] - Options: - -i Table file from REDItools -@@ -40,7 +40,7 @@ - -o Outfile [outTable_%s.gff] - -h Print this help - --"""%(pid) -+"""%(pid)) - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:o:sthT:b:",["help"]) -@@ -48,7 +48,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/accessory/tableToTabix.py b/accessory/tableToTabix.py ---- a/accessory/tableToTabix.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/accessory/tableToTabix.py 2020-07-16 16:17:22.913013594 -0500 -@@ -31,7 +31,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python tableToTabix.py [options] - Options: - -i TAB-delimited file -@@ -46,7 +46,7 @@ - -u Save an uncompressed GFF copy (add _copy suffix) - -h Print this help - --""" -+""") - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:Sb:t:hus:c:e:m:0",["help"]) -@@ -54,7 +54,7 @@ - usage() - sys.exit(2) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -diff -ru a/main/REDItoolDenovo.py b/main/REDItoolDenovo.py ---- a/main/REDItoolDenovo.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/main/REDItoolDenovo.py 2020-07-16 16:17:21.265013672 -0500 -@@ -75,7 +75,7 @@ - return 1 - numerator = 1 - denominator = 1 -- for i in xrange(s+1, population + 1): -+ for i in range(s+1, population + 1): - numerator *= i - denominator *= (i - s) - return numerator/denominator -@@ -275,7 +275,7 @@ - try: import pysam - except: sys.exit('Pysam module not found.') - from multiprocessing import Process, Queue --from Queue import Empty -+from queue import Empty - try: - from fisher import pvalue - exfisher=1 -@@ -292,7 +292,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python REDItoolDenovo.py [options] - Options: - -i BAM file -@@ -343,12 +343,12 @@ - - For Tophat2 use 50 - - For GSNAP use 30 - --"""%(pid) -+"""%(pid)) - - try: - opts, args = getopt.getopt(sys.argv[1:], "b:f:k:t:o:c:q:m:O:s:edpuT:B:v:n:EP:r:hHa:i:lIU:V:w:XG:K:F:g:x:W") - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -@@ -474,7 +474,7 @@ - annfile=a - uann=1 - else: -- print o -+ print(o) - assert False, "Unhandled Option" - - ####### -@@ -532,7 +532,7 @@ - return False - try: - os.kill(pid, 0) -- except OSError, e: -+ except OSError as e: - return e.errno == errno.EPERM - else: - return True -@@ -637,9 +637,9 @@ - subs=[] - subv=[] - for i in seq.upper(): -- if b.has_key(i): b[i]+=1 -+ if i in b: b[i]+=1 - for i in b: -- if not b.has_key(ref): continue -+ if ref not in b: continue - if b[i]!=0 and i!=ref: - vv=float(b[i])/(b[i]+b[ref]) - subv.append((b[i],vv,ref+i)) -@@ -690,7 +690,7 @@ - a={'A':'T','T':'A','C':'G','G':'C'} - ss='' - for i in s.upper(): -- if a.has_key(i): ss+=a[i] -+ if i in a: ss+=a[i] - else: ss+='N' - return ss - -@@ -777,7 +777,7 @@ - frefs.append(l[0]) - fidxinfo.close() - rnof=[] --for i in rrefs.keys(): -+for i in list(rrefs.keys()): - if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i)) - ##################### - -@@ -817,8 +817,8 @@ - #mainbam=pysam.Samfile(bamfile,"rb") - #regions=mainbam.references - #mainbam.close() --dicregions=dict(rrefs.items()) --chrs=[x for x in dicregions.keys() if x not in nochrs] -+dicregions=dict(list(rrefs.items())) -+chrs=[x for x in list(dicregions.keys()) if x not in nochrs] - sys.stderr.write('Analysis on %i regions.\n' %(len(chrs))) - - if infolder!='': outfolder=os.path.join(outfolder_,'denovo_%s_%s' %(infolder,pid)) -@@ -906,7 +906,7 @@ - if pileupread.alignment.has_tag('SA'): continue - #s,q,t,qq=pileupread.alignment.seq[pileupread.qpos].upper(),ord(pileupread.alignment.qual[pileupread.qpos])-QVAL,'*',pileupread.alignment.qual[pileupread.qpos] - # escludi posizioni introniche nei pressi di splice sites -- if exss and di.has_key(pileupcolumn.reference_pos+1): continue #MOD -+ if exss and pileupcolumn.reference_pos+1 in di: continue #MOD - # multiple hit - if exh: #MOD - if pileupread.alignment.is_secondary: continue #MOD -@@ -998,7 +998,7 @@ - elif pileupread.alignment.is_read2: rt=2 - else: rt=0 - rname=pileupread.alignment.query_name+'_%i'%(rt) #MOD -- if d.has_key(rname): blatc+='0' #continue -+ if rname in d: blatc+='0' #continue - else: blatc+='1' - # se la base e' diversa dal reference - # se in regione omopolimerica scarta -@@ -1032,7 +1032,7 @@ - if not custsub: - ni='ACGT' - for b in range(4): -- if dsubs.has_key(ref.upper()+ni[b]): -+ if ref.upper()+ni[b] in dsubs: - dsubs[ref.upper()+ni[b]]+=bcomp[b] - if expos: - if chr in extabix.contigs: -diff -ru a/main/REDItoolDnaRna.py b/main/REDItoolDnaRna.py ---- a/main/REDItoolDnaRna.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/main/REDItoolDnaRna.py 2020-07-16 16:17:21.757013649 -0500 -@@ -23,7 +23,7 @@ - try: import pysam - except: sys.exit('Pysam module not found.') - from multiprocessing import Process, Queue --from Queue import Empty -+from queue import Empty - import gzip - - pysamVersion=pysam.__version__ -@@ -34,7 +34,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python REDItoolDnaRNA.py [options] - Options: - -i RNA-Seq BAM file -@@ -103,7 +103,7 @@ - - For Tophat2 use 50 - - For GSNAP use 30 - --"""%(pid) -+"""%(pid)) - - #option --fastq Fastq to get reads [requires --reads], separated by comma [if paired] NOT IMPLEMENTED - #option --rmOver Remove overlapping reads NOT IMPLEMENTED -@@ -111,7 +111,7 @@ - try: - opts, args = getopt.getopt(sys.argv[1:], "i:f:k:t:o:c:q:m:O:s:edpuA:a:B:b:lLv:n:EPr:hHIXG:K:j:C:JDUzw:N:ZW:RVMT:F:x:g:SY:",["help","gzip","reads","addP","rmIndels"]) - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -@@ -325,7 +325,7 @@ - params.append('Analysis ID: %s\n' %(pid)) - params.append('Analysis time: %s\n' %(script_time)) - params.append('-i --> RNA-Seq BAM file: %s\n' %(bamfile)) --params.append('-j --> DNA-Seq BAM file(s): %s\n' %(','.join(dgbamfile.keys()))) -+params.append('-j --> DNA-Seq BAM file(s): %s\n' %(','.join(list(dgbamfile.keys())))) - params.append('-I --> Sort RNA-Seq BAM file: %i\n' %(sortbam)) - params.append('-J --> Sort DNA-Seq BAM file: %i\n' %(sortgbam)) - params.append('-f --> Reference file: %s\n' %(fastafile)) -@@ -483,9 +483,9 @@ - subs=[] - subv=[] - for i in seq.upper(): -- if b.has_key(i): b[i]+=1 -+ if i in b: b[i]+=1 - for i in b: -- if not b.has_key(ref): continue -+ if ref not in b: continue - if b[i]!=0 and i!=ref: - vv=float(b[i])/(b[i]+b[ref]) - subv.append((b[i],vv,ref+i)) -@@ -537,7 +537,7 @@ - a={'A':'T','T':'A','C':'G','G':'C'} - ss='' - for i in s.upper(): -- if a.has_key(i): ss+=a[i] -+ if i in a: ss+=a[i] - elif i==' ': ss+=' ' - elif i=='-': ss+='-' - else: ss+='N' -@@ -682,7 +682,7 @@ - if l.count(i[0])==2: - s='=' - if i[1]!=i[2]: s='!' -- if us.has_key(i[0]): us[i[0]].append((x,s)) -+ if i[0] in us: us[i[0]].append((x,s)) - else: us[i[0]]=[(x,s)] - x+=1 - for i in us: -@@ -753,7 +753,7 @@ - pysam.faidx(fastafile) - ########################################################### - # Check reference for name consistency --grefs=dgdic.keys() -+grefs=list(dgdic.keys()) - rrefs={} - ridxinfo=pysam.idxstats(bamfile) - for j in ridxinfo.split('\n'): #MOD -@@ -769,7 +769,7 @@ - fidxinfo.close() - # in rna-seq - rnof=[] --for i in rrefs.keys(): -+for i in list(rrefs.keys()): - if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i)) - if len(gbamfile)!=0: - for i in grefs: -@@ -833,9 +833,9 @@ - #regions=mainbam.references - #regionslens=mainbam.lengths - #mainbam.close() --dicregions=dict(rrefs.items()) -+dicregions=dict(list(rrefs.items())) - #dicregions=dict([(regions[x],regionslens[x]) for x in range(len(regions))]) --chrs=[x for x in dicregions.keys() if x not in nochrs] -+chrs=[x for x in list(dicregions.keys()) if x not in nochrs] - if fworkR: sys.stderr.write('Analysis on region %s:%i-%i.\n' %(workR[0],workR[1][0],workR[1][1])) - else: sys.stderr.write('Analysis on %i regions.\n' %(len(chrs))) - ########################################################### -@@ -883,7 +883,7 @@ - isgbam=1 - inputs=myinput.split('$') - chr,bamfile,start_region,lenregion,suff_=inputs[0],inputs[1],int(inputs[2]),int(inputs[3]),inputs[4] -- if not dgdic.has_key(chr): isgbam=0 -+ if chr not in dgdic: isgbam=0 - outfile=os.path.join(outfolder,'table_%s'%(suff_)) - if slist: - if gziptab: outrna=gzip.open(os.path.join(outfolder,'pileupRNA_%s.gz'%(suff)),'wb') -@@ -1024,7 +1024,7 @@ - if pileupread.alignment.is_read1: rt=1 - elif pileupread.alignment.is_read2: rt=2 - rname=pileupread.alignment.query_name+'_%i'%(rt) -- if gd.has_key(rname): gblatc+='0' #continue -+ if rname in gd: gblatc+='0' #continue - else: gblatc+='1' - # se la base e' diversa dal reference - # se in regione omopolimerica scarta -@@ -1079,7 +1079,7 @@ - #s,q,t,qq=pileupread.alignment.query_sequence[pileupread.query_position].upper(),pileupread.alignment.query_qualities[pileupread.query_position],'*',pileupread.alignment.qual[pileupread.query_position] - #s,q,t,qq=pileupread.alignment.seq[pileupread.qpos].upper(),ord(pileupread.alignment.qual[pileupread.qpos])-QVAL,'*',pileupread.alignment.qual[pileupread.qpos] - # escludi posizioni introniche nei pressi di splice sites -- if exss and di.has_key(pileupcolumn.reference_pos+1): continue -+ if exss and pileupcolumn.reference_pos+1 in di: continue - # multiple hit - if exh: - if pileupread.alignment.is_secondary: continue -@@ -1179,7 +1179,7 @@ - elif pileupread.alignment.is_read2: rt=2 - else: rt=0 - rname=pileupread.alignment.query_name+'_%i'%(rt) -- if d.has_key(rname): blatc+='0' #continue -+ if rname in d: blatc+='0' #continue - else: blatc+='1' - # se la base e' diversa dal reference - # se in regione omopolimerica scarta -@@ -1213,7 +1213,7 @@ - else: addpos=(pileupread.alignment.query_name,'-',pileupread.alignment.reference_name,'-',pileupread.alignment.reference_start,pileupread.alignment.reference_end,0 , 0) - rqname_comp=rqname+'$'+pileupread.alignment.reference_name+'$'+str(pileupcolumn.reference_pos+1) - #addpos=(chr+'_'+str(pileupcolumn.reference_pos+1),pileupcolumn.reference_pos+1) -- if not grdb.has_key(rqname): -+ if rqname not in grdb: - #print rqname reference_start - outreads.write('>'+rqname_comp+'\n'+rseqname+'\n') - #grdb[rqname]=[addpos] -@@ -1221,7 +1221,7 @@ - # if addpos not in grdb[rqname]: - # grdb[rqname].append(addpos) - if addP: -- if not grdb2.has_key(rname): grdb2[rname]=addpos -+ if rname not in grdb2: grdb2[rname]=addpos - if seq.strip()!='': - #print seq,qual,squal - if rmIndel: -@@ -1268,7 +1268,7 @@ - if exinv and subs=='-': continue - if not checkSubs(subs): continue - #print out rna-seq info + dna-seq -- if gdic.has_key(pileupcolumn.reference_pos): # abbiamo l'informazione genomica -+ if pileupcolumn.reference_pos in gdic: # abbiamo l'informazione genomica - if exnonh and not gdic[pileupcolumn.reference_pos][1]: continue - if mystrand=='0': - gdic[pileupcolumn.reference_pos][0][2]=comp2(eval(gdic[pileupcolumn.reference_pos][0][2])) -diff -ru a/main/REDItoolKnown.py b/main/REDItoolKnown.py ---- a/main/REDItoolKnown.py 2020-07-16 16:16:59.360014702 -0500 -+++ b/main/REDItoolKnown.py 2020-07-16 16:17:22.063013634 -0500 -@@ -23,7 +23,7 @@ - try: import pysam - except: sys.exit('Pysam module not found.') - from multiprocessing import Process, Queue --from Queue import Empty -+from queue import Empty - - pysamVersion=pysam.__version__ - -@@ -34,7 +34,7 @@ - pid=str(os.getpid()+random.randint(0,999999999)) - - def usage(): -- print """ -+ print(""" - USAGE: python REDItoolKnown.py [options] - Options: - -i BAM file -@@ -82,12 +82,12 @@ - - For Tophat2 use 50 - - For GSNAP use 30 - --"""%(pid) -+"""%(pid)) - - try: - opts, args = getopt.getopt(sys.argv[1:], "i:f:k:t:o:c:q:m:O:s:edpuT:B:Sv:n:EP:r:hHIXG:K:l:C:F:x:g:U") - except getopt.GetoptError as err: -- print str(err) # will print something like "option -a not recognized" -+ print(str(err)) # will print something like "option -a not recognized" - usage() - sys.exit(2) - -@@ -257,7 +257,7 @@ - return False - try: - os.kill(pid, 0) -- except OSError, e: -+ except OSError as e: - return e.errno == errno.EPERM - else: - return True -@@ -361,9 +361,9 @@ - subs=[] - subv=[] - for i in seq.upper(): -- if b.has_key(i): b[i]+=1 -+ if i in b: b[i]+=1 - for i in b: -- if not b.has_key(ref): continue -+ if ref not in b: continue - if b[i]!=0 and i!=ref: - vv=float(b[i])/(b[i]+b[ref]) - subv.append((b[i],vv,ref+i)) -@@ -414,7 +414,7 @@ - a={'A':'T','T':'A','C':'G','G':'C'} - ss='' - for i in s.upper(): -- if a.has_key(i): ss+=a[i] -+ if i in a: ss+=a[i] - else: ss+='N' - return ss - -@@ -524,7 +524,7 @@ - fidxinfo.close() - # in rna-seq - rnof=[] --for i in rrefs.keys(): -+for i in list(rrefs.keys()): - if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i)) - ################################## - -@@ -568,8 +568,8 @@ - #mainbam.close() - #dicregions=dict([(regions[x],regionslens[x]) for x in range(len(regions))]) - #chrs=[x for x in regions if x not in nochrs] --dicregions=dict(rrefs.items()) --chrs=[x for x in dicregions.keys() if x not in nochrs] -+dicregions=dict(list(rrefs.items())) -+chrs=[x for x in list(dicregions.keys()) if x not in nochrs] - sys.stderr.write('Analysis on %i regions.\n' %(len(chrs))) - - if infolder!='': outfolder=os.path.join(outfolder_,'known_%s_%s' %(infolder,pid)) -@@ -654,7 +654,7 @@ - # else explore bam to find exact positions - for pileupcolumn in bam.pileup(chr,startk,endk,stepper='nofilter', max_depth=MAX_DEPTH): - if not startk<=pileupcolumn.reference_pos<=endk: continue -- if not kdic.has_key(pileupcolumn.reference_pos+1): continue -+ if pileupcolumn.reference_pos+1 not in kdic: continue - ref=fasta.fetch(chr,pileupcolumn.reference_pos,pileupcolumn.reference_pos+1).upper() - seq,qual,strand,squal,blatc='',0,'',[],'' - if rmsh: -@@ -668,7 +668,7 @@ - if pileupread.alignment.is_supplementary: continue - if pileupread.alignment.has_tag('SA'): continue - # escludi posizioni introniche nei pressi di splice sites -- if exss and di.has_key(pileupcolumn.reference_pos+1): continue -+ if exss and pileupcolumn.reference_pos+1 in di: continue - # multiple hit - if exh: - if pileupread.alignment.is_secondary: continue -@@ -754,7 +754,7 @@ - elif pileupread.alignment.is_read2: rt=2 - else: rt=0 - rname=pileupread.alignment.query_name+'_%i'%(rt) -- if d.has_key(rname): blatc+='0' #continue -+ if rname in d: blatc+='0' #continue - else: blatc+='1' - # se la base e' diversa dal reference - # se in regione omopolimerica scarta diff --git a/var/spack/repos/builtin/packages/reditools/setup.py.patch b/var/spack/repos/builtin/packages/reditools/setup.py.patch index f65345ad92..bde2bb579c 100644 --- a/var/spack/repos/builtin/packages/reditools/setup.py.patch +++ b/var/spack/repos/builtin/packages/reditools/setup.py.patch @@ -1,6 +1,16 @@ ---- a/setup.py 2020-07-16 14:01:48.601449013 -0500 -+++ b/setup.py 2020-07-16 14:02:31.547441668 -0500 -@@ -33,7 +33,7 @@ +--- a/setup.py 2021-05-20 16:22:44.832444514 -0500 ++++ b/setup.py 2021-05-20 16:23:43.127629214 -0500 +@@ -23,7 +23,8 @@ + 'accessory/readPsl.py', + 'accessory/subCount.py', + 'accessory/subCount2.py', +- 'accessory/rediportal2recoding.py' ++ 'accessory/rediportal2recoding.py', ++ 'accessory/get_DE_events.py' + ], + license='LICENSE.txt', + classifiers=[ +@@ -33,7 +34,7 @@ 'Operating System :: POSIX', 'Programming Language :: Python', ], -- cgit v1.2.3-60-g2f50