diff options
author | Glenn Johnson <glennpj@gmail.com> | 2020-08-07 14:40:52 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-07 14:40:52 -0500 |
commit | 1a11449c8675308292185834a9653a897e877f4a (patch) | |
tree | 82c3d1b30a4a1dddf77741e658b64ad51688b628 | |
parent | 11f2d01051a64ea680105f4c4019fc4ddee5ac0a (diff) | |
download | spack-1a11449c8675308292185834a9653a897e877f4a.tar.gz spack-1a11449c8675308292185834a9653a897e877f4a.tar.bz2 spack-1a11449c8675308292185834a9653a897e877f4a.tar.xz spack-1a11449c8675308292185834a9653a897e877f4a.zip |
New package - REDItools (#17703)
* New package - REDItools
This PR adds the REDItools package, along with a new package dependency,
py-fisher. This contains a patch generated from the python 2to3 script
as well as some other fixes. I am not sure if the project is ready to
support python-3 yet but I submitted the other patches upstream.
* Update var/spack/repos/builtin/packages/reditools/package.py
Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
6 files changed, 1042 insertions, 0 deletions
diff --git a/var/spack/repos/builtin/packages/py-fisher/package.py b/var/spack/repos/builtin/packages/py-fisher/package.py new file mode 100644 index 0000000000..45e0fc5110 --- /dev/null +++ b/var/spack/repos/builtin/packages/py-fisher/package.py @@ -0,0 +1,21 @@ +# Copyright 2013-2020 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from spack import * + + +class PyFisher(PythonPackage): + """Fisher's Exact Test. + + Simple, fast implementation of Fisher's exact test.""" + + homepage = "https://github.com/brentp/fishers_exact_test" + url = "https://pypi.io/packages/source/f/fisher/fisher-0.1.9.tar.gz" + + version('0.1.9', sha256='d378b3f7e488e2a679c6d0e5ea1bce17bc931c2bfe8ec8424ee47a74f251968d') + + depends_on('py-setuptools', type='build') + depends_on('py-numpy', type=('build', 'run')) + depends_on('py-pytest', type='test') diff --git a/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch b/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch new file mode 100644 index 0000000000..a88358829c --- /dev/null +++ b/var/spack/repos/builtin/packages/reditools/REDItoolDenovo.py.patch @@ -0,0 +1,11 @@ +--- a/main/REDItoolDenovo.py 2020-07-16 17:35:46.008030346 -0500 ++++ b/main/REDItoolDenovo.py 2020-07-16 17:38:39.700035490 -0500 +@@ -768,7 +768,7 @@ + for j in ridxinfo.split('\n'): #MOD + l=(j.strip()).split('\t') + if l[0] in ['*', '']: continue #MOD +- if int(l[2])+int(l[3]) > 0: rrefs[l[0]]=int(l[1]) ++ if int(l[2])+int(l[3]) > 0: rrefs[l[0]]=int(l[1]) + frefs=[] + fidxinfo=open(fastafile+'.fai') + for j in fidxinfo: diff --git a/var/spack/repos/builtin/packages/reditools/interpreter.patch b/var/spack/repos/builtin/packages/reditools/interpreter.patch new file mode 100644 index 0000000000..2e3f171e73 --- /dev/null +++ b/var/spack/repos/builtin/packages/reditools/interpreter.patch @@ -0,0 +1,41 @@ +diff -ru a/accessory/get_DE_events.py b/accessory/get_DE_events.py +--- a/accessory/get_DE_events.py 2020-07-16 17:18:47.012982992 -0500 ++++ b/accessory/get_DE_events.py 2020-07-16 17:19:47.531986703 -0500 +@@ -1,3 +1,4 @@ ++#!/usr/bin/env python + #################################### REDI OUT TABLE ######################################################## + #Region Position Reference Strand Coverage-q30 MeanQ BaseCount[A,C,G,T] # + #AllSubs Frequency gCoverage-q30 gMeanQ gBaseCount[A,C,G,T] gAllSubs gFrequency # +diff -ru a/accessory/readPsl.py b/accessory/readPsl.py +--- a/accessory/readPsl.py 2020-07-16 17:18:47.012982992 -0500 ++++ b/accessory/readPsl.py 2020-07-16 17:20:02.829987622 -0500 +@@ -1,4 +1,4 @@ +- ++#!/usr/bin/env python + import sys, os + import math + +diff -ru a/accessory/subCount2.py b/accessory/subCount2.py +--- a/accessory/subCount2.py 2020-07-16 17:18:47.012982992 -0500 ++++ b/accessory/subCount2.py 2020-07-16 17:20:22.650988801 -0500 +@@ -1,3 +1,4 @@ ++#!/usr/bin/env python + import sys + + try: +diff -ru a/accessory/subCount.py b/accessory/subCount.py +--- a/accessory/subCount.py 2020-07-16 17:18:47.012982992 -0500 ++++ b/accessory/subCount.py 2020-07-16 17:20:35.502989558 -0500 +@@ -1,3 +1,4 @@ ++#!/usr/bin/env python + import sys + + try: +diff -ru a/main/REDItoolDenovo.py b/main/REDItoolDenovo.py +--- a/main/REDItoolDenovo.py 2020-07-16 17:18:47.012982992 -0500 ++++ b/main/REDItoolDenovo.py 2020-07-16 17:21:01.018991045 -0500 +@@ -1,3 +1,4 @@ ++#!/usr/bin/env python + # coding=utf-8 + # Copyright (c) 2013-2014 Ernesto Picardi <ernesto.picardi@uniba.it> + # diff --git a/var/spack/repos/builtin/packages/reditools/package.py b/var/spack/repos/builtin/packages/reditools/package.py new file mode 100644 index 0000000000..7dd0197a51 --- /dev/null +++ b/var/spack/repos/builtin/packages/reditools/package.py @@ -0,0 +1,32 @@ +# Copyright 2013-2020 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from spack import * + + +class Reditools(PythonPackage): + """REDItools: python scripts for RNA editing detection by RNA-Seq data. + + REDItools are simple python scripts conceived to facilitate the + investigation of RNA editing at large-scale and devoted to research groups + that would to explore such phenomenon in own data but don't have sufficient + bioinformatics skills. They work on main operating systems (although + unix/linux-based OS are preferred), can handle reads from whatever platform + in the standard BAM format and implement a variety of filters.""" + + homepage = "https://github.com/BioinfoUNIBA/REDItools" + git = "https://github.com/BioinfoUNIBA/REDItools.git" + + version('1.3_2020-03-20', commit='cf47f3d54f324aeb9650bcf8bfacf5a967762a55') + + depends_on('py-pysam', type=('build', 'run')) + depends_on('py-fisher', type=('build', 'run')) + depends_on('blat', type='run') + depends_on('tabix', type='run') + + patch('REDItoolDenovo.py.patch') + patch('interpreter.patch') + patch('setup.py.patch') + patch('python2to3.patch', when='^python@3:') diff --git a/var/spack/repos/builtin/packages/reditools/python2to3.patch b/var/spack/repos/builtin/packages/reditools/python2to3.patch new file mode 100644 index 0000000000..94937d175d --- /dev/null +++ b/var/spack/repos/builtin/packages/reditools/python2to3.patch @@ -0,0 +1,926 @@ +diff -ru a/accessory/AnnotateTable.py b/accessory/AnnotateTable.py +--- a/accessory/AnnotateTable.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/AnnotateTable.py 2020-07-16 16:17:22.159013630 -0500 +@@ -28,7 +28,7 @@ + sys.stderr.write('Pysam version used: %s\n' %(pysamVersion)) + + def usage(): +- print """ ++ print(""" + USAGE: python AnnotateTable.py [options] + Options: + -a Sorted Annotation file +@@ -44,12 +44,12 @@ + -C Columns with base distribution [7,12] (in combination with -S) + -o Save lines to a file + -h Print this help +-""" ++""") + + try: + opts, args = getopt.getopt(sys.argv[1:], 'i:a:o:hs:c:n:SC:uk:r:',["help"]) +-except getopt.GetoptError, err: +- print str(err) ++except getopt.GetoptError as err: ++ print(str(err)) + usage() + sys.exit() + +@@ -101,7 +101,7 @@ + a={'A':'T','T':'A','C':'G','G':'C'} + ss='' + for i in s.upper(): +- if a.has_key(i): ss+=a[i] ++ if i in a: ss+=a[i] + elif i==' ': ss+=' ' + elif i=='-': ss+='-' + else: ss+='N' +@@ -125,23 +125,23 @@ + anns='+' + for i in res: + if i[3]=='+': +- if d['+'].has_key(i[1]): ++ if i[1] in d['+']: + if i[0] not in d['+'][i[1]][0]: d['+'][i[1]][0]=d['+'][i[1]][0]+','+i[0] + if i[2]+'-'+i[0] not in d['+'][i[1]][1]: d['+'][i[1]][1]=d['+'][i[1]][1]+','+i[2]+'-'+i[0] + else: + d['+'][i[1]]=[i[0],i[2]+'-'+i[0]] + elif i[3]=='-': +- if d['-'].has_key(i[1]): ++ if i[1] in d['-']: + if i[0] not in d['-'][i[1]][0]: d['-'][i[1]][0]=d['-'][i[1]][0]+','+i[0] + if i[2]+'-'+i[0] not in d['-'][i[1]][1]: d['-'][i[1]][1]=d['-'][i[1]][1]+','+i[2]+'-'+i[0] + else: + d['-'][i[1]]=[i[0],i[2]+'-'+i[0]] +- gip='$'.join(d['+'].keys()) +- featp='$'.join([d['+'][x][0] for x in d['+'].keys()]) +- tip='$'.join([d['+'][x][1] for x in d['+'].keys()]) +- gim='$'.join(d['-'].keys()) +- featm='$'.join([d['-'][x][0] for x in d['-'].keys()]) +- tim='$'.join([d['-'][x][1] for x in d['-'].keys()]) ++ gip='$'.join(list(d['+'].keys())) ++ featp='$'.join([d['+'][x][0] for x in list(d['+'].keys())]) ++ tip='$'.join([d['+'][x][1] for x in list(d['+'].keys())]) ++ gim='$'.join(list(d['-'].keys())) ++ featm='$'.join([d['-'][x][0] for x in list(d['-'].keys())]) ++ tim='$'.join([d['-'][x][1] for x in list(d['-'].keys())]) + p=[featp,gip,tip] + m=[featm,gim,tim] + pm=[(featp+'&'+featm).strip('&'),(gip+'&'+gim).strip('&'),(tip+'&'+tim).strip('&')] +@@ -187,7 +187,7 @@ + elif strand=='-': res=ann[1] + else: res=ann[2] + for i in addc: +- print prinfo[i]+ res[i] ++ print(prinfo[i]+ res[i]) + except: sys.exit('Error: not correct position.') + + if af: +@@ -200,7 +200,7 @@ + h=[i.strip()] + for k in addc: h.append(hinfo[k]) + if save: o.write('\t'.join(h)+'\n') +- else: print '\t'.join(h) ++ else: print('\t'.join(h)) + continue + if i.startswith(skip): continue + l=(i.strip()).split('\t') +@@ -230,7 +230,7 @@ + else: res=ann[2] + for j in addc: l.append(res[j]) + if save: o.write('\t'.join(l)+'\n') +- else: print '\t'.join(l) ++ else: print('\t'.join(l)) + tabix.close() + if save: + o.close() +diff -ru a/accessory/FilterTable.py b/accessory/FilterTable.py +--- a/accessory/FilterTable.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/FilterTable.py 2020-07-16 16:17:22.209013627 -0500 +@@ -30,7 +30,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python FilterTable.py [options] + Options: + -i Table file +@@ -43,12 +43,12 @@ + -p Print simple statistics + -h Print this help + +-""" ++""") + + try: + opts, args = getopt.getopt(sys.argv[1:], 'i:o:f:hs:F:S:Ep',["help"]) +-except getopt.GetoptError, err: +- print str(err) ++except getopt.GetoptError as err: ++ print(str(err)) + usage() + sys.exit() + +diff -ru a/accessory/get_DE_events.py b/accessory/get_DE_events.py +--- a/accessory/get_DE_events.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/get_DE_events.py 2020-07-16 16:17:22.646013607 -0500 +@@ -104,7 +104,7 @@ + #WARNING those are np arrays. + + for i in range(0,edit_level_table.shape[0]): +- print i #keep track of progress ++ print(i) #keep track of progress + disease_edit_row = edit_level_table.loc[i, disease_people] + control_edit_row = edit_level_table.loc[i, control_people] + disease_cov_row = cov_table.loc[i, disease_people] +@@ -206,7 +206,7 @@ + #write the full_table to output + full_table.to_csv(output_file, sep='\t', index=False) + +- print "job completed\n" ++ print("job completed\n") + + + +@@ -225,12 +225,12 @@ + + def Sample_percentage(row): + """Percentage of samples from each type""" +- percentage = (len(filter(lambda x: x!= '-', row))/float(len(row)))*100 ++ percentage = (len([x for x in row if x!= '-'])/float(len(row)))*100 + return round(percentage) + + def Sample_count(row): + """Number of samples from each type""" +- count = len(filter(lambda x: x!= '-', row)) ++ count = len([x for x in row if x!= '-']) + return count + + def get_bh(pvalue,siglevel): +@@ -269,7 +269,7 @@ + if(row_a[-1] != '-' and row_a[-1] != 0.0 and row_a[-1] <= 0.05): + row = row[0].split('_') + row[2:] + row.insert(2, 'A.to.G') +- print '\t'.join(map(str,row)) ++ print('\t'.join(map(str,row))) + + def tuple_replace(i): + if type(i) == tuple: +@@ -292,11 +292,11 @@ + with open(samples_informations_file, 'r') as f: + for line in f: + if line.startswith('SRR'): +- line = map(str.strip, line.split(',')) ++ line = list(map(str.strip, line.split(','))) + sample_informations.setdefault(line[0], line[1]) + + +-cwd = filter(os.path.isdir, os.listdir(os.getcwd())) ++cwd = list(filter(os.path.isdir, os.listdir(os.getcwd()))) + all_available_sites = [] + sample_edited_sites = {} + for directory in cwd: +@@ -306,7 +306,7 @@ + with open(table,'r') as a: + for line in a: + if line.startswith('chr'): +- s = map(str.strip, line.split("\t")) ++ s = list(map(str.strip, line.split("\t"))) + if s[7] == 'AG': + site, freq, coverage = s[0] + "_" + s[1], s[8], s[4] + freq_gnum_cov = '%s^%s^%s' %(s[8],eval(s[6])[2],s[4]) +@@ -314,14 +314,14 @@ + if (int(coverage) >= min_coverage) and (float(freq) >= min_edit_frequency): + sample_edited_sites.setdefault((directory, site), []).append((freq, freq_gnum_cov)) + +-table_columns = map(lambda x: x + '_' + sample_informations[x], sorted(sample_informations.keys())) ++table_columns = [x + '_' + sample_informations[x] for x in sorted(sample_informations.keys())] + + disease = [i for i in table_columns if i.upper().find('DIS') != -1] + controls = [i for i in table_columns if i.upper().find('CTRL') != -1] + + if enable_linear_model: + outtable='' +- header = ['chromosome', 'position', 'type_editing'] + map(remove_underscore, controls) + map(remove_underscore, disease) ++ header = ['chromosome', 'position', 'type_editing'] + list(map(remove_underscore, controls)) + list(map(remove_underscore, disease)) + outtable += '\t'.join(header) + outtable += '\n' + #print '\t'.join(header) +@@ -329,13 +329,13 @@ + row = [chrom] + for col in header[2:]:#header.index('[num_controls/num_disease]')]: + row.append(sample_edited_sites.get((col.split('_')[0],chrom), ['-'])[0]) +- ctrls = zip(*(zip(controls,row[1:])))[1] +- dss = zip(*(zip(disease,row[len(ctrls)+1:])))[1] +- ctrls_freq = map(tuple_replace, ctrls) +- dss_freq = map(tuple_replace, dss) ++ ctrls = list(zip(*(list(zip(controls,row[1:])))))[1] ++ dss = list(zip(*(list(zip(disease,row[len(ctrls)+1:])))))[1] ++ ctrls_freq = list(map(tuple_replace, ctrls)) ++ dss_freq = list(map(tuple_replace, dss)) + row.append(str([Sample_count(ctrls), Sample_count(dss)])) + +- row_b = map(tuple_replace_bis, row) ++ row_b = list(map(tuple_replace_bis, row)) + row_b = row_b[0].split('_') + row_b[2:] + row_b.insert(2, 'A.to.G') + final_list = row_b[:-1] +@@ -359,20 +359,20 @@ + if pvalue_correction == 2: + header += ['pvalue BH corrected'] + +- print '\t'.join(header) ++ print('\t'.join(header)) + + for chrom in sorted(all_available_sites, key = lambda x: Set_Chr_Nr(x)): + row = [chrom] + for col in header[3:header.index('[num_controls/num_disease]')]: + row.append(sample_edited_sites.get((col.split('_')[0],chrom), ['-'])[0]) +- ctrls = zip(*(zip(controls,row[1:])))[1] +- dss = zip(*(zip(disease,row[len(ctrls)+1:])))[1] +- ctrls_freq = map(tuple_replace, ctrls) +- dss_freq = map(tuple_replace, dss) ++ ctrls = list(zip(*(list(zip(controls,row[1:])))))[1] ++ dss = list(zip(*(list(zip(disease,row[len(ctrls)+1:])))))[1] ++ ctrls_freq = list(map(tuple_replace, ctrls)) ++ dss_freq = list(map(tuple_replace, dss)) + row.append(str([Sample_count(ctrls), Sample_count(dss)])) + if (Sample_percentage(ctrls) >= min_sample_testing) and (Sample_percentage(dss) >= min_sample_testing): +- ctrls_mean = sum(map(float, filter(lambda x: x!= '-', ctrls_freq)))/len(filter(lambda x: x!= '-', ctrls_freq)) +- dss_mean = sum(map(float, filter(lambda x: x!= '-', dss_freq)))/len(filter(lambda x : x!= '-', dss_freq)) ++ ctrls_mean = sum(map(float, [x for x in ctrls_freq if x!= '-']))/len([x for x in ctrls_freq if x!= '-']) ++ dss_mean = sum(map(float, [x for x in dss_freq if x!= '-']))/len([x for x in dss_freq if x!= '-']) + delta_diff = abs(ctrls_mean - dss_mean) + pvalue=stats.mannwhitneyu(ctrls_freq, dss_freq, alternative='two-sided') + row.append(round(delta_diff, 3)) +@@ -388,12 +388,12 @@ + row += ['-', '-'] + else: + row += ['-', '-', '-'] +- row_a = map(tuple_replace, row) +- row_b = map(tuple_replace_bis, row) ++ row_a = list(map(tuple_replace, row)) ++ row_b = list(map(tuple_replace_bis, row)) + if pvalue_correction != 0 and only_significants == 'yes': + only_sig(row_a,row_b) + else: + row_b = row_b[0].split('_') + row_b[2:] + row_b.insert(2, 'A.to.G') +- print '\t'.join(map(str,row_b)) ++ print('\t'.join(map(str,row_b))) + +diff -ru a/accessory/GFFtoTabix.py b/accessory/GFFtoTabix.py +--- a/accessory/GFFtoTabix.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/GFFtoTabix.py 2020-07-16 16:17:22.264013625 -0500 +@@ -31,7 +31,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python GFFtoTabix.py [options] + Options: + -i GFF file +@@ -41,7 +41,7 @@ + -u Save an uncompressed GFF copy (add _copy suffix) + -h Print this help + +-""" ++""") + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:Sb:t:hu",["help"]) +@@ -49,7 +49,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/accessory/rediportal2recoding.py b/accessory/rediportal2recoding.py +--- a/accessory/rediportal2recoding.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/rediportal2recoding.py 2020-07-16 16:17:22.709013604 -0500 +@@ -41,4 +41,4 @@ + gff_row = line[0] + '\t'+ valore + '\t' + 'ed' + '\t' + line[1] + \ + '\t' + line[1] + '\t' + '.' + '\t' + line[4] + '\t' + '.' + '\t' + \ + 'gene_id' + ' ' + '"ed_%s";' %(i) + ' ' + 'transcript_id' + ' ' + '"ed_%s";' %(i) +- print gff_row ++ print(gff_row) +diff -ru a/accessory/SearchInTable.py b/accessory/SearchInTable.py +--- a/accessory/SearchInTable.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/SearchInTable.py 2020-07-16 16:17:22.309013623 -0500 +@@ -25,7 +25,7 @@ + #pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python SearchInTable.py [options] + Options: + -i Sorted table file (first col=reference; second col=coordinate 1 based) +@@ -42,7 +42,7 @@ + -o Save found/not found positions on file + -h Print this help + +-""" ++""") + #-k skip first INT lines [0] + + try: +@@ -51,7 +51,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/accessory/selectPositions.py b/accessory/selectPositions.py +--- a/accessory/selectPositions.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/selectPositions.py 2020-07-16 16:17:22.833013598 -0500 +@@ -25,7 +25,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python selectPositions.py [options] + Options: + -i Table file from REDItools +@@ -44,7 +44,7 @@ + -o Save selected positions on outTable_%s + -h Print this help + +-"""%(pid) ++"""%(pid)) + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:c:C:v:s:f:F:euo:hrd:RV:",["help"]) +@@ -52,7 +52,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/accessory/SortGFF.py b/accessory/SortGFF.py +--- a/accessory/SortGFF.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/SortGFF.py 2020-07-16 16:17:22.357013620 -0500 +@@ -35,7 +35,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python SortGFF.py [options] + Options: + -i GFF file +@@ -44,7 +44,7 @@ + -t Temporary directory to use (multiple -t may be used) + -h Print this help + +-"""%(pid) ++"""%(pid)) + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:o:b:t:h",["help"]) +@@ -52,7 +52,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/accessory/SortTable.py b/accessory/SortTable.py +--- a/accessory/SortTable.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/SortTable.py 2020-07-16 16:17:22.411013618 -0500 +@@ -35,7 +35,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python SortTable.py [options] + Options: + -i Table file +@@ -50,7 +50,7 @@ + -t Temporary directory to use (multiple -t may be used) + -h Print this help + +-"""%(pid) ++"""%(pid)) + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:o:b:t:hd:s:c:e:m:O",["help"]) +@@ -58,7 +58,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/accessory/subCount2.py b/accessory/subCount2.py +--- a/accessory/subCount2.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/subCount2.py 2020-07-16 16:17:22.854013597 -0500 +@@ -28,5 +28,5 @@ + for i in s: + try: v=(s[i]/float(all))*100 + except: v=0.0 +- print i,s[i],all,v ++ print(i,s[i],all,v) + +diff -ru a/accessory/subCount.py b/accessory/subCount.py +--- a/accessory/subCount.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/subCount.py 2020-07-16 16:17:22.844013597 -0500 +@@ -30,5 +30,5 @@ + for i in s: + try: v=(s[i]/float(all))*100 + except: v=0.0 +- print i,s[i],all,v ++ print(i,s[i],all,v) + +diff -ru a/accessory/TableToGFF.py b/accessory/TableToGFF.py +--- a/accessory/TableToGFF.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/TableToGFF.py 2020-07-16 16:17:22.474013615 -0500 +@@ -29,7 +29,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python TableToGFF.py [options] + Options: + -i Table file from REDItools +@@ -40,7 +40,7 @@ + -o Outfile [outTable_%s.gff] + -h Print this help + +-"""%(pid) ++"""%(pid)) + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:o:sthT:b:",["help"]) +@@ -48,7 +48,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/accessory/tableToTabix.py b/accessory/tableToTabix.py +--- a/accessory/tableToTabix.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/accessory/tableToTabix.py 2020-07-16 16:17:22.913013594 -0500 +@@ -31,7 +31,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python tableToTabix.py [options] + Options: + -i TAB-delimited file +@@ -46,7 +46,7 @@ + -u Save an uncompressed GFF copy (add _copy suffix) + -h Print this help + +-""" ++""") + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:Sb:t:hus:c:e:m:0",["help"]) +@@ -54,7 +54,7 @@ + usage() + sys.exit(2) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +diff -ru a/main/REDItoolDenovo.py b/main/REDItoolDenovo.py +--- a/main/REDItoolDenovo.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/main/REDItoolDenovo.py 2020-07-16 16:17:21.265013672 -0500 +@@ -75,7 +75,7 @@ + return 1 + numerator = 1 + denominator = 1 +- for i in xrange(s+1, population + 1): ++ for i in range(s+1, population + 1): + numerator *= i + denominator *= (i - s) + return numerator/denominator +@@ -275,7 +275,7 @@ + try: import pysam + except: sys.exit('Pysam module not found.') + from multiprocessing import Process, Queue +-from Queue import Empty ++from queue import Empty + try: + from fisher import pvalue + exfisher=1 +@@ -292,7 +292,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python REDItoolDenovo.py [options] + Options: + -i BAM file +@@ -343,12 +343,12 @@ + - For Tophat2 use 50 + - For GSNAP use 30 + +-"""%(pid) ++"""%(pid)) + + try: + opts, args = getopt.getopt(sys.argv[1:], "b:f:k:t:o:c:q:m:O:s:edpuT:B:v:n:EP:r:hHa:i:lIU:V:w:XG:K:F:g:x:W") + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +@@ -474,7 +474,7 @@ + annfile=a + uann=1 + else: +- print o ++ print(o) + assert False, "Unhandled Option" + + ####### +@@ -532,7 +532,7 @@ + return False + try: + os.kill(pid, 0) +- except OSError, e: ++ except OSError as e: + return e.errno == errno.EPERM + else: + return True +@@ -637,9 +637,9 @@ + subs=[] + subv=[] + for i in seq.upper(): +- if b.has_key(i): b[i]+=1 ++ if i in b: b[i]+=1 + for i in b: +- if not b.has_key(ref): continue ++ if ref not in b: continue + if b[i]!=0 and i!=ref: + vv=float(b[i])/(b[i]+b[ref]) + subv.append((b[i],vv,ref+i)) +@@ -690,7 +690,7 @@ + a={'A':'T','T':'A','C':'G','G':'C'} + ss='' + for i in s.upper(): +- if a.has_key(i): ss+=a[i] ++ if i in a: ss+=a[i] + else: ss+='N' + return ss + +@@ -777,7 +777,7 @@ + frefs.append(l[0]) + fidxinfo.close() + rnof=[] +-for i in rrefs.keys(): ++for i in list(rrefs.keys()): + if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i)) + ##################### + +@@ -817,8 +817,8 @@ + #mainbam=pysam.Samfile(bamfile,"rb") + #regions=mainbam.references + #mainbam.close() +-dicregions=dict(rrefs.items()) +-chrs=[x for x in dicregions.keys() if x not in nochrs] ++dicregions=dict(list(rrefs.items())) ++chrs=[x for x in list(dicregions.keys()) if x not in nochrs] + sys.stderr.write('Analysis on %i regions.\n' %(len(chrs))) + + if infolder!='': outfolder=os.path.join(outfolder_,'denovo_%s_%s' %(infolder,pid)) +@@ -906,7 +906,7 @@ + if pileupread.alignment.has_tag('SA'): continue + #s,q,t,qq=pileupread.alignment.seq[pileupread.qpos].upper(),ord(pileupread.alignment.qual[pileupread.qpos])-QVAL,'*',pileupread.alignment.qual[pileupread.qpos] + # escludi posizioni introniche nei pressi di splice sites +- if exss and di.has_key(pileupcolumn.reference_pos+1): continue #MOD ++ if exss and pileupcolumn.reference_pos+1 in di: continue #MOD + # multiple hit + if exh: #MOD + if pileupread.alignment.is_secondary: continue #MOD +@@ -998,7 +998,7 @@ + elif pileupread.alignment.is_read2: rt=2 + else: rt=0 + rname=pileupread.alignment.query_name+'_%i'%(rt) #MOD +- if d.has_key(rname): blatc+='0' #continue ++ if rname in d: blatc+='0' #continue + else: blatc+='1' + # se la base e' diversa dal reference + # se in regione omopolimerica scarta +@@ -1032,7 +1032,7 @@ + if not custsub: + ni='ACGT' + for b in range(4): +- if dsubs.has_key(ref.upper()+ni[b]): ++ if ref.upper()+ni[b] in dsubs: + dsubs[ref.upper()+ni[b]]+=bcomp[b] + if expos: + if chr in extabix.contigs: +diff -ru a/main/REDItoolDnaRna.py b/main/REDItoolDnaRna.py +--- a/main/REDItoolDnaRna.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/main/REDItoolDnaRna.py 2020-07-16 16:17:21.757013649 -0500 +@@ -23,7 +23,7 @@ + try: import pysam + except: sys.exit('Pysam module not found.') + from multiprocessing import Process, Queue +-from Queue import Empty ++from queue import Empty + import gzip + + pysamVersion=pysam.__version__ +@@ -34,7 +34,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python REDItoolDnaRNA.py [options] + Options: + -i RNA-Seq BAM file +@@ -103,7 +103,7 @@ + - For Tophat2 use 50 + - For GSNAP use 30 + +-"""%(pid) ++"""%(pid)) + + #option --fastq Fastq to get reads [requires --reads], separated by comma [if paired] NOT IMPLEMENTED + #option --rmOver Remove overlapping reads NOT IMPLEMENTED +@@ -111,7 +111,7 @@ + try: + opts, args = getopt.getopt(sys.argv[1:], "i:f:k:t:o:c:q:m:O:s:edpuA:a:B:b:lLv:n:EPr:hHIXG:K:j:C:JDUzw:N:ZW:RVMT:F:x:g:SY:",["help","gzip","reads","addP","rmIndels"]) + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +@@ -325,7 +325,7 @@ + params.append('Analysis ID: %s\n' %(pid)) + params.append('Analysis time: %s\n' %(script_time)) + params.append('-i --> RNA-Seq BAM file: %s\n' %(bamfile)) +-params.append('-j --> DNA-Seq BAM file(s): %s\n' %(','.join(dgbamfile.keys()))) ++params.append('-j --> DNA-Seq BAM file(s): %s\n' %(','.join(list(dgbamfile.keys())))) + params.append('-I --> Sort RNA-Seq BAM file: %i\n' %(sortbam)) + params.append('-J --> Sort DNA-Seq BAM file: %i\n' %(sortgbam)) + params.append('-f --> Reference file: %s\n' %(fastafile)) +@@ -483,9 +483,9 @@ + subs=[] + subv=[] + for i in seq.upper(): +- if b.has_key(i): b[i]+=1 ++ if i in b: b[i]+=1 + for i in b: +- if not b.has_key(ref): continue ++ if ref not in b: continue + if b[i]!=0 and i!=ref: + vv=float(b[i])/(b[i]+b[ref]) + subv.append((b[i],vv,ref+i)) +@@ -537,7 +537,7 @@ + a={'A':'T','T':'A','C':'G','G':'C'} + ss='' + for i in s.upper(): +- if a.has_key(i): ss+=a[i] ++ if i in a: ss+=a[i] + elif i==' ': ss+=' ' + elif i=='-': ss+='-' + else: ss+='N' +@@ -682,7 +682,7 @@ + if l.count(i[0])==2: + s='=' + if i[1]!=i[2]: s='!' +- if us.has_key(i[0]): us[i[0]].append((x,s)) ++ if i[0] in us: us[i[0]].append((x,s)) + else: us[i[0]]=[(x,s)] + x+=1 + for i in us: +@@ -753,7 +753,7 @@ + pysam.faidx(fastafile) + ########################################################### + # Check reference for name consistency +-grefs=dgdic.keys() ++grefs=list(dgdic.keys()) + rrefs={} + ridxinfo=pysam.idxstats(bamfile) + for j in ridxinfo.split('\n'): #MOD +@@ -769,7 +769,7 @@ + fidxinfo.close() + # in rna-seq + rnof=[] +-for i in rrefs.keys(): ++for i in list(rrefs.keys()): + if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i)) + if len(gbamfile)!=0: + for i in grefs: +@@ -833,9 +833,9 @@ + #regions=mainbam.references + #regionslens=mainbam.lengths + #mainbam.close() +-dicregions=dict(rrefs.items()) ++dicregions=dict(list(rrefs.items())) + #dicregions=dict([(regions[x],regionslens[x]) for x in range(len(regions))]) +-chrs=[x for x in dicregions.keys() if x not in nochrs] ++chrs=[x for x in list(dicregions.keys()) if x not in nochrs] + if fworkR: sys.stderr.write('Analysis on region %s:%i-%i.\n' %(workR[0],workR[1][0],workR[1][1])) + else: sys.stderr.write('Analysis on %i regions.\n' %(len(chrs))) + ########################################################### +@@ -883,7 +883,7 @@ + isgbam=1 + inputs=myinput.split('$') + chr,bamfile,start_region,lenregion,suff_=inputs[0],inputs[1],int(inputs[2]),int(inputs[3]),inputs[4] +- if not dgdic.has_key(chr): isgbam=0 ++ if chr not in dgdic: isgbam=0 + outfile=os.path.join(outfolder,'table_%s'%(suff_)) + if slist: + if gziptab: outrna=gzip.open(os.path.join(outfolder,'pileupRNA_%s.gz'%(suff)),'wb') +@@ -1024,7 +1024,7 @@ + if pileupread.alignment.is_read1: rt=1 + elif pileupread.alignment.is_read2: rt=2 + rname=pileupread.alignment.query_name+'_%i'%(rt) +- if gd.has_key(rname): gblatc+='0' #continue ++ if rname in gd: gblatc+='0' #continue + else: gblatc+='1' + # se la base e' diversa dal reference + # se in regione omopolimerica scarta +@@ -1079,7 +1079,7 @@ + #s,q,t,qq=pileupread.alignment.query_sequence[pileupread.query_position].upper(),pileupread.alignment.query_qualities[pileupread.query_position],'*',pileupread.alignment.qual[pileupread.query_position] + #s,q,t,qq=pileupread.alignment.seq[pileupread.qpos].upper(),ord(pileupread.alignment.qual[pileupread.qpos])-QVAL,'*',pileupread.alignment.qual[pileupread.qpos] + # escludi posizioni introniche nei pressi di splice sites +- if exss and di.has_key(pileupcolumn.reference_pos+1): continue ++ if exss and pileupcolumn.reference_pos+1 in di: continue + # multiple hit + if exh: + if pileupread.alignment.is_secondary: continue +@@ -1179,7 +1179,7 @@ + elif pileupread.alignment.is_read2: rt=2 + else: rt=0 + rname=pileupread.alignment.query_name+'_%i'%(rt) +- if d.has_key(rname): blatc+='0' #continue ++ if rname in d: blatc+='0' #continue + else: blatc+='1' + # se la base e' diversa dal reference + # se in regione omopolimerica scarta +@@ -1213,7 +1213,7 @@ + else: addpos=(pileupread.alignment.query_name,'-',pileupread.alignment.reference_name,'-',pileupread.alignment.reference_start,pileupread.alignment.reference_end,0 , 0) + rqname_comp=rqname+'$'+pileupread.alignment.reference_name+'$'+str(pileupcolumn.reference_pos+1) + #addpos=(chr+'_'+str(pileupcolumn.reference_pos+1),pileupcolumn.reference_pos+1) +- if not grdb.has_key(rqname): ++ if rqname not in grdb: + #print rqname reference_start + outreads.write('>'+rqname_comp+'\n'+rseqname+'\n') + #grdb[rqname]=[addpos] +@@ -1221,7 +1221,7 @@ + # if addpos not in grdb[rqname]: + # grdb[rqname].append(addpos) + if addP: +- if not grdb2.has_key(rname): grdb2[rname]=addpos ++ if rname not in grdb2: grdb2[rname]=addpos + if seq.strip()!='': + #print seq,qual,squal + if rmIndel: +@@ -1268,7 +1268,7 @@ + if exinv and subs=='-': continue + if not checkSubs(subs): continue + #print out rna-seq info + dna-seq +- if gdic.has_key(pileupcolumn.reference_pos): # abbiamo l'informazione genomica ++ if pileupcolumn.reference_pos in gdic: # abbiamo l'informazione genomica + if exnonh and not gdic[pileupcolumn.reference_pos][1]: continue + if mystrand=='0': + gdic[pileupcolumn.reference_pos][0][2]=comp2(eval(gdic[pileupcolumn.reference_pos][0][2])) +diff -ru a/main/REDItoolKnown.py b/main/REDItoolKnown.py +--- a/main/REDItoolKnown.py 2020-07-16 16:16:59.360014702 -0500 ++++ b/main/REDItoolKnown.py 2020-07-16 16:17:22.063013634 -0500 +@@ -23,7 +23,7 @@ + try: import pysam + except: sys.exit('Pysam module not found.') + from multiprocessing import Process, Queue +-from Queue import Empty ++from queue import Empty + + pysamVersion=pysam.__version__ + +@@ -34,7 +34,7 @@ + pid=str(os.getpid()+random.randint(0,999999999)) + + def usage(): +- print """ ++ print(""" + USAGE: python REDItoolKnown.py [options] + Options: + -i BAM file +@@ -82,12 +82,12 @@ + - For Tophat2 use 50 + - For GSNAP use 30 + +-"""%(pid) ++"""%(pid)) + + try: + opts, args = getopt.getopt(sys.argv[1:], "i:f:k:t:o:c:q:m:O:s:edpuT:B:Sv:n:EP:r:hHIXG:K:l:C:F:x:g:U") + except getopt.GetoptError as err: +- print str(err) # will print something like "option -a not recognized" ++ print(str(err)) # will print something like "option -a not recognized" + usage() + sys.exit(2) + +@@ -257,7 +257,7 @@ + return False + try: + os.kill(pid, 0) +- except OSError, e: ++ except OSError as e: + return e.errno == errno.EPERM + else: + return True +@@ -361,9 +361,9 @@ + subs=[] + subv=[] + for i in seq.upper(): +- if b.has_key(i): b[i]+=1 ++ if i in b: b[i]+=1 + for i in b: +- if not b.has_key(ref): continue ++ if ref not in b: continue + if b[i]!=0 and i!=ref: + vv=float(b[i])/(b[i]+b[ref]) + subv.append((b[i],vv,ref+i)) +@@ -414,7 +414,7 @@ + a={'A':'T','T':'A','C':'G','G':'C'} + ss='' + for i in s.upper(): +- if a.has_key(i): ss+=a[i] ++ if i in a: ss+=a[i] + else: ss+='N' + return ss + +@@ -524,7 +524,7 @@ + fidxinfo.close() + # in rna-seq + rnof=[] +-for i in rrefs.keys(): ++for i in list(rrefs.keys()): + if i not in frefs: sys.stderr.write('WARNING: Region %s in RNA-Seq not found in reference file.\n' %(i)) + ################################## + +@@ -568,8 +568,8 @@ + #mainbam.close() + #dicregions=dict([(regions[x],regionslens[x]) for x in range(len(regions))]) + #chrs=[x for x in regions if x not in nochrs] +-dicregions=dict(rrefs.items()) +-chrs=[x for x in dicregions.keys() if x not in nochrs] ++dicregions=dict(list(rrefs.items())) ++chrs=[x for x in list(dicregions.keys()) if x not in nochrs] + sys.stderr.write('Analysis on %i regions.\n' %(len(chrs))) + + if infolder!='': outfolder=os.path.join(outfolder_,'known_%s_%s' %(infolder,pid)) +@@ -654,7 +654,7 @@ + # else explore bam to find exact positions + for pileupcolumn in bam.pileup(chr,startk,endk,stepper='nofilter', max_depth=MAX_DEPTH): + if not startk<=pileupcolumn.reference_pos<=endk: continue +- if not kdic.has_key(pileupcolumn.reference_pos+1): continue ++ if pileupcolumn.reference_pos+1 not in kdic: continue + ref=fasta.fetch(chr,pileupcolumn.reference_pos,pileupcolumn.reference_pos+1).upper() + seq,qual,strand,squal,blatc='',0,'',[],'' + if rmsh: +@@ -668,7 +668,7 @@ + if pileupread.alignment.is_supplementary: continue + if pileupread.alignment.has_tag('SA'): continue + # escludi posizioni introniche nei pressi di splice sites +- if exss and di.has_key(pileupcolumn.reference_pos+1): continue ++ if exss and pileupcolumn.reference_pos+1 in di: continue + # multiple hit + if exh: + if pileupread.alignment.is_secondary: continue +@@ -754,7 +754,7 @@ + elif pileupread.alignment.is_read2: rt=2 + else: rt=0 + rname=pileupread.alignment.query_name+'_%i'%(rt) +- if d.has_key(rname): blatc+='0' #continue ++ if rname in d: blatc+='0' #continue + else: blatc+='1' + # se la base e' diversa dal reference + # se in regione omopolimerica scarta diff --git a/var/spack/repos/builtin/packages/reditools/setup.py.patch b/var/spack/repos/builtin/packages/reditools/setup.py.patch new file mode 100644 index 0000000000..f65345ad92 --- /dev/null +++ b/var/spack/repos/builtin/packages/reditools/setup.py.patch @@ -0,0 +1,11 @@ +--- a/setup.py 2020-07-16 14:01:48.601449013 -0500 ++++ b/setup.py 2020-07-16 14:02:31.547441668 -0500 +@@ -33,7 +33,7 @@ + 'Operating System :: POSIX', + 'Programming Language :: Python', + ], +- long_description=open('README').read(), ++ long_description=open('README_1.md').read(), + platforms=['Linux','Unix','MacOS'] + ) + |