summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew W Elble <aweits@rit.edu>, 2020-09-05 12:50:03 -0400
committer: GitHub <noreply@github.com>, 2020-09-05 11:50:03 -0500
commit: 8ad581e7b3f6e5d1dfb9b2b2e06408810a4d966d (patch)
tree: b8d09d49b24959a3698e63b3b133a9fed19477c7
parent: b494f5048988b3213144a3b504d803b3fd041864 (diff)
download: spack-8ad581e7b3f6e5d1dfb9b2b2e06408810a4d966d.tar.gz
          spack-8ad581e7b3f6e5d1dfb9b2b2e06408810a4d966d.tar.bz2
          spack-8ad581e7b3f6e5d1dfb9b2b2e06408810a4d966d.tar.xz
          spack-8ad581e7b3f6e5d1dfb9b2b2e06408810a4d966d.zip
new package: py-textblob (#18516)
* new package: py-textblob
  add variant to py-nltk to allow for data download/installation;
  add dependencies to py-nltk so that bin/nltk works
* add resources and resource generation script
-rw-r--r--  var/spack/repos/builtin/packages/py-nltk/package.py      657
-rw-r--r--  var/spack/repos/builtin/packages/py-nltk/resourcegen.py   32
-rw-r--r--  var/spack/repos/builtin/packages/py-textblob/package.py   21
3 files changed, 709 insertions, 1 deletions
diff --git a/var/spack/repos/builtin/packages/py-nltk/package.py b/var/spack/repos/builtin/packages/py-nltk/package.py
index 9dbbcc2651..a19244e5fc 100644
--- a/var/spack/repos/builtin/packages/py-nltk/package.py
+++ b/var/spack/repos/builtin/packages/py-nltk/package.py
@@ -13,11 +13,666 @@ class PyNltk(PythonPackage):
version('3.5', sha256='845365449cd8c5f9731f7cb9f8bd6fd0767553b9d53af9eb1b3abf7700936b35')
+ variant('data', default=False, description='Download the NLTK data')
+
depends_on('python@3.5:', type=('build', 'run'))
- depends_on('py-setuptools', type='build')
+ depends_on('py-setuptools', type=('build', 'run'))
+ depends_on('py-joblib', type=('build', 'run'))
depends_on('py-click', type=('build', 'run'))
depends_on('py-regex', type=('build', 'run'))
depends_on('py-tqdm', type=('build', 'run'))
+ resource(name='perluniprops',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/misc/perluniprops.zip',
+ when='+data',
+ sha256='57d54f591c4ed299b3cdf348eecf774ab2858f19e66955352d94ae555e2050ef',
+ destination='nltk_data/misc',
+ placement='perluniprops')
+ resource(name='mwa_ppdb',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/misc/mwa_ppdb.zip',
+ when='+data',
+ sha256='65f70300d720a280eb19899b222c94a630be5e378f01a658cc0a4bb50fa50b41',
+ destination='nltk_data/misc',
+ placement='mwa_ppdb')
+ resource(name='punkt',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip',
+ when='+data',
+ sha256='9a74e3cc0057021b12984c07cc5e46cb746385cf90f49b7d6fe806fb71610144',
+ destination='nltk_data/tokenizers',
+ placement='punkt')
+ resource(name='rslp',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/rslp.zip',
+ when='+data',
+ sha256='f482f9666a2a76cdd4acab16b01a44b002550ebaac29906dbd5a1bbc281e4f8b',
+ destination='nltk_data/stemmers',
+ placement='rslp')
+ resource(name='porter_test',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/porter_test.zip',
+ when='+data',
+ sha256='7760e1ae3a7a975d0b67f8afd9a0a53a29f94da73508b525d1b6e08205924669',
+ destination='nltk_data/stemmers',
+ placement='porter_test')
+ resource(name='snowball_data',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/snowball_data.zip',
+ when='+data',
+ sha256='e8a05c19890f8651df2b958b0f6e318d4476b8a500e26ed63f89077aed0585a2',
+ destination='nltk_data/stemmers',
+ placement='snowball_data')
+ resource(name='maxent_ne_chunker',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/chunkers/maxent_ne_chunker.zip',
+ when='+data',
+ sha256='b7cdb936c551c06ef2cdc6227238c5ccc9c8c5259a11f99f4a937419d52af61b',
+ destination='nltk_data/chunkers',
+ placement='maxent_ne_chunker')
+ resource(name='moses_sample',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/moses_sample.zip',
+ when='+data',
+ sha256='0639dfa1d1939295d29c3d57478b1eb7767405dc916effe2cf6a90071943f7e8',
+ destination='nltk_data/models',
+ placement='moses_sample')
+ resource(name='bllip_wsj_no_aux',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/bllip_wsj_no_aux.zip',
+ when='+data',
+ sha256='e00339b708f23c24b5cf67ff3db5711dd4d80b21083f52787cf167bf77ac2126',
+ destination='nltk_data/models',
+ placement='bllip_wsj_no_aux')
+ resource(name='word2vec_sample',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip',
+ when='+data',
+ sha256='d29ff84a6ceca407f8578648568c55894dac34641ceb1fa02f920264fe326b43',
+ destination='nltk_data/models',
+ placement='word2vec_sample')
+ resource(name='wmt15_eval',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/wmt15_eval.zip',
+ when='+data',
+ sha256='56ea67e320f75be1abdee60b9d57aef1bd50324edd176e11c3c40f451043c80e',
+ destination='nltk_data/models',
+ placement='wmt15_eval')
+ resource(name='spanish_grammars',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/spanish_grammars.zip',
+ when='+data',
+ sha256='4207035d8795d37000c06391d97b068ae470a43db697d96473018f392552b742',
+ destination='nltk_data/grammars',
+ placement='spanish_grammars')
+ resource(name='sample_grammars',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/sample_grammars.zip',
+ when='+data',
+ sha256='8c3e4fecdc47ef1d262401eda08bde995cf4ed912a7934a32905263485240872',
+ destination='nltk_data/grammars',
+ placement='sample_grammars')
+ resource(name='large_grammars',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/large_grammars.zip',
+ when='+data',
+ sha256='5a81e5278757fafe6e8f19b16f6e4363783635ee332c5c238a30e190f735da59',
+ destination='nltk_data/grammars',
+ placement='large_grammars')
+ resource(name='book_grammars',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/book_grammars.zip',
+ when='+data',
+ sha256='cc63b32d680888c04b3c332218d645a9f9db8571ffe7229808391c889796ffbd',
+ destination='nltk_data/grammars',
+ placement='book_grammars')
+ resource(name='basque_grammars',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/basque_grammars.zip',
+ when='+data',
+ sha256='40ec8a0e92079f32a6900189e8551909506e727b19652f28641fcd825a374ec7',
+ destination='nltk_data/grammars',
+ placement='basque_grammars')
+ resource(name='maxent_treebank_pos_tagger',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/maxent_treebank_pos_tagger.zip',
+ when='+data',
+ sha256='6ba605d803ad5e9aeb604dc9c82573afd44e9c9ad1f228788eb05ddd88ef0b24',
+ destination='nltk_data/taggers',
+ placement='maxent_treebank_pos_tagger')
+ resource(name='averaged_perceptron_tagger',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip',
+ when='+data',
+ sha256='e1f13cf2532daadfd6f3bc481a49859f0b8ea6432ccdcd83e6a49a5f19008de9',
+ destination='nltk_data/taggers',
+ placement='averaged_perceptron_tagger')
+ resource(name='averaged_perceptron_tagger_ru',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_ru.zip',
+ when='+data',
+ sha256='82a4ec6fd815dcee0fe6e150aed8fefa0ae501eba6e62b94fafbfc089af8954b',
+ destination='nltk_data/taggers',
+ placement='averaged_perceptron_tagger_ru')
+ resource(name='universal_tagset',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/universal_tagset.zip',
+ when='+data',
+ sha256='fb3b295a7b636d3f50e2bd3f9bd4c84eb99eaf36ff475ea406bdecd247f8f962',
+ destination='nltk_data/taggers',
+ placement='universal_tagset')
+ resource(name='vader_lexicon',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/sentiment/vader_lexicon.zip',
+ when='+data',
+ sha256='8adba4294eef3964d820bf655e37e61bdc3a341994356af59b74fb3b4a36ce5c',
+ destination='nltk_data/sentiment',
+ placement='vader_lexicon')
+ resource(name='lin_thesaurus',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/lin_thesaurus.zip',
+ when='+data',
+ sha256='04ebd29f0ad826700241b608f739bb8b9098c8de998f4a903535de5c3240c0a9',
+ destination='nltk_data/corpora',
+ placement='lin_thesaurus')
+ resource(name='movie_reviews',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/movie_reviews.zip',
+ when='+data',
+ sha256='a41211ae685019137410268134db6a1a14428c89b671eb83056151a878539008',
+ destination='nltk_data/corpora',
+ placement='movie_reviews')
+ resource(name='problem_reports',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/problem_reports.zip',
+ when='+data',
+ sha256='f9e691dcf5eed49827d892b1fc9eb6d73ca2cfa3d5c555fed316990ea6d15c8a',
+ destination='nltk_data/corpora',
+ placement='problem_reports')
+ resource(name='pros_cons',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pros_cons.zip',
+ when='+data',
+ sha256='b5bca541ba5b2e614cde2213ddcca027416f6997067c90e45c173bf55c6fade8',
+ destination='nltk_data/corpora',
+ placement='pros_cons')
+ resource(name='masc_tagged',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/masc_tagged.zip',
+ when='+data',
+ sha256='678a5141cf3381bedb1839c58a330507337be07c7c71603279c0ef5337032304',
+ destination='nltk_data/corpora',
+ placement='masc_tagged')
+ resource(name='sentence_polarity',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sentence_polarity.zip',
+ when='+data',
+ sha256='6e1ed4405b65c7eabf1d199a7f7c437091ac21da0ea7467b410a74062574566b',
+ destination='nltk_data/corpora',
+ placement='sentence_polarity')
+ resource(name='webtext',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/webtext.zip',
+ when='+data',
+ sha256='9e32dbae4879464b8f420a0dc721855bb26167b720d7695588d2ca2aeadf501a',
+ destination='nltk_data/corpora',
+ placement='webtext')
+ resource(name='nps_chat',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nps_chat.zip',
+ when='+data',
+ sha256='a4433d5da5e62fdbede49efa572a53a0139fff1014ffbe86cb263e17cbb4a837',
+ destination='nltk_data/corpora',
+ placement='nps_chat')
+ resource(name='city_database',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/city_database.zip',
+ when='+data',
+ sha256='df142032cac15d388171d018531ba9038fd48293567901ad56b378a40e1f8dfe',
+ destination='nltk_data/corpora',
+ placement='city_database')
+ resource(name='europarl_raw',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/europarl_raw.zip',
+ when='+data',
+ sha256='ad553e177baac263840c10980e6f3e76d5d15f7f7a078bd98520b36edb69b27c',
+ destination='nltk_data/corpora',
+ placement='europarl_raw')
+ resource(name='biocreative_ppi',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/biocreative_ppi.zip',
+ when='+data',
+ sha256='d30fe4ac6e2b71a15376401de7cd5bde1252deb28d3d45920ab740281e78e74b',
+ destination='nltk_data/corpora',
+ placement='biocreative_ppi')
+ resource(name='verbnet3',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/verbnet3.zip',
+ when='+data',
+ sha256='fa0136a7699c52f0bd532dc5adc0914745aa4369a52ae1465cb11841060ec1de',
+ destination='nltk_data/corpora',
+ placement='verbnet3')
+ resource(name='pe08',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pe08.zip',
+ when='+data',
+ sha256='3a4aa7d07cf89afbc8894b9d2f68239ad8452d4e815ad4b3f5824f13425227dd',
+ destination='nltk_data/corpora',
+ placement='pe08')
+ resource(name='pil',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pil.zip',
+ when='+data',
+ sha256='0538ee1d94de616004fd2434cf03840dffab5507cf8b56725b6ef82b572deb76',
+ destination='nltk_data/corpora',
+ placement='pil')
+ resource(name='crubadan',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/crubadan.zip',
+ when='+data',
+ sha256='8d64c8ff52f47a44381cad0795cf7fe3f8ff7907a1f92c09aadef8e163efdbc7',
+ destination='nltk_data/corpora',
+ placement='crubadan')
+ resource(name='gutenberg',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/gutenberg.zip',
+ when='+data',
+ sha256='2d3c3ab548c653944310f37f536443ec85d0a0ad855fcae217a0c9efdce2d611',
+ destination='nltk_data/corpora',
+ placement='gutenberg')
+ resource(name='propbank',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/propbank.zip',
+ when='+data',
+ sha256='320eee3cd06a15b5daac578d494ae109dc2414d9ea941bf9cc514796b6b1547a',
+ destination='nltk_data/corpora',
+ placement='propbank')
+ resource(name='machado',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/machado.zip',
+ when='+data',
+ sha256='772463b1553c1b0ff1fc0360768b31f59b488f7a52d44cc92c3e31ca289acce9',
+ destination='nltk_data/corpora',
+ placement='machado')
+ resource(name='state_union',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/state_union.zip',
+ when='+data',
+ sha256='366c1dc82b2abf896f42b2ec50ba802a0141a29f75d29ca48a7a243ce5bfbe8d',
+ destination='nltk_data/corpora',
+ placement='state_union')
+ resource(name='twitter_samples',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/twitter_samples.zip',
+ when='+data',
+ sha256='aac71c20e1e05003b7812321936c5635dfede61902aca2b94419a1124979c6dd',
+ destination='nltk_data/corpora',
+ placement='twitter_samples')
+ resource(name='semcor',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/semcor.zip',
+ when='+data',
+ sha256='126fa2e829ab63edd5b3fd9de45ef1d60d6880e01e25abc55b5ac7918a824655',
+ destination='nltk_data/corpora',
+ placement='semcor')
+ resource(name='names',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/names.zip',
+ when='+data',
+ sha256='0eec7e958b34982662b8f05824ae64642dea097b08057ade65c252191c5fe7ca',
+ destination='nltk_data/corpora',
+ placement='names')
+ resource(name='ptb',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ptb.zip',
+ when='+data',
+ sha256='f73b6a584bc7907cdd694d0661655a2e76a82ca74dc9bdae757236918d416bf7',
+ destination='nltk_data/corpora',
+ placement='ptb')
+ resource(name='nombank.1.0',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nombank.1.0.zip',
+ when='+data',
+ sha256='eb7c4228bdaf6d528630db60f818e53dd69d4ef7a5722f7066a920c0c7d90c76',
+ destination='nltk_data/corpora',
+ placement='nombank.1.0')
+ resource(name='floresta',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/floresta.zip',
+ when='+data',
+ sha256='7675017f8b36cb85013b7a4171659fb55c427110e1e2fd4bcd92c4c771a14bfd',
+ destination='nltk_data/corpora',
+ placement='floresta')
+ resource(name='comtrans',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/comtrans.zip',
+ when='+data',
+ sha256='95a334f6bd910d2271d159bf53c5ce08516be3fa1cceb32521232c21dd2131f9',
+ destination='nltk_data/corpora',
+ placement='comtrans')
+ resource(name='knbc',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/knbc.zip',
+ when='+data',
+ sha256='88a7822a33d16418e88b2f95084396496953a1c1087bf3e233d3e1fec3f935e8',
+ destination='nltk_data/corpora',
+ placement='knbc')
+ resource(name='mac_morpho',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/mac_morpho.zip',
+ when='+data',
+ sha256='1c6138beba28b9c71edfd4b54991c5e1cf36a4d6b0ad8c66f8aa27c57b07547b',
+ destination='nltk_data/corpora',
+ placement='mac_morpho')
+ resource(name='swadesh',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/swadesh.zip',
+ when='+data',
+ sha256='0b69919501a098f25d2abad9edb84689e1ed44915ca1c65c7832d2bf9d1de3b9',
+ destination='nltk_data/corpora',
+ placement='swadesh')
+ resource(name='rte',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/rte.zip',
+ when='+data',
+ sha256='2f806ead4d53171601254747c3b7c97d758e63a6ef54e3c010a6d62885ab214a',
+ destination='nltk_data/corpora',
+ placement='rte')
+ resource(name='toolbox',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/toolbox.zip',
+ when='+data',
+ sha256='f57d06b30360c5f52cc05c29e75b083eb23981416cce718206c80da0e931592e',
+ destination='nltk_data/corpora',
+ placement='toolbox')
+ resource(name='jeita',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/jeita.zip',
+ when='+data',
+ sha256='4415bd6365628be5eeb80fe7aefe2b9161ef6cfc4d604d101feec6b59aedcbfd',
+ destination='nltk_data/corpora',
+ placement='jeita')
+ resource(name='product_reviews_1',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/product_reviews_1.zip',
+ when='+data',
+ sha256='627bfb0bb7c87586246d99b4402c3d7e4fb77ac14559d8695c283bd6850615ac',
+ destination='nltk_data/corpora',
+ placement='product_reviews_1')
+ resource(name='omw',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/omw.zip',
+ when='+data',
+ sha256='e2cd473805b480b5448ae3f2c3e824978f2528dc1a95a14fe3072777a2f12519',
+ destination='nltk_data/corpora',
+ placement='omw')
+ resource(name='sentiwordnet',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sentiwordnet.zip',
+ when='+data',
+ sha256='b66876a17aaeb4c7c7c8d2f5bb2cf91fde16e1b76e2421e5480fedd17ad248c1',
+ destination='nltk_data/corpora',
+ placement='sentiwordnet')
+ resource(name='product_reviews_2',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/product_reviews_2.zip',
+ when='+data',
+ sha256='272b08fe130882e5867aa7ecc69a65616099183c4ccc10374a62c271801b0bc1',
+ destination='nltk_data/corpora',
+ placement='product_reviews_2')
+ resource(name='abc',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/abc.zip',
+ when='+data',
+ sha256='129bb6001beb828049a90a59b7dd3c2f0594a47012e48fc5177dfae38e658565',
+ destination='nltk_data/corpora',
+ placement='abc')
+ resource(name='udhr2',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/udhr2.zip',
+ when='+data',
+ sha256='0796c314b09a930c989c6f9d93d226af9af13feccd88496e196c743dd266c7f3',
+ destination='nltk_data/corpora',
+ placement='udhr2')
+ resource(name='senseval',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/senseval.zip',
+ when='+data',
+ sha256='fbcb658b562969e47a19a45e04c452d874755d157db936d815ca391ca88bfdea',
+ destination='nltk_data/corpora',
+ placement='senseval')
+ resource(name='words',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/words.zip',
+ when='+data',
+ sha256='54ed02917d6771dcc3e8141218960d020947f7f2ccfd9ac9b320979349746015',
+ destination='nltk_data/corpora',
+ placement='words')
+ resource(name='framenet_v15',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/framenet_v15.zip',
+ when='+data',
+ sha256='ea723e8575f1d7eeb0b39e7cd14a4d608f24adec4496800bfea3bdff82ffdcc8',
+ destination='nltk_data/corpora',
+ placement='framenet_v15')
+ resource(name='unicode_samples',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/unicode_samples.zip',
+ when='+data',
+ sha256='9f8e483e02aa29319648c794942ccd4b13c1029322907138b6fa662315e2d845',
+ destination='nltk_data/corpora',
+ placement='unicode_samples')
+ resource(name='kimmo',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/kimmo.zip',
+ when='+data',
+ sha256='5be9a891a08ac48914cccf8f98f3469c1e76e8d3aae16243220839e8c3fe16f4',
+ destination='nltk_data/corpora',
+ placement='kimmo')
+ resource(name='framenet_v17',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/framenet_v17.zip',
+ when='+data',
+ sha256='22f6aad6fb799ba4dbed0440714e1118442ad7d7345351de37428581284f471c',
+ destination='nltk_data/corpora',
+ placement='framenet_v17')
+ resource(name='chat80',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/chat80.zip',
+ when='+data',
+ sha256='6147451ba5bef268044e3fba446b5988da757fc2ed18d951d38d4eec864c66c0',
+ destination='nltk_data/corpora',
+ placement='chat80')
+ resource(name='qc',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/qc.zip',
+ when='+data',
+ sha256='091fb01e50883014d150acb7d5013d787136968b3f955ae01725a65e7e80f304',
+ destination='nltk_data/corpora',
+ placement='qc')
+ resource(name='inaugural',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/inaugural.zip',
+ when='+data',
+ sha256='a6c099af7f5b5ad2a03f1e4ea3f5ff7699779b9d4327152110af462da210bd1f',
+ destination='nltk_data/corpora',
+ placement='inaugural')
+ resource(name='wordnet',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip',
+ when='+data',
+ sha256='cbda5ea6eef7f36a97a43d4a75f85e07fccbb4f23657d27b4ccbc93e2646ab59',
+ destination='nltk_data/corpora',
+ placement='wordnet')
+ resource(name='stopwords',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip',
+ when='+data',
+ sha256='3fc8d3d4c6e3d5ba6e23a66920dd3fde611cc3edf6e1fd80159a7965f47bea09',
+ destination='nltk_data/corpora',
+ placement='stopwords')
+ resource(name='verbnet',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/verbnet.zip',
+ when='+data',
+ sha256='6bc3620a6dc1c50aec46a97e5ddb51e64c015b9f7d37246805c5f8acfd6d172d',
+ destination='nltk_data/corpora',
+ placement='verbnet')
+ resource(name='shakespeare',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/shakespeare.zip',
+ when='+data',
+ sha256='f1251d8c254710363254ba29c9dc0888d5cb13d5ac736ebc6fb14380f447cfc3',
+ destination='nltk_data/corpora',
+ placement='shakespeare')
+ resource(name='ycoe',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ycoe.zip',
+ when='+data',
+ sha256='e402fa937d6a0b4603495e79f91af02c3f192977e6f15cc5ed5962b5d3673d9a',
+ destination='nltk_data/corpora',
+ placement='ycoe')
+ resource(name='ieer',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ieer.zip',
+ when='+data',
+ sha256='1f63b08ed212c1d52545307838d183c79e02fd09cc8c5a48542f82c61c078b5d',
+ destination='nltk_data/corpora',
+ placement='ieer')
+ resource(name='cess_cat',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cess_cat.zip',
+ when='+data',
+ sha256='c5b42b363365bfaa9a0616e448eb50da9668d2f5b6d1ff9d12b5c28ae09543cb',
+ destination='nltk_data/corpora',
+ placement='cess_cat')
+ resource(name='switchboard',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/switchboard.zip',
+ when='+data',
+ sha256='6a1a22b659e2fe616129addab0e7967335e67c7dae6a6e63be10778dd0455d06',
+ destination='nltk_data/corpora',
+ placement='switchboard')
+ resource(name='comparative_sentences',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/comparative_sentences.zip',
+ when='+data',
+ sha256='d076e1bab25c7c2a39e8850aefbb64a2188ebc5033bf21aeb656f4fab15f7f8b',
+ destination='nltk_data/corpora',
+ placement='comparative_sentences')
+ resource(name='subjectivity',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/subjectivity.zip',
+ when='+data',
+ sha256='741f3371e1a4375051b874fd82fd55857b90975473c91c19a3101cbe17fc4d8c',
+ destination='nltk_data/corpora',
+ placement='subjectivity')
+ resource(name='udhr',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/udhr.zip',
+ when='+data',
+ sha256='97e4c9dfa4a402f243d60b03d511afb04cf63f92f9ad1be9108b511448c329fa',
+ destination='nltk_data/corpora',
+ placement='udhr')
+ resource(name='pl196x',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pl196x.zip',
+ when='+data',
+ sha256='494a7ee616e13b0f798793a9af8da8445b3b83bc4aa3c6bb239967e6ce3cbbeb',
+ destination='nltk_data/corpora',
+ placement='pl196x')
+ resource(name='paradigms',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/paradigms.zip',
+ when='+data',
+ sha256='5875c44cd547b6a8fdde48f8f798fe45bcad7cb232a93ee5fae17fed130c9870',
+ destination='nltk_data/corpora',
+ placement='paradigms')
+ resource(name='gazetteers',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/gazetteers.zip',
+ when='+data',
+ sha256='3e4df6d5a03a3e4e109e488366e96e98d84f085b98d70f3dc11ecd6ce6ca48ab',
+ destination='nltk_data/corpora',
+ placement='gazetteers')
+ resource(name='timit',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/timit.zip',
+ when='+data',
+ sha256='666c6650fb054001e2e1d9aa9b1889fc46629a0081ced7049686c2a598326668',
+ destination='nltk_data/corpora',
+ placement='timit')
+ resource(name='treebank',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip',
+ when='+data',
+ sha256='9da92d76c3666cfb6cddeaed0f7e86b344cce0f0928a286d439e555f19c37399',
+ destination='nltk_data/corpora',
+ placement='treebank')
+ resource(name='sinica_treebank',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sinica_treebank.zip',
+ when='+data',
+ sha256='5506ddf646d5c3fb0a5fffdb53330ec8465f6468499f08d86f77d2df01d5b35e',
+ destination='nltk_data/corpora',
+ placement='sinica_treebank')
+ resource(name='opinion_lexicon',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/opinion_lexicon.zip',
+ when='+data',
+ sha256='7a5da68d53016c5d1fca38f7dd81844cff73466371f90968d1ef15c85b873193',
+ destination='nltk_data/corpora',
+ placement='opinion_lexicon')
+ resource(name='ppattach',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ppattach.zip',
+ when='+data',
+ sha256='ff27399cb353bc6a48ec7ed90f31e6f4c94f270662482b7db07ca0923adb5468',
+ destination='nltk_data/corpora',
+ placement='ppattach')
+ resource(name='dependency_treebank',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip',
+ when='+data',
+ sha256='0df483999f1391f32b141d6047d8ce19efd0a5a3e63ca019bfc4af8530f51fbd',
+ destination='nltk_data/corpora',
+ placement='dependency_treebank')
+ resource(name='reuters',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/reuters.zip',
+ when='+data',
+ sha256='9a59a43823f02a6e2777075c989a3dc454e4b6f68e0332ee3c0e8264075b62f5',
+ destination='nltk_data/corpora',
+ placement='reuters')
+ resource(name='genesis',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/genesis.zip',
+ when='+data',
+ sha256='0cac241f88d7999f81a45e26b1764b2d1f3b4d21654aa954e0d5349eb4784cd0',
+ destination='nltk_data/corpora',
+ placement='genesis')
+ resource(name='cess_esp',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cess_esp.zip',
+ when='+data',
+ sha256='ae5b12898039e51911ae16d25c4822cb92adcfc034a2e12b57676d21d3c94884',
+ destination='nltk_data/corpora',
+ placement='cess_esp')
+ resource(name='conll2007',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2007.zip',
+ when='+data',
+ sha256='b1e2865b31cdbc016a437c29dc3e190042ef2e237b21ba2a69082b7dc1c007ca',
+ destination='nltk_data/corpora',
+ placement='conll2007')
+ resource(name='nonbreaking_prefixes',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nonbreaking_prefixes.zip',
+ when='+data',
+ sha256='62dd9fe11b21d201ca26cf2351595512965d5fe064f9d6ce1873c6231b46d869',
+ destination='nltk_data/corpora',
+ placement='nonbreaking_prefixes')
+ resource(name='dolch',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dolch.zip',
+ when='+data',
+ sha256='e4a58e0f13809ac86bc819e245aeb60981ea4edcac7025509af99fa6b67305cd',
+ destination='nltk_data/corpora',
+ placement='dolch')
+ resource(name='smultron',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/smultron.zip',
+ when='+data',
+ sha256='6748fb331f7b06dd529617590277414a8d3b65291f68367d8b04615cf621702c',
+ destination='nltk_data/corpora',
+ placement='smultron')
+ resource(name='alpino',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/alpino.zip',
+ when='+data',
+ sha256='2e4551748dc81707b01d5adabb62c308ae5cb70fc526936310502431a1db96ef',
+ destination='nltk_data/corpora',
+ placement='alpino')
+ resource(name='wordnet_ic',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet_ic.zip',
+ when='+data',
+ sha256='a931b34bb9013ac3c1291f64c812fd039802995a2b1246b8f7525e82080110e3',
+ destination='nltk_data/corpora',
+ placement='wordnet_ic')
+ resource(name='brown',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip',
+ when='+data',
+ sha256='9b275f9b3b95d7bd66ccfb7cd259f445a13bbe5d1f4107aba09fd3e8364bafa6',
+ destination='nltk_data/corpora',
+ placement='brown')
+ resource(name='panlex_swadesh',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/panlex_swadesh.zip',
+ when='+data',
+ sha256='dc028da016ba7d5f9bcc39263b0c3dc27bd56025672b18ccaec4578833fe4dff',
+ destination='nltk_data/corpora',
+ placement='panlex_swadesh')
+ resource(name='conll2000',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2000.zip',
+ when='+data',
+ sha256='01e65164f268366e7caa0db92332a1955d081908c87016e2c7640c3c5279b7cd',
+ destination='nltk_data/corpora',
+ placement='conll2000')
+ resource(name='universal_treebanks_v20',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/universal_treebanks_v20.zip',
+ when='+data',
+ sha256='7132fdee74f85cb908558ffa3a6dac5c1f3762d4095a316990eb19a647421d8a',
+ destination='nltk_data/corpora',
+ placement='universal_treebanks_v20')
+ resource(name='brown_tei',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown_tei.zip',
+ when='+data',
+ sha256='335bec1ea6362751d5d5c46970137ebb01c80bf7d7d75558787729d275e0a687',
+ destination='nltk_data/corpora',
+ placement='brown_tei')
+ resource(name='cmudict',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cmudict.zip',
+ when='+data',
+ sha256='d07cca47fd72ad32ea9d8ad1219f85301eeaf4568f8b6b73747506a71fb5afd6',
+ destination='nltk_data/corpora',
+ placement='cmudict')
+ resource(name='mte_teip5',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/mte_teip5.zip',
+ when='+data',
+ sha256='2847497d2f8c42c510e82e7cde37537a2a1da7d6e458d879fb22f73f4eef6059',
+ destination='nltk_data/corpora',
+ placement='mte_teip5')
+ resource(name='indian',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/indian.zip',
+ when='+data',
+ sha256='6f5aff392fc953769b6ccb994bd70e33ec6f0226e93979470255fa97abf692f9',
+ destination='nltk_data/corpora',
+ placement='indian')
+ resource(name='conll2002',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2002.zip',
+ when='+data',
+ sha256='64440e49236d0d393e08e0b266284966d68e2d2a82a50cc41b8e96d98c03b5c8',
+ destination='nltk_data/corpora',
+ placement='conll2002')
+ resource(name='tagsets',
+ url='https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/help/tagsets.zip',
+ when='+data',
+ sha256='e44c8ffd7e8759064573e8d4ae837dbb4b15ec68b2ca02cdf6a513dab8b12ca4',
+ destination='nltk_data/help',
+ placement='tagsets')
+
+    def setup_run_environment(self, env):
+        """Point ``NLTK_DATA`` at the data shipped with this install.
+
+        Only acts when the spec has the ``+data`` variant: the
+        ``nltk_data`` directory under the install prefix is prepended
+        to ``NLTK_DATA`` in ``env``. Without ``+data``, ``env`` is left
+        untouched.
+        """
+        if '+data' not in self.spec:
+            return
+        data_dir = self.prefix.nltk_data
+        env.prepend_path("NLTK_DATA", data_dir)
+
+    @run_after('install')
+    def install_data(self):
+        """Copy the ``nltk_data`` tree into the install prefix.
+
+        Registered to run after the ``install`` phase. Does nothing
+        unless the spec has the ``+data`` variant; otherwise the
+        relative ``nltk_data`` directory (the destination used by the
+        ``resource`` directives of this package) is installed to
+        ``<prefix>/nltk_data``.
+        """
+        if '+data' not in self.spec:
+            return
+        install_tree('nltk_data', self.prefix.nltk_data)
+
# May require additional third-party software:
# https://github.com/nltk/nltk/wiki/Installing-Third-Party-Software
diff --git a/var/spack/repos/builtin/packages/py-nltk/resourcegen.py b/var/spack/repos/builtin/packages/py-nltk/resourcegen.py
new file mode 100644
index 0000000000..77042e70c0
--- /dev/null
+++ b/var/spack/repos/builtin/packages/py-nltk/resourcegen.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+#
+# Helper script for maintainers to autogenerate resources for py-nltk
+#
+# Downloads the nltk_data index, fetches every package it lists, checks
+# each download against the MD5 checksum recorded in the index and
+# prints a resource() directive (carrying a SHA-256 checksum) for every
+# package that verifies. Mismatches are reported on stderr and make the
+# script exit with a non-zero status.
+#
+import hashlib
+import sys
+import urllib.request
+import xml.etree.ElementTree
+
+INDEX_URL = (
+    'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml'
+)
+
+# Positional fields: {0} package id, {1} download url, {2} sha256,
+# {3} subdir. Indented by four spaces so the output can be pasted into
+# the class body of the py-nltk package.
+RESOURCE_TEMPLATE = """
+    resource(name='{0}',
+             url='{1}',
+             when='+data',
+             sha256='{2}',
+             destination='nltk_data/{3}',
+             placement='{0}')"""
+
+
+def fetch(url):
+    """Return the body served at ``url`` as bytes.
+
+    The response is closed before returning, so no connection is left
+    open while the remaining packages are processed.
+    """
+    with urllib.request.urlopen(url) as response:
+        return response.read()
+
+
+def main():
+    """Print a resource() directive for every verifiable package.
+
+    Returns 0 when every download matched the checksum published in
+    the index and 1 otherwise, for use as the process exit status.
+    """
+    index = xml.etree.ElementTree.fromstring(fetch(INDEX_URL))
+    mismatches = 0
+    for package in index.findall('./packages/package'):
+        package_url = package.get('url')
+        body = fetch(package_url)
+        # The index publishes MD5 sums; they are only used to validate
+        # the download. The emitted directive carries a SHA-256 sum.
+        if package.get('checksum') != hashlib.md5(body).hexdigest():
+            # Keep stdout paste-able: diagnostics go to stderr.
+            print('bad {0}'.format(package_url), file=sys.stderr)
+            mismatches += 1
+            continue
+        print(RESOURCE_TEMPLATE.format(package.get('id'),
+                                       package_url,
+                                       hashlib.sha256(body).hexdigest(),
+                                       package.get('subdir')))
+    return 1 if mismatches else 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/var/spack/repos/builtin/packages/py-textblob/package.py b/var/spack/repos/builtin/packages/py-textblob/package.py
new file mode 100644
index 0000000000..57784919e6
--- /dev/null
+++ b/var/spack/repos/builtin/packages/py-textblob/package.py
@@ -0,0 +1,21 @@
+# Copyright 2013-2020 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+
+class PyTextblob(PythonPackage):
+    """TextBlob is a Python (2 and 3) library for processing textual
+    data. It provides a simple API for diving into common natural
+    language processing (NLP) tasks such as part-of-speech tagging,
+    noun phrase extraction, sentiment analysis, classification,
+    translation, and more."""
+
+    # Documentation site and the GitHub source archive for the release.
+    homepage = 'https://textblob.readthedocs.io/'
+    url = 'https://github.com/sloria/TextBlob/archive/0.16.0.tar.gz'
+
+    version(
+        '0.16.0',
+        sha256='bf29369f3260cc779b22b2b86337bcce0c8e929d994b1c8f0d39545ec2fb33aa',
+    )
+
+    # setuptools is a build-only dependency.
+    depends_on('py-setuptools', type='build')
+
+    # Needed both to build and to run; NLTK is requested with its
+    # +data variant enabled.
+    depends_on('python@3:', type=('build', 'run'))
+    depends_on('py-nltk@3.1:+data', type=('build', 'run'))