blob: 1a6e747b49f29f88cf923ff4b7a69aac65d915e6 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
#!/bin/env python
#
# Helper script for maintainers to autogenerate resources for py-nltk
#
import hashlib
import sys
import urllib.request
import xml.etree.ElementTree
from typing import Optional
# Location of the nltk_data package index that this script walks.
INDEX_URL = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml"

# Text printed for every package whose download matches the index checksum.
# Positional fields: {0} package id, {1} package url, {2} sha256 hex digest,
# {3} package subdir.
# NOTE(review): the template is kept exactly as found; if the continuation
# lines were originally indented for alignment, that whitespace is not
# recoverable from here -- confirm against the intended output layout.
RESOURCE_TEMPLATE = """
resource(name='{0}',
url='{1}',
when='+data',
sha256='{2}',
destination='nltk_data/{3}',
placement='{0}')"""


def _fetch(url: str) -> bytes:
    """Return the complete response body for *url*.

    The response object is closed as soon as the body has been read, so the
    script does not keep one open connection per package it inspects.
    """
    with urllib.request.urlopen(url) as response:
        return response.read()


def main(index_url: str = INDEX_URL) -> None:
    """Print a ``resource(...)`` stanza for each package listed in the index.

    The XML document at *index_url* is parsed and every element matching
    ``./packages/package`` is visited.  Packages without a ``url`` attribute
    are skipped.  Each remaining package is downloaded and its MD5 digest is
    compared with the ``checksum`` attribute from the index:

    * on a match, a stanza carrying the SHA-256 digest of the download is
      written to stdout;
    * on a mismatch (including a missing ``checksum`` attribute), a
      ``bad <url>`` line is written to stderr and processing continues.

    Args:
        index_url: URL of the package index; defaults to ``INDEX_URL``.

    Raises:
        urllib.error.URLError: if the index or a package cannot be fetched.
        xml.etree.ElementTree.ParseError: if the index is not well-formed XML.
    """
    tree = xml.etree.ElementTree.fromstring(_fetch(index_url))

    for package in tree.findall("./packages/package"):
        package_url: Optional[str] = package.get("url")
        if package_url is None:
            continue

        name = package.get("id")
        subdir = package.get("subdir")
        body = _fetch(package_url)

        # The index publishes MD5 digests; use them only to confirm that the
        # download is the file the index describes before hashing it again.
        if package.get("checksum") != hashlib.md5(body).hexdigest():
            # Diagnostics go to stderr so that stdout holds nothing but the
            # generated stanzas.
            print("""bad {0}""".format(package_url), file=sys.stderr)
            continue

        sha256 = hashlib.sha256(body).hexdigest()
        print(RESOURCE_TEMPLATE.format(name, package_url, sha256, subdir))


if __name__ == "__main__":
    main()
|