From b5916451fdf36f98f23f7218072717983230d968 Mon Sep 17 00:00:00 2001 From: Glenn Johnson Date: Fri, 5 Mar 2021 15:19:15 -0600 Subject: Improve R package creation (#21861) * Improve R package creation This PR adds the `list_url` attribute to CRAN R packages when using `spack create`. It also adds the `git` attribute to R Bioconductor packages upon creation. * Switch over to using cran/bioc attributes The cran/bioc entries are set to have the '=' line up with homepage entry, but homepage does not need to exist in the package file. If it does not, that could affect the alignment. * Do not have to split bioc * Edit R package documentation Explain Bioconductor packages and add `cran` and `bioc` attributes. * Update lib/spack/docs/build_systems/rpackage.rst Co-authored-by: Adam J. Stewart * Update lib/spack/docs/build_systems/rpackage.rst Co-authored-by: Adam J. Stewart * Simplify the cran attribute The version can be faked so that the cran attribute is simply equal to the CRAN package name. * Edit the docs to reflect new `cran` attribute format * Use the first element of self.versions() for url Co-authored-by: Adam J. Stewart --- lib/spack/docs/build_systems/rpackage.rst | 71 +++++++++++++++++++++++++++++-- lib/spack/spack/build_systems/r.py | 36 ++++++++++++++++ lib/spack/spack/cmd/create.py | 24 ++++++++++- 3 files changed, 125 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/spack/docs/build_systems/rpackage.rst b/lib/spack/docs/build_systems/rpackage.rst index 486fd95b69..a375b5328c 100644 --- a/lib/spack/docs/build_systems/rpackage.rst +++ b/lib/spack/docs/build_systems/rpackage.rst @@ -79,12 +79,14 @@ Description The first thing you'll need to add to your new package is a description. The top of the homepage for ``caret`` lists the following description: - caret: Classification and Regression Training + Classification and Regression Training Misc functions for training and plotting classification and regression models. 
-You can either use the short description (first line), long description -(second line), or both depending on what you feel is most appropriate. +The first line is a short description (title) and the second line is a long +description. In this case the description is only one line but often the +description is several lines. Spack makes use of both short and long +descriptions and the convention is to use both when creating an R package. ^^^^^^^^ Homepage ^^^^^^^^ @@ -124,6 +126,67 @@ If you only specify the URL for the latest release, your package will no longer be able to fetch that version as soon as a new release comes out. To get around this, add the archive directory as a ``list_url``. +^^^^^^^^^^^^^^^^^^^^^ +Bioconductor packages +^^^^^^^^^^^^^^^^^^^^^ + +Bioconductor packages are set up in a similar way to CRAN packages, but there +are some very important distinctions. Bioconductor packages can be found at: +https://bioconductor.org/. Bioconductor packages are R packages and so follow +the same packaging scheme as CRAN packages. What is different is that +Bioconductor itself is versioned and released. This scheme, using the +Bioconductor package installer, allows further specification of the minimum +version of R as well as further restrictions on the dependencies between +packages than what is possible with the native R packaging system. Spack +cannot replicate these extra features and thus Bioconductor packages in Spack need +to be managed as a group during updates in order to maintain package +consistency with Bioconductor itself. + +Another key difference is that, while previous versions of packages are +available, they are not available from a site that can be programmatically set, +thus a ``list_url`` attribute cannot be used. However, each package is also +available in a git repository, with branches corresponding to each Bioconductor +release. 
Thus, it is always possible to retrieve the version of any package +corresponding to a Bioconductor release simply by fetching the branch that +corresponds to the Bioconductor release of the package repository. For this +reason, Spack Bioconductor R packages use the git repository, with the commit +of the respective branch used in the ``version()`` attribute of the package. + +^^^^^^^^^^^^^^^^^^^^^^^^ +cran and bioc attributes +^^^^^^^^^^^^^^^^^^^^^^^^ + +Much like the ``pypi`` attribute for Python packages, due to the fact that R +packages are obtained from specific repositories, it is possible to set up shortcut +attributes that can be used to set ``homepage``, ``url``, ``list_url``, and +``git``. For example, the following ``cran`` attribute: + +.. code-block:: python + + cran = 'caret' + +is equivalent to: + +.. code-block:: python + + homepage = 'https://cloud.r-project.org/package=caret' + url = 'https://cloud.r-project.org/src/contrib/caret_6.0-86.tar.gz' + list_url = 'https://cloud.r-project.org/src/contrib/Archive/caret/' + +Likewise, the following ``bioc`` attribute: + +.. code-block:: python + + bioc = 'BiocVersion' + +is equivalent to: + +.. code-block:: python + + homepage = 'https://bioconductor.org/packages/BiocVersion' + git = 'https://git.bioconductor.org/packages/BiocVersion' + + ^^^^^^^^^^^^^^^^^^^^^^^^^ Build system dependencies ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -156,7 +219,7 @@ R dependencies R packages are often small and follow the classic Unix philosophy of doing one thing well. They are modular and usually depend on several other packages. You may find a single package with over a -hundred dependencies. Luckily, CRAN packages are well-documented +hundred dependencies. 
Luckily, R packages are well-documented and list all of their dependencies in the following sections: * Depends diff --git a/lib/spack/spack/build_systems/r.py b/lib/spack/spack/build_systems/r.py index f713706882..f8553c476b 100644 --- a/lib/spack/spack/build_systems/r.py +++ b/lib/spack/spack/build_systems/r.py @@ -25,6 +25,14 @@ class RPackage(PackageBase): """ phases = ['install'] + # package attributes that can be expanded to set the homepage, url, + # list_url, and git values + # For CRAN packages + cran = None + + # For Bioconductor packages + bioc = None + maintainers = ['glennpj'] #: This attribute is used in UI queries that need to know the build @@ -33,6 +41,34 @@ class RPackage(PackageBase): extends('r') + @property + def homepage(self): + if self.cran: + return 'https://cloud.r-project.org/package=' + self.cran + elif self.bioc: + return 'https://bioconductor.org/packages/' + self.bioc + + @property + def url(self): + if self.cran: + return ( + 'https://cloud.r-project.org/src/contrib/' + + self.cran + '_' + str(list(self.versions)[0]) + '.tar.gz' + ) + + @property + def list_url(self): + if self.cran: + return ( + 'https://cloud.r-project.org/src/contrib/Archive/' + + self.cran + '/' + ) + + @property + def git(self): + if self.bioc: + return 'https://git.bioconductor.org/packages/' + self.bioc + def configure_args(self): """Arguments to pass to install via ``--configure-args``.""" return [] diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py index 71d34a7ebb..6eaab4a07d 100644 --- a/lib/spack/spack/cmd/create.py +++ b/lib/spack/spack/cmd/create.py @@ -328,14 +328,34 @@ class RPackageTemplate(PackageTemplate): args = [] return args""" - def __init__(self, name, *args, **kwargs): + def __init__(self, name, url, *args, **kwargs): # If the user provided `--name r-rcpp`, don't rename it r-r-rcpp if not name.startswith('r-'): # Make it more obvious that we are renaming the package tty.msg("Changing package name from {0} to 
r-{0}".format(name)) name = 'r-{0}'.format(name) - super(RPackageTemplate, self).__init__(name, *args, **kwargs) + r_name = parse_name(url) + + cran = re.search( + r'(?:r-project)[^/]+/src' + '/([^/]+)' * 2, + url + ) + + if cran: + url = r_name + self.url_line = ' cran = "{url}"' + + bioc = re.search( + r'(?:bioconductor)[^/]+/packages' + '/([^/]+)' * 5, + url + ) + + if bioc: + self.url_line = ' url = "{0}"\n'\ + ' bioc = "{1}"'.format(url, r_name) + + super(RPackageTemplate, self).__init__(name, url, *args, **kwargs) class PerlmakePackageTemplate(PackageTemplate): -- cgit v1.2.3-60-g2f50