From 9121599145162a37e20491f05b79f03f80fe2741 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Sun, 12 Mar 2017 13:52:18 -0400 Subject: New package pocl (#3413) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * New package pocl * pocl: Update dependencies, add self-test * pocl: Don't require LLVM shared libraries LLVM build fails with shared libraries. * Add patch * Update * Update * Make build and install work; install test still fails * Split pocl into pocl proper and pocl-test * Add debug output * pocl: Update to 0.14-rc * pocl: Correct flake8 error * pocl: Heed code review recommendations * pocl: Add newline at end of file * pocl: Correct flake8 error I don’t want to use an even longer line by putting the whole variant declaration onto a single line, nor do I think that having an overlong line and adding `# noqa` at the end is more readable than splitting a string over three lines. * pocl: Correct dependency type for libtool --- var/spack/repos/builtin/packages/pocl/example1.c | 232 +++++++++++++++++++++ var/spack/repos/builtin/packages/pocl/example1.out | 5 + var/spack/repos/builtin/packages/pocl/package.py | 108 ++++++++++ var/spack/repos/builtin/packages/pocl/uint.patch | 11 + .../repos/builtin/packages/pocl/vecmathlib.patch | 75 +++++++ 5 files changed, 431 insertions(+) create mode 100644 var/spack/repos/builtin/packages/pocl/example1.c create mode 100644 var/spack/repos/builtin/packages/pocl/example1.out create mode 100644 var/spack/repos/builtin/packages/pocl/package.py create mode 100644 var/spack/repos/builtin/packages/pocl/uint.patch create mode 100644 var/spack/repos/builtin/packages/pocl/vecmathlib.patch (limited to 'var') diff --git a/var/spack/repos/builtin/packages/pocl/example1.c b/var/spack/repos/builtin/packages/pocl/example1.c new file mode 100644 index 0000000000..84c3e5e30f --- /dev/null +++ b/var/spack/repos/builtin/packages/pocl/example1.c @@ -0,0 +1,232 @@ +/* example1 - Simple example from OpenCL specification. + + Copyright (c) 2011 Universidad Rey Juan Carlos + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include +#include +#include +#include + +#define N 128 + +void delete_memobjs(cl_mem *memobjs, int n) { + for (int i = 0; i < n; ++i) + clReleaseMemObject(memobjs[i]); +} + +int exec_dot_product_kernel(const char *program_source, int n, cl_float4 *srcA, + cl_float4 *srcB, cl_float *dst) { + cl_context context = poclu_create_any_context(); + if (context == (cl_context)0) + return -1; + + // get the list of GPU devices associated with context + size_t cb; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); + cl_device_id *devices = malloc(cb); + clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); + + // create a command-queue + cl_command_queue cmd_queue = + clCreateCommandQueue(context, devices[0], 0, NULL); + if (cmd_queue == 0) { + clReleaseContext(context); + free(devices); + return -1; + } + + // don't know why this is necessary + for (int i = 0; i < n; ++i) { + poclu_bswap_cl_float_array(devices[0], &srcA[i], 4); + poclu_bswap_cl_float_array(devices[0], &srcB[i], 4); + } + + // allocate the buffer memory objects + cl_mem memobjs[3]; + + memobjs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(cl_float4) * n, srcA, NULL); + if (memobjs[0] == 0) { + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + memobjs[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(cl_float4) * n, srcB, NULL); + if (memobjs[1] == 0) { + delete_memobjs(memobjs, 1); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + memobjs[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * n, + NULL, NULL); + if (memobjs[2] == 0) { + delete_memobjs(memobjs, 2); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + // create the program + cl_program program = + clCreateProgramWithSource(context, 1, &program_source, NULL, NULL); + if (program == 0) { + delete_memobjs(memobjs, 3); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + // build the program + cl_int err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) { + delete_memobjs(memobjs, 3); + clReleaseProgram(program); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + // create the kernel + cl_kernel kernel = clCreateKernel(program, "dot_product", NULL); + if (kernel == 0) { + delete_memobjs(memobjs, 3); + clReleaseProgram(program); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + // set the args values + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]); + err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobjs[1]); + err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&memobjs[2]); + + if (err != CL_SUCCESS) { + delete_memobjs(memobjs, 3); + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + // set work-item dimensions + size_t global_work_size[1]; + global_work_size[0] = n; + size_t local_work_size[1]; + local_work_size[0] = 128; + + // execute kernel + err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, global_work_size, + local_work_size, 0, NULL, NULL); + if (err != CL_SUCCESS) { + delete_memobjs(memobjs, 3); + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + // read output image + err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE, 0, + n * sizeof(cl_float), dst, 0, NULL, NULL); + if (err != CL_SUCCESS) { + delete_memobjs(memobjs, 3); + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + return -1; + } + + for (int i = 0; i < n; ++i) { + poclu_bswap_cl_float_array(devices[0], &dst[i], 1); + poclu_bswap_cl_float_array(devices[0], &srcA[i], 4); + poclu_bswap_cl_float_array(devices[0], &srcB[i], 4); + } + + free(devices); + + // release kernel, program, and memory objects + delete_memobjs(memobjs, 3); + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseCommandQueue(cmd_queue); + clReleaseContext(context); + + // success + return 0; +} + +int main(void) { + const char *source = "__kernel void dot_product(\n" + " __global const float4 *a,\n" + " __global const float4 *b,\n" + " __global float *c)\n" + "{\n" + " int gid = get_global_id(0);\n" + " float4 prod = a[gid] * b[gid];\n" + " c[gid] = prod.x + prod.y + prod.z + prod.w;\n" + "}\n"; + + cl_float4 *srcA = malloc(N * sizeof(cl_float4)); + cl_float4 *srcB = malloc(N * sizeof(cl_float4)); + cl_float *dst = malloc(N * sizeof(cl_float)); + + for (int i = 0; i < N; ++i) { + srcA[i].s[0] = (cl_float)i; + srcA[i].s[1] = (cl_float)i; + srcA[i].s[2] = (cl_float)i; + srcA[i].s[3] = (cl_float)i; + srcB[i].s[0] = (cl_float)i; + srcB[i].s[1] = (cl_float)i; + srcB[i].s[2] = (cl_float)i; + srcB[i].s[3] = (cl_float)i; + } + + if (exec_dot_product_kernel(source, N, srcA, srcB, dst)) { + printf("Error running the tests\n"); + return -1; + } + + for (int i = 0; i < 4; ++i) { + printf("(%f, %f, %f, %f) . (%f, %f, %f, %f) = %f\n", srcA[i].s[0], + srcA[i].s[1], srcA[i].s[2], srcA[i].s[3], srcB[i].s[0], srcB[i].s[1], + srcB[i].s[2], srcB[i].s[3], dst[i]); + if (srcA[i].s[0] * srcB[i].s[0] + srcA[i].s[1] * srcB[i].s[1] + + srcA[i].s[2] * srcB[i].s[2] + srcA[i].s[3] * srcB[i].s[3] != + dst[i]) { + printf("FAIL\n"); + return -1; + } + } + + printf("OK\n"); + return 0; +} diff --git a/var/spack/repos/builtin/packages/pocl/example1.out b/var/spack/repos/builtin/packages/pocl/example1.out new file mode 100644 index 0000000000..ef35a124f8 --- /dev/null +++ b/var/spack/repos/builtin/packages/pocl/example1.out @@ -0,0 +1,5 @@ +(0.000000, 0.000000, 0.000000, 0.000000) . (0.000000, 0.000000, 0.000000, 0.000000) = 0.000000 +(1.000000, 1.000000, 1.000000, 1.000000) . (1.000000, 1.000000, 1.000000, 1.000000) = 4.000000 +(2.000000, 2.000000, 2.000000, 2.000000) . (2.000000, 2.000000, 2.000000, 2.000000) = 16.000000 +(3.000000, 3.000000, 3.000000, 3.000000) . (3.000000, 3.000000, 3.000000, 3.000000) = 36.000000 +OK diff --git a/var/spack/repos/builtin/packages/pocl/package.py b/var/spack/repos/builtin/packages/pocl/package.py new file mode 100644 index 0000000000..573b81131c --- /dev/null +++ b/var/spack/repos/builtin/packages/pocl/package.py @@ -0,0 +1,108 @@ +############################################################################## +# Copyright (c) 2013-2016, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory. +# +# This file is part of Spack. +# Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved. +# LLNL-CODE-647188 +# +# For details, see https://github.com/llnl/spack +# Please also see the LICENSE file for our notice and the LGPL. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License (as +# published by the Free Software Foundation) version 2.1, February 1999. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and +# conditions of the GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +############################################################################## + +from spack import * +from spack.package_test import * +import os + + +class Pocl(CMakePackage): + """Portable Computing Language (pocl) is an open source implementation + of the OpenCL standard which can be easily adapted for new targets + and devices, both for homogeneous CPU and heterogeneous + GPUs/accelerators.""" + + homepage = "http://portablecl.org" + url = "http://portablecl.org/downloads/pocl-0.13.tar.gz" + + version("master", git="https://github.com/pocl/pocl.git") + version("0.14-rc", + git="https://github.com/pocl/pocl.git", branch="release_0_14") + version("0.13", "344480864d4269f2f63f1509395898bd") + version("0.12", "e197ba3aa01a35f40581c48e053330dd") + version("0.11", "9be0640cde2983062c47393d9e8e8fe7") + version("0.10", "0096be4f595c7b5cbfa42430c8b3af6a") + + # This is Github's pocl/pocl#373 + patch("uint.patch", when="@:0.13") + patch("vecmathlib.patch", when="@:0.13") + + depends_on("cmake @2.8.12:", type="build") + depends_on("hwloc") + depends_on("libtool", type=("build", "run")) + # We don't request LLVM's shared libraries because these are not + # enabled by default, and also because they fail to build for us + # (see #1616) + depends_on("llvm +clang") + depends_on("pkg-config", type="build") + + # These are the supported LLVM versions + depends_on("llvm @3.7:3.9", when="@master") + depends_on("llvm @3.7:3.9", when="@0.14") + depends_on("llvm @3.7:3.8", when="@0.13") + depends_on("llvm @3.2:3.7", when="@0.12") + depends_on("llvm @3.2:3.6", when="@0.11") + depends_on("llvm @3.2:3.5", when="@0.10") + + variant("distro", default=False, + description=("Support several CPU architectures, " + "suitable e.g. in a build " + "that will be made available for download")) + variant("icd", default=False, + description="Support a system-wide ICD loader") + + def cmake_args(self): + spec = self.spec + args = ["-DINSTALL_OPENCL_HEADERS=ON"] + if "~shared" in spec["llvm"]: + args += ["-DSTATIC_LLVM"] + if "+distro" in spec: + args += ["-DKERNELLIB_HOST_CPU_VARIANTS=distro"] + args += ["-DENABLE_ICD=%s" % ("ON" if "+icd" in spec else "OFF")] + return args + + @run_after('install') + def symlink_opencl(self): + with working_dir(self.build_directory): + os.symlink("OpenCL", join_path(self.prefix.include, "CL")) + + @run_after('install') + def check_install(self): + # Build and run a small program to test the installed OpenCL library + spec = self.spec + print("Checking pocl installation...") + checkdir = "spack-check" + with working_dir(checkdir, create=True): + source = join_path(os.path.dirname(self.module.__file__), + "example1.c") + cflags = spec["pocl"].cppflags.split() + # ldflags = spec["pocl"].libs.ld_flags.split() + ldflags = ["-L%s" % spec["pocl"].prefix.lib, + "-lOpenCL", "-lpoclu"] + output = compile_c_and_execute(source, cflags, ldflags) + compare_output_file( + output, + join_path(os.path.dirname(self.module.__file__), + "example1.out")) diff --git a/var/spack/repos/builtin/packages/pocl/uint.patch b/var/spack/repos/builtin/packages/pocl/uint.patch new file mode 100644 index 0000000000..1a620cd901 --- /dev/null +++ b/var/spack/repos/builtin/packages/pocl/uint.patch @@ -0,0 +1,11 @@ +--- a/lib/CL/clCreateSubDevices.c ++++ b/lib/CL/clCreateSubDevices.c +@@ -46,7 +46,7 @@ + cl_device_id *new_devs = NULL; + // number of elements in (copies of) properties, including terminating null + cl_uint num_props = 0; +- uint i; ++ cl_uint i; + + POCL_GOTO_ERROR_COND((in_device == NULL), CL_INVALID_DEVICE); + POCL_GOTO_ERROR_COND((properties == NULL), CL_INVALID_VALUE); diff --git a/var/spack/repos/builtin/packages/pocl/vecmathlib.patch b/var/spack/repos/builtin/packages/pocl/vecmathlib.patch new file mode 100644 index 0000000000..73d2742e43 --- /dev/null +++ b/var/spack/repos/builtin/packages/pocl/vecmathlib.patch @@ -0,0 +1,75 @@ +diff --git a/lib/kernel/vecmathlib/vec_sse_double1.h b/lib/kernel/vecmathlib/vec_sse_double1.h +index d727de8..dc582b3 100644 +--- a/lib/kernel/vecmathlib/vec_sse_double1.h ++++ b/lib/kernel/vecmathlib/vec_sse_double1.h +@@ -397,8 +397,8 @@ public: + } + return r; + } +- boolvec_t isfinite() const { return vml_std::isfinite(v); } +- boolvec_t isinf() const { return vml_std::isinf(v); } ++ boolvec_t isfinite() const { return bool(vml_std::isfinite(v)); } ++ boolvec_t isinf() const { return bool(vml_std::isinf(v)); } + boolvec_t isnan() const { + // This is wrong: + // return _mm_ucomineq_sd(from_double(v), from_double(v)); +@@ -407,9 +407,9 @@ public: + // __asm__("ucomisd %[v],%[v]; setp %[r]": [r]"=q"(r): [v]"x"(v)); + // return boolvec_t::scalar_t(r); + // This works as well: +- return vml_std::isnan(v); ++ return bool(vml_std::isnan(v)); + } +- boolvec_t isnormal() const { return vml_std::isnormal(v); } ++ boolvec_t isnormal() const { return bool(vml_std::isnormal(v)); } + realvec_t ldexp(int_t n) const { return vml_std::ldexp(v, n); } + realvec_t ldexp(intvec_t n) const { return vml_std::ldexp(v, n); } + realvec_t log() const { return MF::vml_log(*this); } +@@ -433,7 +433,7 @@ public: + } + realvec_t round() const { return MF::vml_round(*this); } + realvec_t rsqrt() const { return MF::vml_rsqrt(*this); } +- boolvec_t signbit() const { return vml_std::signbit(v); } ++ boolvec_t signbit() const { return bool(vml_std::signbit(v)); } + realvec_t sin() const { return MF::vml_sin(*this); } + realvec_t sinh() const { return MF::vml_sinh(*this); } + realvec_t sqrt() const { +diff --git a/lib/kernel/vecmathlib/vec_sse_float1.h b/lib/kernel/vecmathlib/vec_sse_float1.h +index a84a046..4868b2c 100644 +--- a/lib/kernel/vecmathlib/vec_sse_float1.h ++++ b/lib/kernel/vecmathlib/vec_sse_float1.h +@@ -394,8 +394,8 @@ public: + } + return r; + } +- boolvec_t isfinite() const { return vml_std::isfinite(v); } +- boolvec_t isinf() const { return vml_std::isinf(v); } ++ boolvec_t isfinite() const { return bool(vml_std::isfinite(v)); } ++ boolvec_t isinf() const { return bool(vml_std::isinf(v)); } + boolvec_t isnan() const { + #if defined VML_HAVE_NAN + // This is wrong: +@@ -405,12 +405,12 @@ public: + // __asm__("ucomiss %[v],%[v]; setp %[r]": [r]"=q"(r): [v]"x"(v)); + // return boolvec_t::scalar_t(r); + // This works as well: +- return vml_std::isnan(v); ++ return bool(vml_std::isnan(v)); + #else + return BV(false); + #endif + } +- boolvec_t isnormal() const { return vml_std::isnormal(v); } ++ boolvec_t isnormal() const { return bool(vml_std::isnormal(v)); } + realvec_t ldexp(int_t n) const { return vml_std::ldexp(v, n); } + realvec_t ldexp(intvec_t n) const { return vml_std::ldexp(v, n); } + realvec_t log() const { return MF::vml_log(*this); } +@@ -434,7 +434,7 @@ public: + } + realvec_t round() const { return MF::vml_round(*this); } + realvec_t rsqrt() const { return MF::vml_rsqrt(*this); } +- boolvec_t signbit() const { return vml_std::signbit(v); } ++ boolvec_t signbit() const { return bool(vml_std::signbit(v)); } + realvec_t sin() const { return MF::vml_sin(*this); } + realvec_t sinh() const { return MF::vml_sinh(*this); } + realvec_t sqrt() const { return to_float(_mm_sqrt_ss(from_float(v))); } -- cgit v1.2.3-70-g09d2