1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
from mako.template import Template
from mako.lookup import TemplateLookup
import os
import sys
from compyle.opencl import get_context, profile_kernel, SimpleKernel
def get_simple_kernel(kernel_name, args, src, wgs, preamble=""):
    """Build a ``SimpleKernel`` and return it wrapped by ``profile_kernel``.

    Parameters
    ----------
    kernel_name
        name given to the kernel; also passed to ``profile_kernel``
    args
        kernel arguments, forwarded unchanged to ``SimpleKernel``
    src
        kernel source, forwarded unchanged to ``SimpleKernel``
    wgs
        work group size, forwarded unchanged to ``SimpleKernel``
    preamble
        source placed ahead of the kernel (defaults to an empty string)
    """
    kernel = SimpleKernel(
        get_context(), args, src, wgs, kernel_name, preamble=preamble
    )
    profiled = profile_kernel(kernel, kernel_name, backend='opencl')
    return profiled
def get_elwise_kernel(kernel_name, args, src, preamble=""):
    """Build a pyopencl ``ElementwiseKernel`` wrapped by ``profile_kernel``.

    Parameters
    ----------
    kernel_name
        name given to the kernel; also passed to ``profile_kernel``
    args
        kernel arguments, forwarded unchanged to ``ElementwiseKernel``
    src
        kernel source, forwarded unchanged to ``ElementwiseKernel``
    preamble
        source placed ahead of the kernel (defaults to an empty string)
    """
    # The context is fetched before pyopencl is imported, as in the
    # original ordering of these two steps.
    context = get_context()
    from pyopencl.elementwise import ElementwiseKernel

    elementwise = ElementwiseKernel(
        context, args, src, kernel_name, preamble=preamble
    )
    profiled = profile_kernel(elementwise, kernel_name, backend='opencl')
    return profiled
class GPUNNPSHelper(object):
    """Render kernel code from a Mako template and cache the built kernels.

    The source template is expected to provide a ``preamble`` def and, for
    every kernel ``<name>``, a ``<name>_args`` def and a ``<name>_src`` def.
    Kernels built through :meth:`get_kernel` are kept in ``self.cache``.
    """

    def __init__(self, tpl_filename, backend=None, use_double=False,
                 c_type=None):
        """
        Parameters
        ----------
        tpl_filename
            filename of source template, joined onto the directory that
            contains this module
        backend
            backend to use for helper (stored as ``self.backend``)
        use_double:
            Use double precision floating point data types
        c_type:
            c_type to use. Overrides use_double
        """
        # Both templates are looked up next to this module.
        module_dir = os.path.dirname(os.path.realpath(__file__))

        self.src_tpl = Template(
            filename=os.path.join(module_dir, tpl_filename),
        )

        # An explicit c_type takes precedence over the use_double flag.
        if c_type is not None:
            self.data_t = c_type
        else:
            self.data_t = "double" if use_double else "float"

        helper_tpl = Template(
            filename=os.path.join(module_dir, "gpu_helper_functions.mako"),
        )
        helper_preamble = helper_tpl.get_def("get_helpers").render(
            data_t=self.data_t
        )
        preamble = self.src_tpl.get_def("preamble").render(
            data_t=self.data_t
        )

        # The shared helpers come first, followed by the template's own
        # preamble.
        self.preamble = "\n".join([helper_preamble, preamble])
        self.cache = {}
        self.backend = backend

    def _get_code(self, kernel_name, **kwargs):
        """Render the argument list and source for ``kernel_name``.

        The ``<kernel_name>_args`` and ``<kernel_name>_src`` defs of the
        source template are rendered with ``data_t`` and ``kwargs``.

        Returns
        -------
        (arguments, src)
            the two rendered strings
        """
        arguments = self.src_tpl.get_def("%s_args" % kernel_name).render(
            data_t=self.data_t, **kwargs)
        src = self.src_tpl.get_def("%s_src" % kernel_name).render(
            data_t=self.data_t, **kwargs)
        return arguments, src

    def get_kernel(self, kernel_name, **kwargs):
        """Return the kernel for ``kernel_name``, building it on first use.

        Parameters
        ----------
        kernel_name
            name of the kernel; selects the template defs to render
        **kwargs
            values passed to the template when rendering. They are part of
            the cache key, so the values must be hashable. If ``wgs`` is
            given and is not None the kernel is built with
            ``get_simple_kernel``, otherwise with ``get_elwise_kernel``.

        Returns
        -------
        The cached kernel if one exists for this name and these keyword
        values, otherwise a newly built kernel (which is then cached).
        """
        # Sort by keyword name so the key does not depend on the order in
        # which the caller spelled the keyword arguments; otherwise the
        # same kernel would be rebuilt for every ordering.
        key = kernel_name, tuple(sorted(kwargs.items()))
        if key in self.cache:
            return self.cache[key]

        wgs = kwargs.get('wgs')
        args, src = self._get_code(kernel_name, **kwargs)
        if wgs is None:
            knl = get_elwise_kernel(kernel_name, args, src,
                                    preamble=self.preamble)
        else:
            knl = get_simple_kernel(kernel_name, args, src, wgs,
                                    preamble=self.preamble)
        self.cache[key] = knl
        return knl
|