diff --git a/.gitignore b/.gitignore index 5cd5605dc86..875d4781162 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ htmlcov/ .idea/ !ultima/ultima.cpp ultima/ultima +.cache/ diff --git a/clpy/_version.py b/clpy/_version.py index 38da91ee5a4..04188a16d9f 100644 --- a/clpy/_version.py +++ b/clpy/_version.py @@ -1 +1 @@ -__version__ = '2.1.0.1' +__version__ = '2.2.0' diff --git a/clpy/backend/cuda/compiler.py b/clpy/backend/cuda/compiler.py index f8915715239..26f1f725600 100644 --- a/clpy/backend/cuda/compiler.py +++ b/clpy/backend/cuda/compiler.py @@ -195,7 +195,7 @@ def dump(self, f): linum_fmt = '{{:0{}d}} '.format(digits) f.write('NVRTC compilation error: {}\n'.format(self)) f.write('-----\n') - f.write('Name: {}\n'.format(' '.join(self.name))) + f.write('Name: {}\n'.format(self.name)) f.write('Options: {}\n'.format(' '.join(self.options))) f.write('CUDA source:\n') for i, line in enumerate(lines): diff --git a/clpy/backend/cuda/device.pxd b/clpy/backend/cuda/device.pxd index a48a148755d..d32c8630549 100644 --- a/clpy/backend/cuda/device.pxd +++ b/clpy/backend/cuda/device.pxd @@ -1,5 +1,7 @@ cpdef int get_device_id() except * -cpdef get_cublas_handle() +cpdef size_t get_cublas_handle() except * +cpdef size_t get_cusolver_handle() except * +cpdef size_t get_cusparse_handle() except * cdef class Device: cdef: diff --git a/clpy/backend/cuda/device.pyx b/clpy/backend/cuda/device.pyx index f414265b7c3..1369e049a88 100644 --- a/clpy/backend/cuda/device.pyx +++ b/clpy/backend/cuda/device.pyx @@ -24,24 +24,27 @@ cdef dict _cusolver_handles = {} cdef dict _cusparse_handles = {} -cpdef get_cublas_handle(): +cpdef size_t get_cublas_handle() except *: dev_id = get_device_id() - if dev_id in _cublas_handles: - return _cublas_handles[dev_id] + ret = _cublas_handles.get(dev_id, None) + if ret is not None: + return ret return Device().cublas_handle -cpdef get_cusolver_handle(): +cpdef size_t get_cusolver_handle() except *: dev_id = get_device_id() - if dev_id in _cusolver_handles: - return _cusolver_handles[dev_id] + ret = _cusolver_handles.get(dev_id, None) + if ret is not None: + return ret return Device().cusolver_handle -cpdef get_cusparse_handle(): +cpdef size_t get_cusparse_handle() except *: dev_id = get_device_id() - if dev_id in _cusparse_handles: - return _cusparse_handles[dev_id] + ret = _cusparse_handles.get(dev_id, None) + if ret is not None: + return ret return Device().cusparse_handle diff --git a/clpy/core/core.pxd b/clpy/core/core.pxd index 732c93eefe3..3226210da8b 100644 --- a/clpy/core/core.pxd +++ b/clpy/core/core.pxd @@ -71,6 +71,9 @@ cdef class Indexer: readonly Py_ssize_t size readonly tuple shape + +cpdef ndarray ascontiguousarray(ndarray a, dtype=*) + cdef class CArray: cdef: readonly tuple stride diff --git a/clpy/core/core.pyx b/clpy/core/core.pyx index a0b4189c5c5..43fc82ce53a 100644 --- a/clpy/core/core.pyx +++ b/clpy/core/core.pyx @@ -1888,29 +1888,18 @@ include "reduction.pxi" cdef _id = 'out0 = in0' -_elementwise_copy = create_ufunc( +elementwise_copy = create_ufunc( 'clpy_copy', ('?->?', 'b->b', 'B->B', 'h->h', 'H->H', 'i->i', 'I->I', 'l->l', 'L->L', 'q->q', 'Q->Q', ('e->e', _id), 'f->f', 'd->d', 'F->F', 'D->D'), - 'out0 = (out0_type)(in0)') + 'out0 = out0_type(in0)', default_casting='unsafe') # complex numbers requires out0 = complex(in0) - -def elementwise_copy(*args, **kwargs): - kwargs['casting'] = 'unsafe' - return _elementwise_copy(*args, **kwargs) - - -_elementwise_copy_where = create_ufunc( +elementwise_copy_where = create_ufunc( 'clpy_copy_where', 
('??->?', 'b?->b', 'B?->B', 'h?->h', 'H?->H', 'i?->i', 'I?->I', 'l?->l', - 'L?->L', 'q?->q', 'Q?->Q', 'f?->f', 'd?->d', 'F?->F', 'D?->D'), - 'if (in1) out0 = in0') - - -def elementwise_copy_where(*args, **kwargs): - kwargs['casting'] = 'unsafe' - return _elementwise_copy_where(*args, **kwargs) + 'L?->L', 'q?->q', 'Q?->Q', 'e?->e', 'f?->f', 'd?->d', 'F?->F', 'D?->D'), + 'if (in1) out0 = in0', default_casting='unsafe') cdef _divmod_int = string.Template(''' @@ -3465,6 +3454,11 @@ cpdef ndarray matmul(ndarray a, ndarray b, ndarray out=None): cdef Py_ssize_t i, n, m, ka, kb cdef Py_ssize_t batchCount + orig_a_shape = a.shape + orig_b_shape = b.shape + if len(orig_a_shape) == 0 or len(orig_b_shape) == 0: + raise ValueError('Scalar operands are not allowed, use \'*\' instead') + ret_dtype = numpy.result_type(a.dtype, b.dtype) dtype = numpy.find_common_type((ret_dtype, 'f'), ()) @@ -3550,9 +3544,16 @@ cpdef ndarray matmul(ndarray a, ndarray b, ndarray out=None): *la, ka, n = a.shape *lb, m, kb = b.shape - assert ka == kb + if ka != kb: + raise ValueError( + 'shapes ({}) and ({}) not aligned'.format( + ','.join([str(_) for _ in orig_a_shape]), + ','.join([str(_) for _ in orig_b_shape]))) for la_, lb_ in zip(la, lb): - assert la_ == lb_ or la_ == 1 or lb_ == 1 + if not (la_ == lb_ or la_ == 1 or lb_ == 1): + raise ValueError( + 'operands could not be broadcast together with ' + 'remapped shapes') batchCount = 1 # batchCount = numpy.prod(la) for i in la: diff --git a/clpy/core/elementwise.pxi b/clpy/core/elementwise.pxi index 746606db431..d34153cb841 100644 --- a/clpy/core/elementwise.pxi +++ b/clpy/core/elementwise.pxi @@ -567,9 +567,11 @@ cdef class ElementwiseKernel: Args: args: Arguments of the kernel. - size (int): Range size of the indices. If specified, the variable - ``n`` is set to this value. Otherwise, the result of - broadcasting is used to determine the value of ``n``. + size (int): Range size of the indices. By default, the range size + is automatically determined from the result of broadcasting. + This parameter must be specified if and only if all ndarrays + are `raw` and the range size cannot be determined + automatically. Returns: Arrays are returned according to the ``out_params`` argument of the @@ -761,7 +763,9 @@ class ufunc(object): nargs (int): Number of all arguments. """ - def __init__(self, name, nin, nout, ops, preamble='', doc=''): + + def __init__(self, name, nin, nout, ops, preamble='', doc='', + default_casting=None): # TODO(tomoya.sakai): raw array may be possible for ufunc self.name = name self.nin = nin @@ -770,6 +774,10 @@ class ufunc(object): self._ops = ops self._preamble = preamble self.__doc__ = doc + if default_casting is None: + self._default_casting = 'same_kind' + else: + self._default_casting = default_casting _in_params = tuple( ParameterInfo('T in%d' % i, True) for i in range(nin)) @@ -799,9 +807,7 @@ class ufunc(object): return types def __call__(self, *args, **kwargs): - """__call__(*args, **kwargs) - - Applies the universal function to arguments elementwise. + """Applies the universal function to arguments elementwise. Args: args: Input arguments. 
Each of them can be a :class:`clpy.ndarray` @@ -821,7 +827,7 @@ class ufunc(object): out = kwargs.pop('out', None) dtype = kwargs.pop('dtype', None) # Note default behavior of casting is 'same_kind' on numpy>=1.10 - casting = kwargs.pop('casting', 'same_kind') + casting = kwargs.pop('casting', self._default_casting) if dtype is not None: dtype = numpy.dtype(dtype).type if kwargs: @@ -879,7 +885,8 @@ class ufunc(object): return ret -cpdef create_ufunc(name, ops, routine=None, preamble='', doc=''): +cpdef create_ufunc(name, ops, routine=None, preamble='', doc='', + default_casting=None): _ops = [] for t in ops: if not isinstance(t, tuple): @@ -897,4 +904,6 @@ cpdef create_ufunc(name, ops, routine=None, preamble='', doc=''): out_types = tuple([numpy.dtype(t).type for t in out_types]) _ops.append((in_types, out_types, rt)) - return ufunc(name, len(_ops[0][0]), len(_ops[0][1]), _ops, preamble, doc) + ret = ufunc(name, len(_ops[0][0]), len(_ops[0][1]), _ops, preamble, doc, + default_casting=default_casting) + return ret diff --git a/clpy/core/fusion.py b/clpy/core/fusion.py index 73b1c4a2c9b..76756993754 100644 --- a/clpy/core/fusion.py +++ b/clpy/core/fusion.py @@ -734,7 +734,7 @@ def _create_ufunc(clpy_ufunc, numpy_ufunc): clip = ufunc(core._clip, math.misc.clip, numpy.clip) -copy = ufunc(core._elementwise_copy, +copy = ufunc(core.elementwise_copy, creation.from_data.copy, numpy.copy) bitwise_and = _create_ufunc(core.bitwise_and, numpy.bitwise_and) diff --git a/clpy/core/reduction.pxi b/clpy/core/reduction.pxi index e0c61b48f22..ddaf4f67472 100644 --- a/clpy/core/reduction.pxi +++ b/clpy/core/reduction.pxi @@ -398,9 +398,7 @@ class ReductionKernel(object): self.preamble = preamble def __call__(self, *args, **kwargs): - """__call__(*args, **kwargs) - - Compiles and invokes the reduction kernel. + """Compiles and invokes the reduction kernel. The compilation runs only if the kernel is not cached. 
Note that the kernels with different argument dtypes, ndims, or axis are not diff --git a/clpy/cudnn.py b/clpy/cudnn.py deleted file mode 100644 index db91e726f03..00000000000 --- a/clpy/cudnn.py +++ /dev/null @@ -1,338 +0,0 @@ -import atexit -import threading - -import numpy -import six - -import clpy -from clpy.core import internal -from clpy import cuda -from clpy.cuda import cudnn - - -_cudnn_version = cudnn.getVersion() -_thread_local = threading.local() - -_handles = {} - - -def get_handle(): - dev = cuda.get_device_id() - if dev in _handles: - return _handles[dev] - handle = cudnn.create() - _handles[dev] = handle - return handle - - -@atexit.register -def reset_handles(): - global _handles - handles = _handles - _handles = {} - - for handle in six.itervalues(handles): - cudnn.destroy(handle) - - -def _get_nd_tensor_cache(): - if not hasattr(_thread_local, 'cudnn_nd_tensor_cache'): - _thread_local.cudnn_nd_tensor_cache = {} - return _thread_local.cudnn_nd_tensor_cache - - -class Descriptor(object): - - def __init__(self, descriptor, destroyer): - self.value = descriptor - self.destroy = destroyer - - def __del__(self): - if self.value: - self.destroy(self.value) - self.value = None - - -def get_data_type(dtype): - if dtype.type == numpy.float32: - return cudnn.CUDNN_DATA_FLOAT - elif dtype.type == numpy.float64: - return cudnn.CUDNN_DATA_DOUBLE - elif dtype.type == numpy.float16: - return cudnn.CUDNN_DATA_HALF - else: - raise TypeError('Dtype {} is not supported in cuDNN'.format(dtype)) - - -def _to_ctypes_array(tup, dtype=numpy.intc): - return numpy.array(tup, dtype=dtype).ctypes - - -def create_tensor_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW): - desc = Descriptor(cudnn.createTensorDescriptor(), - cudnn.destroyTensorDescriptor) - if not arr.flags.c_contiguous: - raise ValueError('clpy.cudnn supports c-contiguous arrays only') - data_type = get_data_type(arr.dtype) - if arr.ndim == 4: - cudnn.setTensor4dDescriptor(desc.value, format, data_type, *arr.shape) - else: - strides = [s // arr.itemsize for s in arr.strides] - c_shape = _to_ctypes_array(arr.shape) - c_strides = _to_ctypes_array(strides) - cudnn.setTensorNdDescriptor(desc.value, data_type, arr.ndim, - c_shape.data, c_strides.data) - return desc - - -def create_uninitialized_tensor_descriptor(): - """Create uninitialized tensor descriptor. - - Create a cudnnCreateTensorDescriptor_t that is not yet initialized. - This is used by the batch normalization functions. 
- """ - desc = Descriptor(cudnn.createTensorDescriptor(), - cudnn.destroyTensorDescriptor) - return desc - - -def create_tensor_nd_descriptor(arr): - desc = Descriptor(cudnn.createTensorDescriptor(), - cudnn.destroyTensorDescriptor) - if not arr.flags.c_contiguous: - raise ValueError('clpy.cudnn supports c-contiguous arrays only') - data_type = get_data_type(arr.dtype) - shape = arr.shape - key = (data_type, shape) - cache = _get_nd_tensor_cache() - if key in cache: - return cache[key] - - # numpy's stride is defined in bytes, but cudnn's stride is defined in - # size of element - strides = [s // arr.itemsize for s in arr.strides] - - c_shape = _to_ctypes_array(shape) - c_strides = _to_ctypes_array(strides) - cudnn.setTensorNdDescriptor(desc.value, data_type, - arr.ndim, c_shape.data, c_strides.data) - cache = _get_nd_tensor_cache() - cache[key] = desc - return desc - - -def create_filter_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW): - desc = Descriptor(cudnn.createFilterDescriptor(), - cudnn.destroyFilterDescriptor) - data_type = get_data_type(arr.dtype) - if arr.ndim == 4: - cudnn.setFilter4dDescriptor_v4(desc.value, data_type, format, - *arr.shape) - else: - c_shape = _to_ctypes_array(arr.shape) - cudnn.setFilterNdDescriptor_v4(desc.value, data_type, format, - arr.ndim, c_shape.data) - return desc - - -def create_convolution_descriptor(pad, stride, dtype, - mode=cudnn.CUDNN_CROSS_CORRELATION, - dilation=(1, 1), - use_tensor_core=False, - group=1): - desc = Descriptor(cudnn.createConvolutionDescriptor(), - cudnn.destroyConvolutionDescriptor) - ndim = len(pad) - if ndim != len(stride): - raise ValueError('pad and stride must be of same length') - - if ndim == 2: - if _cudnn_version < 6000: - if dilation[0] != 1 or dilation[1] != 1: - raise ValueError('dilation must be one when cudnn < 6.0') - if _cudnn_version >= 5000: - compute_type = get_data_type(dtype) - # TODO(takagi) Temporarily use computing precision of FP32 for - # storing precision of FP16. - if dtype == numpy.float16: - compute_type = cudnn.CUDNN_DATA_FLOAT - cudnn.setConvolution2dDescriptor_v5( - desc.value, pad[0], pad[1], stride[0], stride[1], - dilation[0], dilation[1], mode, compute_type) - - if _cudnn_version >= 7000: - if use_tensor_core: - math_type = cudnn.CUDNN_TENSOR_OP_MATH - cudnn.setConvolutionMathType(desc.value, math_type) - if group > 1: - cudnn.setConvolutionGroupCount(desc.value, group) - else: - cudnn.setConvolution2dDescriptor_v4( - desc.value, pad[0], pad[1], stride[0], stride[1], 1, 1, mode) - else: - c_pad = _to_ctypes_array(pad) - c_stride = _to_ctypes_array(stride) - c_dilation = _to_ctypes_array((1,) * ndim) - compute_type = get_data_type(dtype) - # TODO(takagi) Temporarily use computing precision of FP32 for - # storing precision of FP16. 
- if dtype == numpy.float16: - compute_type = cudnn.CUDNN_DATA_FLOAT - cudnn.setConvolutionNdDescriptor_v3( - desc.value, ndim, c_pad.data, c_stride.data, c_dilation.data, - mode, compute_type) - - return desc - - -def create_pooling_descriptor(ksize, stride, pad, mode): - desc = Descriptor(cudnn.createPoolingDescriptor(), - cudnn.destroyPoolingDescriptor) - ndim = len(ksize) - if ndim != len(stride) or ndim != len(pad): - raise ValueError('ksize, stride, and pad must be of same length') - - if ndim == 2: - cudnn.setPooling2dDescriptor_v4( - desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ksize[0], - ksize[1], pad[0], pad[1], stride[0], stride[1]) - else: - c_ksize = _to_ctypes_array(ksize) - c_pad = _to_ctypes_array(pad) - c_stride = _to_ctypes_array(stride) - cudnn.setPoolingNdDescriptor_v4( - desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ndim, - c_ksize.data, c_pad.data, c_stride.data) - - return desc - - -def _as4darray(arr): - if arr.ndim == 0: - return arr.reshape(1, 1, 1, 1) - else: - return arr.reshape(arr.shape[0], -1, 1, 1) - - -def activation_forward(x, mode): - x = clpy.ascontiguousarray(x) - y = clpy.empty_like(x) - - dtype = 'd' if x.dtype == 'd' else 'f' - one = numpy.array(1, dtype=dtype).ctypes - zero = numpy.array(0, dtype=dtype).ctypes - handle = get_handle() - x_mat = _as4darray(x) - desc = create_tensor_descriptor(x_mat) - act_desc = Descriptor(cudnn.createActivationDescriptor(), - cudnn.destroyActivationDescriptor) - cudnn.setActivationDescriptor( - act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) - cudnn.activationForward_v4( - handle, act_desc.value, one.data, desc.value, x_mat.data.ptr, - zero.data, desc.value, y.data.ptr) - return y - - -def activation_backward(x, y, gy, mode): - x = clpy.ascontiguousarray(x) - gy = clpy.ascontiguousarray(gy) - - gx = clpy.empty_like(x) - dtype = 'd' if x.dtype == 'd' else 'f' - one = numpy.array(1, dtype=dtype).ctypes - zero = numpy.array(0, dtype=dtype).ctypes - handle = get_handle() - y_mat = _as4darray(y) - desc = create_tensor_descriptor(y_mat) - act_desc = Descriptor(cudnn.createActivationDescriptor(), - cudnn.destroyActivationDescriptor) - cudnn.setActivationDescriptor( - act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) - cudnn.activationBackward_v4( - handle, act_desc.value, one.data, desc.value, y.data.ptr, - desc.value, gy.data.ptr, desc.value, x.data.ptr, - zero.data, desc.value, gx.data.ptr) - return gx - - -def create_dropout_descriptor( - handle, dropout, states, state_size_in_bytes, seed): - desc = Descriptor(cudnn.createDropoutDescriptor(), - cudnn.destroyDropoutDescriptor) - cudnn.setDropoutDescriptor(desc.value, handle, dropout, - states, state_size_in_bytes, seed) - return desc - - -def set_dropout_descriptor(desc, handle, dropout): - # When the fourth argument is NULL, random state is not updated. 
- cudnn.setDropoutDescriptor(desc.value, handle, dropout, 0, 0, 0) - - -def create_rnn_descriptor(hidden_size, num_layers, dropout_desc, - input_mode, direction, mode, data_type): - desc = Descriptor(cudnn.createRNNDescriptor(), - cudnn.destroyRNNDescriptor) - if _cudnn_version >= 7000: - _handle = get_handle() - _algo = cudnn.CUDNN_RNN_ALGO_STANDARD - cudnn.setRNNDescriptor_v6( - _handle, desc.value, hidden_size, num_layers, dropout_desc.value, - input_mode, direction, mode, _algo, data_type) - else: - cudnn.setRNNDescriptor_v5( - desc.value, hidden_size, num_layers, dropout_desc.value, - input_mode, direction, mode, data_type) - return desc - - -def get_rnn_lin_layer_matrix_params( - handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id): - mat_desc = Descriptor(cudnn.createFilterDescriptor(), - cudnn.destroyFilterDescriptor) - ptr = numpy.array(0, dtype=numpy.intp) - cudnn.getRNNLinLayerMatrixParams( - handle, rnn_desc.value, layer, x_desc.value, w_desc.value, w.data.ptr, - lin_layer_id, mat_desc.value, ptr.ctypes.data) - offset = (ptr - w.data.ptr) // 4 - _, _, _, dim = cudnn.getFilterNdDescriptor(mat_desc.value, 3) - size = internal.prod(dim) - mat = w[offset: offset + size] - return mat - - -def get_rnn_lin_layer_bias_params( - handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id): - bias_desc = Descriptor(cudnn.createFilterDescriptor(), - cudnn.destroyFilterDescriptor) - ptr = numpy.array(0, dtype=numpy.intp) - cudnn.getRNNLinLayerBiasParams( - handle, rnn_desc.value, layer, x_desc.value, w_desc.value, w.data.ptr, - lin_layer_id, bias_desc.value, ptr.ctypes.data) - offset = (ptr - w.data.ptr) // 4 - _, _, _, dim = cudnn.getFilterNdDescriptor(bias_desc.value, 3) - size = internal.prod(dim) - bias = w[offset: offset + size] - return bias - - -def create_dropout_states(handle): - state_size = cudnn.dropoutGetStatesSize(handle) - return clpy.empty((state_size,), dtype='b') - - -def create_spatial_transformer_descriptor(sampler_type, dtype, nb_dims, dim_A): - desc = Descriptor(cudnn.createSpatialTransformerDescriptor(), - cudnn.destroySpatialTransformerDescriptor) - data_type = get_data_type(dtype) - - cudnn.setSpatialTransformerDescriptor( - desc.value, sampler_type, data_type, nb_dims, dim_A) - return desc - - -def add_tensor(handle, alpha, biasDesc, biasData, beta, srcDestDesc, - srcDestData): - cudnn.addTensor_v3(handle, alpha, biasDesc, - biasData, beta, srcDestDesc, srcDestData) diff --git a/clpy/manipulation/join.py b/clpy/manipulation/join.py index bd5c469adc2..bedc998fce3 100644 --- a/clpy/manipulation/join.py +++ b/clpy/manipulation/join.py @@ -120,10 +120,12 @@ def stack(tup, axis=0): .. seealso:: :func:`numpy.stack` """ + # TODO(okuta) Remove this if exampd_dims is updated for x in tup: - if not (-x.ndim <= axis < x.ndim): + if not (-x.ndim - 1 <= axis <= x.ndim): raise core.core._AxisError( - 'axis {} out of bounds [0, {})'.format(axis, x.ndim)) + 'axis {} out of bounds [{}, {}]'.format( + axis, -x.ndim - 1, x.ndim)) return concatenate([clpy.expand_dims(x, axis) for x in tup], axis) diff --git a/clpy/random/distributions.py b/clpy/random/distributions.py index 88f026872b6..1df2302cc7d 100644 --- a/clpy/random/distributions.py +++ b/clpy/random/distributions.py @@ -39,12 +39,12 @@ def gumbel(loc=0.0, scale=1.0, size=None, dtype=float): Returns: clpy.ndarray: Samples drawn from the Gumbel destribution. - .. seealso:: :func:`numpy.random.gumbel` + .. 
seealso:: + :func:`cupy.RandomState.gumbel` + :func:`numpy.random.gumbel` """ - rs = uniform(size=size, dtype=dtype) - # We use `1 - x` as input of `log` method to prevent overflow. - # It obeys numpy implementation. - return _gumbel_kernel(rs, loc, scale, rs) + rs = generator.get_random_state() + return rs.gumbel(loc, scale, size, dtype) def lognormal(mean=0.0, sigma=1.0, size=None, dtype=float): diff --git a/clpy/random/generator.py b/clpy/random/generator.py index 34e350c3c41..9f407393968 100644 --- a/clpy/random/generator.py +++ b/clpy/random/generator.py @@ -14,6 +14,20 @@ from clpy import core +_gumbel_kernel = None + + +def _get_gumbel_kernel(): + global _gumbel_kernel + if _gumbel_kernel is None: + _gumbel_kernel = core.ElementwiseKernel( + 'T x, T loc, T scale', 'T y', + 'y = loc - log(-log(1 - x)) * scale', + 'gumbel_kernel' + ) + return _gumbel_kernel + + class RandomState(object): """Portable container of a pseudo-random number generator. @@ -315,12 +329,12 @@ def choice(self, a, size=None, replace=True, p=None): if p is not None: p = clpy.broadcast_to(p, (size, a_size)) index = clpy.argmax(clpy.log(p) + - clpy.random.gumbel(size=(size, a_size)), + self.gumbel(size=(size, a_size)), axis=1) if not isinstance(shape, six.integer_types): index = clpy.reshape(index, shape) else: - index = clpy.random.randint(0, a_size, size=shape) + index = self.randint(0, a_size, size=shape) # Align the dtype with NumPy index = index.astype(clpy.int64, copy=False) @@ -350,6 +364,51 @@ def shuffle(self, a): clrand.generate(self._generator, sample) a[:] = a[clpy.argsort(sample)] + def gumbel(self, loc=0.0, scale=1.0, size=None, dtype=float): + """Returns an array of samples drawn from a Gumbel distribution. + + .. seealso:: + :func:`clpy.random.gumbel` for full documentation, + :meth:`numpy.random.RandomState.gumbel` + """ + x = self.uniform(size=size, dtype=dtype) + # We use `1 - x` as input of `log` method to prevent overflow. + # It obeys numpy implementation. + _get_gumbel_kernel()(x, loc, scale, x) + return x + + def randint(self, low, high=None, size=None, dtype='l'): + """Returns a scalar or an array of integer values over ``[low, high)``. + + .. seealso:: + :func:`clpy.random.randint` for full documentation, + :meth:`numpy.random.RandomState.randint` + """ + if high is None: + lo = 0 + hi = low + else: + lo = low + hi = high + + if lo >= hi: + raise ValueError('low >= high') + if lo < clpy.iinfo(dtype).min: + raise ValueError( + 'low is out of bounds for {}'.format(clpy.dtype(dtype).name)) + if hi > clpy.iinfo(dtype).max + 1: + raise ValueError( + 'high is out of bounds for {}'.format(clpy.dtype(dtype).name)) + + diff = hi - lo - 1 + if diff > clpy.iinfo(clpy.int32).max - clpy.iinfo(clpy.int32).min + 1: + raise NotImplementedError( + 'Sampling from a range whose extent is larger than int32 ' + 'range is currently not supported') + x = self.interval(diff, size).astype(dtype, copy=False) + clpy.add(x, lo, out=x) + return x + def seed(seed=None): """Resets the state of the random number generator with a seed. 
@@ -368,7 +427,7 @@ def seed(seed=None): get_random_state().seed(seed) -# CuPy specific functions +# ClPy specific functions _random_states = {} diff --git a/clpy/random/sample.py b/clpy/random/sample.py index be49ba40f12..455594ab277 100644 --- a/clpy/random/sample.py +++ b/clpy/random/sample.py @@ -1,6 +1,5 @@ import six -import clpy from clpy import core from clpy.creation import basic from clpy.random import distributions @@ -81,31 +80,8 @@ def randint(low, high=None, size=None, dtype='l'): If size is integer, it is the 1D-array of length ``size`` element. Otherwise, it is the array whose shape specified by ``size``. """ - if high is None: - lo = 0 - hi = low - else: - lo = low - hi = high - - if lo >= hi: - raise ValueError('low >= high') - if lo < clpy.iinfo(dtype).min: - raise ValueError( - 'low is out of bounds for {}'.format(clpy.dtype(dtype).name)) - if hi > clpy.iinfo(dtype).max + 1: - raise ValueError( - 'high is out of bounds for {}'.format(clpy.dtype(dtype).name)) - - diff = hi - lo - 1 - if diff > clpy.iinfo(clpy.int32).max - clpy.iinfo(clpy.int32).min + 1: - raise NotImplementedError( - 'Sampling from a range whose extent is larger than int32 range is ' - 'currently not supported') rs = generator.get_random_state() - x = rs.interval(diff, size).astype(dtype, copy=False) - clpy.add(x, lo, out=x) - return x + return rs.randint(low, high, size, dtype) def random_integers(low, high=None, size=None): diff --git a/clpy/testing/helper.py b/clpy/testing/helper.py index 5680c812c35..f4157374e09 100644 --- a/clpy/testing/helper.py +++ b/clpy/testing/helper.py @@ -47,7 +47,11 @@ def _check_clpy_numpy_error(self, clpy_error, clpy_tb, numpy_error, self.fail('Only numpy raises error\n\n' + numpy_tb) elif numpy_error is None: self.fail('Only clpy raises error\n\n' + clpy_tb) - elif type(clpy_error) is not type(numpy_error): + elif not isinstance(clpy_error, type(numpy_error)): + # CuPy errors should be at least as explicit as the NumPy errors, i.e. + # allow CuPy errors to derive from NumPy errors but not the opposite. + # This ensures that try/except blocks that catch NumPy errors also + # catch CuPy errors. msg = '''Different types of errors occurred clpy @@ -56,7 +60,8 @@ def _check_clpy_numpy_error(self, clpy_error, clpy_tb, numpy_error, %s ''' % (clpy_tb, numpy_tb) self.fail(msg) - elif not isinstance(clpy_error, accept_error): + elif not (isinstance(clpy_error, accept_error) and + isinstance(numpy_error, accept_error)): msg = '''Both clpy and numpy raise exceptions clpy @@ -406,7 +411,7 @@ def numpy_clpy_equal(name='xp', sp_name=None): ``scipy.sparse`` or ``clpy.sparse`` module. If ``None``, no argument is given for the modules. - Decorated test fixture is required throw same errors + Decorated test fixture is required to return the same results even if ``xp`` is ``numpy`` or ``clpy``. """ def decorator(impl): @@ -441,6 +446,12 @@ def numpy_clpy_raises(name='xp', sp_name=None, accept_error=Exception): sp_name(str or None): Argument name whose value is either ``scipy.sparse`` or ``clpy.sparse`` module. If ``None``, no argument is given for the modules. + accept_error(bool, Exception or tuple of Exception): Specify + acceptable errors. When both NumPy test and CuPy test raises the + same type of errors, and the type of the errors is specified with + this argument, the errors are ignored and not raised. + If it is ``True`` all error types are acceptable. + If it is ``False`` no error is acceptable. 
Decorated test fixture is required throw same errors even if ``xp`` is ``numpy`` or ``clpy``. diff --git a/clpy/testing/random.py b/clpy/testing/random.py index 52a6bfbbcc6..908992fba4e 100644 --- a/clpy/testing/random.py +++ b/clpy/testing/random.py @@ -88,7 +88,7 @@ def _teardown_random(): def generate_seed(): assert _nest_count > 0, 'random is not set up' - return numpy.random.randint(0xffffffff, dtype=numpy.int64) + return numpy.random.randint(0x7fffffff) def fix_random(): diff --git a/clpy_setup_build.py b/clpy_setup_build.py index 182a982a3cc..a5a3b788c7f 100644 --- a/clpy_setup_build.py +++ b/clpy_setup_build.py @@ -126,6 +126,70 @@ def launch_headercvt(): ], 'check_method': build.check_opencl_version, }, + # { + # 'name': 'cudnn', + # 'file': [ + # 'cupy.cuda.cudnn', + # 'cupy.cudnn', + # ], + # 'include': [ + # 'cudnn.h', + # ], + # 'libraries': [ + # 'cudnn', + # ], + # 'check_method': build.check_cudnn_version, + # }, + # { + # 'name': 'nccl', + # 'file': [ + # 'cupy.cuda.nccl', + # ], + # 'include': [ + # 'nccl.h', + # ], + # 'libraries': [ + # 'nccl', + # ], + # 'check_method': build.check_nccl_version, + # }, + # { + # 'name': 'cusolver', + # 'file': [ + # 'cupy.cuda.cusolver', + # ], + # 'include': [ + # 'cusolverDn.h', + # ], + # 'libraries': [ + # 'cusolver', + # ], + # 'check_method': build.check_cusolver_version, + # }, + # { + # # The value of the key 'file' is a list that contains extension names + # # or tuples of an extension name and a list of other souces files + # # required to build the extension such as .cpp files and .cu files. + # # + # # | (, a list of ) + # # + # # The extension name is also interpreted as the name of the Cython + # # source file required to build the extension with appending '.pyx' + # # file extension. + # 'name': 'thrust', + # 'file': [ + # ('cupy.cuda.thrust', ['cupy/cuda/cupy_thrust.cu']), + # ], + # 'include': [ + # 'thrust/device_ptr.h', + # 'thrust/sequence.h', + # 'thrust/sort.h', + # ], + # 'libraries': [ + # 'cudart', + # ], + # 'check_method': build.check_cuda_version, + # } ] @@ -288,6 +352,9 @@ def cythonize(extensions, arg_options): directive_keys = ('linetrace', 'profile') directives = {key: arg_options[key] for key in directive_keys} + # Embed signatures for Sphinx documentation. 
+ directives['embedsignature'] = True + cythonize_option_keys = ('annotate',) cythonize_options = {key: arg_options[key] for key in cythonize_option_keys} diff --git a/cupy/cudnn.pyx b/cupy/cudnn.pyx new file mode 100644 index 00000000000..baa630a6a1c --- /dev/null +++ b/cupy/cudnn.pyx @@ -0,0 +1,389 @@ +from libcpp cimport vector + +import atexit +import threading + +import numpy + +from cupy.core cimport core +from cupy.cuda cimport cudnn +from cupy.cuda cimport device +from cupy.cuda cimport memory + +import cupy +from cupy.core import internal +from cupy.cuda import cudnn as py_cudnn + + +cdef int _cudnn_version = cudnn.getVersion() +cdef _thread_local = threading.local() + +cdef vector.vector[size_t] _handles + + +cpdef size_t get_handle() except *: + cdef int dev + dev = device.get_device_id() + if _handles.size() <= dev: + _handles.resize(dev + 1, 0) + ret = _handles[dev] + if ret != 0: + return ret + ret = cudnn.create() + _handles[dev] = ret + return ret + + +@atexit.register +def reset_handles(): + for handle in _handles: + if handle: + cudnn.destroy(handle) + _handles.clear() + + +cpdef dict _get_nd_tensor_cache(): + if not hasattr(_thread_local, 'cudnn_nd_tensor_cache'): + _thread_local.cudnn_nd_tensor_cache = {} + return _thread_local.cudnn_nd_tensor_cache + + +class Descriptor(object): + + def __init__(self, descriptor, destroyer): + self.value = descriptor + self.destroy = destroyer + + def __del__(self): + if self.value: + self.destroy(self.value) + self.value = None + + +cpdef get_data_type(dtype): + t = dtype.type + if t is numpy.float32: + return cudnn.CUDNN_DATA_FLOAT + elif t is numpy.float64: + return cudnn.CUDNN_DATA_DOUBLE + elif t is numpy.float16: + return cudnn.CUDNN_DATA_HALF + else: + raise TypeError('Dtype {} is not supported in cuDNN'.format(dtype)) + + +cpdef _create_tensor_nd_descriptor( + size_t desc, core.ndarray arr, int data_type): + cdef vector.vector[int] c_shape, c_strides + cdef Py_ssize_t itemsize, s + itemsize = arr.itemsize + for s in arr._strides: + c_strides.push_back(s // itemsize) + for s in arr._shape: + c_shape.push_back(s) + cudnn.setTensorNdDescriptor( + desc, data_type, arr.ndim, &c_shape[0], &c_strides[0]) + + +cpdef _create_tensor_descriptor(size_t desc, core.ndarray arr, int format): + if not arr.flags.c_contiguous: + raise ValueError('cupy.cudnn supports c-contiguous arrays only') + data_type = get_data_type(arr.dtype) + if arr._shape.size() == 4: + n, c, h, w = arr.shape + cudnn.setTensor4dDescriptor(desc, format, data_type, n, c, h, w) + else: + _create_tensor_nd_descriptor(desc, arr, data_type) + + +cpdef _create_filter_descriptor( + size_t desc, core.ndarray arr, int format=cudnn.CUDNN_TENSOR_NCHW): + cdef vector.vector[int] c_shape + cdef Py_ssize_t s + data_type = get_data_type(arr.dtype) + if arr._shape.size() == 4: + n, c, h, w = arr.shape + cudnn.setFilter4dDescriptor_v4( + desc, data_type, format, n, c, h, w) + else: + for s in arr._shape: + c_shape.push_back(s) + cudnn.setFilterNdDescriptor_v4( + desc, data_type, format, arr.ndim, &c_shape[0]) + + +cpdef _create_convolution_descriptor( + desc, pad, stride, dtype, mode, dilation, int group, + bint use_tensor_core): + cdef int d0, d1, p0, p1, s0, s1 + cdef vector.vector[int] c_pad, c_stride, c_dilation + ndim = len(pad) + if ndim != len(stride): + raise ValueError('pad and stride must be of same length') + + compute_type = get_data_type(dtype) + # TODO(takagi) Temporarily use computing precision of FP32 for + # storing precision of FP16. 
+ if compute_type == cudnn.CUDNN_DATA_HALF: + compute_type = cudnn.CUDNN_DATA_FLOAT + + if ndim != 2: + c_pad = pad + c_stride = stride + c_dilation.assign(ndim, 1) + cudnn.setConvolutionNdDescriptor_v3( + desc, ndim, &c_pad[0], &c_stride[0], + &c_dilation[0], mode, compute_type) + return + + d0, d1 = dilation + p0, p1 = pad + s0, s1 = stride + if _cudnn_version < 6000 and (d0 != 1 or d1 != 1): + raise ValueError('dilation must be one when cudnn < 6.0') + if _cudnn_version >= 5000: + cudnn.setConvolution2dDescriptor_v5( + desc, p0, p1, s0, s1, d0, d1, mode, compute_type) + if _cudnn_version >= 7000 and use_tensor_core: + math_type = cudnn.CUDNN_TENSOR_OP_MATH + cudnn.setConvolutionMathType(desc, math_type) + if group > 1: + cudnn.setConvolutionGroupCount(desc.value, group) + else: + cudnn.setConvolution2dDescriptor_v4(desc, p0, p1, s0, s1, 1, 1, mode) + + +def create_tensor_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW): + desc = Descriptor(cudnn.createTensorDescriptor(), + py_cudnn.destroyTensorDescriptor) + _create_tensor_descriptor(desc.value, arr, format) + return desc + + +def create_uninitialized_tensor_descriptor(): + """Create uninitialized tensor descriptor. + + Create a cudnnCreateTensorDescriptor_t that is not yet initialized. + This is used by the batch normalization functions. + """ + return Descriptor(cudnn.createTensorDescriptor(), + py_cudnn.destroyTensorDescriptor) + + +def create_tensor_nd_descriptor(core.ndarray arr): + cdef dict cache + if not arr.flags.c_contiguous: + raise ValueError('cupy.cudnn supports c-contiguous arrays only') + data_type = get_data_type(arr.dtype) + shape = arr.shape + key = (data_type, shape) + cache = _get_nd_tensor_cache() + if key in cache: + return cache[key] + + # numpy's stride is defined in bytes, but cudnn's stride is defined in + # size of element + desc = Descriptor(cudnn.createTensorDescriptor(), + py_cudnn.destroyTensorDescriptor) + _create_tensor_nd_descriptor(desc.value, arr, data_type) + cache[key] = desc + return desc + + +def create_filter_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW): + desc = Descriptor(cudnn.createFilterDescriptor(), + py_cudnn.destroyFilterDescriptor) + _create_filter_descriptor(desc.value, arr, format) + return desc + + +def create_convolution_descriptor(pad, stride, dtype, + mode=cudnn.CUDNN_CROSS_CORRELATION, + dilation=(1, 1), + use_tensor_core=False, + group=1): + desc = Descriptor(cudnn.createConvolutionDescriptor(), + py_cudnn.destroyConvolutionDescriptor) + _create_convolution_descriptor( + desc.value, pad, stride, dtype, mode, dilation, group, use_tensor_core) + return desc + + +def create_pooling_descriptor(ksize, stride, pad, mode): + cdef vector.vector[int] c_ksize, c_pad, c_stride + ndim = len(ksize) + if ndim != len(stride) or ndim != len(pad): + raise ValueError('ksize, stride, and pad must be of same length') + desc = Descriptor(cudnn.createPoolingDescriptor(), + py_cudnn.destroyPoolingDescriptor) + if ndim == 2: + cudnn.setPooling2dDescriptor_v4( + desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ksize[0], + ksize[1], pad[0], pad[1], stride[0], stride[1]) + else: + c_ksize = ksize + c_pad = pad + c_stride = stride + cudnn.setPoolingNdDescriptor_v4( + desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ndim, + &c_ksize[0], &c_pad[0], &c_stride[0]) + + return desc + + +cpdef core.ndarray _as4darray(core.ndarray arr): + if arr.ndim == 0: + return arr.reshape(1, 1, 1, 1) + return arr.reshape(arr.shape[0], -1, 1, 1) + + +def activation_forward(core.ndarray x, int mode): + cdef float float_zero = 
0, float_one = 1 + cdef double double_zero = 0, double_one = 1 + cdef size_t zero, one + if x.dtype == 'd': + zero = &double_zero + one = &double_one + else: + zero = &float_zero + one = &float_one + + x = core.ascontiguousarray(x) + y = cupy.empty_like(x) + x = _as4darray(x) + + handle = get_handle() + desc = cudnn.createTensorDescriptor() + act_desc = cudnn.createActivationDescriptor() + try: + _create_tensor_descriptor(desc, x, cudnn.CUDNN_TENSOR_NCHW) + cudnn.setActivationDescriptor( + act_desc, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) + cudnn.activationForward_v4( + handle, act_desc, one, desc, x.data.ptr, + zero, desc, y.data.ptr) + finally: + cudnn.destroyActivationDescriptor(act_desc) + cudnn.destroyTensorDescriptor(desc) + return y + + +def activation_backward(core.ndarray x, core.ndarray y, core.ndarray gy, + int mode): + cdef float float_zero = 0, float_one = 1 + cdef double double_zero = 0, double_one = 1 + cdef size_t zero, one + if x.dtype == 'd': + zero = &double_zero + one = &double_one + else: + zero = &float_zero + one = &float_one + + gx = cupy.empty_like(x) + x = core.ascontiguousarray(x) + gy = core.ascontiguousarray(gy) + y_mat = _as4darray(y) + + handle = get_handle() + desc = cudnn.createTensorDescriptor() + act_desc = cudnn.createActivationDescriptor() + try: + _create_tensor_descriptor(desc, y_mat, cudnn.CUDNN_TENSOR_NCHW) + cudnn.setActivationDescriptor( + act_desc, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) + cudnn.activationBackward_v4( + handle, act_desc, one, desc, y.data.ptr, + desc, gy.data.ptr, desc, x.data.ptr, + zero, desc, gx.data.ptr) + finally: + cudnn.destroyActivationDescriptor(act_desc) + cudnn.destroyTensorDescriptor(desc) + return gx + + +def create_dropout_descriptor( + handle, dropout, states, state_size_in_bytes, seed): + desc = Descriptor(cudnn.createDropoutDescriptor(), + py_cudnn.destroyDropoutDescriptor) + cudnn.setDropoutDescriptor(desc.value, handle, dropout, + states, state_size_in_bytes, seed) + return desc + + +def set_dropout_descriptor(desc, handle, dropout): + # When the fourth argument is NULL, random state is not updated. 
+ cudnn.setDropoutDescriptor(desc.value, handle, dropout, 0, 0, 0) + + +def create_rnn_descriptor(hidden_size, num_layers, dropout_desc, + input_mode, direction, mode, data_type): + desc = Descriptor(cudnn.createRNNDescriptor(), + py_cudnn.destroyRNNDescriptor) + if _cudnn_version >= 7000: + _handle = get_handle() + _algo = cudnn.CUDNN_RNN_ALGO_STANDARD + cudnn.setRNNDescriptor_v6( + _handle, desc.value, hidden_size, num_layers, dropout_desc.value, + input_mode, direction, mode, _algo, data_type) + else: + cudnn.setRNNDescriptor_v5( + desc.value, hidden_size, num_layers, dropout_desc.value, + input_mode, direction, mode, data_type) + return desc + + +def get_rnn_lin_layer_matrix_params( + handle, rnn_desc, layer, x_desc, w_desc, core.ndarray w, lin_layer_id): + cdef size_t ptr = 0 + w_data_ptr = w.data.ptr + mat_desc = cudnn.createFilterDescriptor() + try: + cudnn.getRNNLinLayerMatrixParams( + handle, rnn_desc.value, layer, x_desc.value, w_desc.value, + w.data.ptr, lin_layer_id, mat_desc, &ptr) + _, _, _, dim = cudnn.getFilterNdDescriptor(mat_desc, 3) + finally: + cudnn.destroyFilterDescriptor(mat_desc) + offset = (ptr - w.data.ptr) // 4 + size = internal.prod(dim) + mat = w[offset: offset + size] + return mat + + +def get_rnn_lin_layer_bias_params( + handle, rnn_desc, layer, x_desc, w_desc, core.ndarray w, lin_layer_id): + cdef size_t ptr = 0 + bias_desc = cudnn.createFilterDescriptor() + try: + cudnn.getRNNLinLayerBiasParams( + handle, rnn_desc.value, layer, x_desc.value, w_desc.value, + w.data.ptr, lin_layer_id, bias_desc, &ptr) + _, _, _, dim = cudnn.getFilterNdDescriptor(bias_desc, 3) + finally: + cudnn.destroyFilterDescriptor(bias_desc) + offset = (ptr - w.data.ptr) // 4 + size = internal.prod(dim) + bias = w[offset: offset + size] + return bias + + +def create_dropout_states(handle): + state_size = cudnn.dropoutGetStatesSize(handle) + return cupy.empty((state_size,), dtype='b') + + +def create_spatial_transformer_descriptor(sampler_type, dtype, nb_dims, dim_A): + desc = Descriptor(cudnn.createSpatialTransformerDescriptor(), + py_cudnn.destroySpatialTransformerDescriptor) + data_type = get_data_type(dtype) + + cudnn.setSpatialTransformerDescriptor( + desc.value, sampler_type, data_type, nb_dims, dim_A) + return desc + + +def add_tensor(handle, alpha, biasDesc, biasData, beta, srcDestDesc, + srcDestData): + cudnn.addTensor_v3(handle, alpha, biasDesc, + biasData, beta, srcDestDesc, srcDestData) diff --git a/docker/python2/Dockerfile b/docker/python2/Dockerfile index 3d7ca934793..d844f54ef11 100644 --- a/docker/python2/Dockerfile +++ b/docker/python2/Dockerfile @@ -6,4 +6,4 @@ RUN apt-get update -y && \ python-pip && \ rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* -RUN pip install cupy==2.1.0.1 +RUN pip install cupy==2.2.0 diff --git a/docker/python3/Dockerfile b/docker/python3/Dockerfile index 170ec5e1167..da0b905f193 100644 --- a/docker/python3/Dockerfile +++ b/docker/python3/Dockerfile @@ -6,4 +6,4 @@ RUN apt-get update -y && \ python3-pip && \ rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* -RUN pip3 install cupy==2.1.0.1 +RUN pip3 install cupy==2.2.0 diff --git a/docs/source/install.rst b/docs/source/install.rst index 850069722cc..69d5085e19e 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -23,9 +23,8 @@ The following versions of Python can be used: 2.7.6+, 3.4.3+, 3.5.1+, and 3.6.0+ .. note:: We are testing CuPy automatically with Jenkins, where all the above *recommended* environments are tested. 
- We cannot guarantee that CuPy works on other environments including Windows and macOS (especially with CUDA support), even if CuPy looks running correctly. + We cannot guarantee that CuPy works on other environments including Windows and macOS, even if CuPy looks running correctly. -CuPy is supported on Python 2.7.6+, 3.4.3+, 3.5.1+, 3.6.0+. CuPy uses C++ compiler such as g++. You need to install it before installing CuPy. This is typical installation method for each platform:: @@ -54,17 +53,17 @@ The latest version of each package will automatically be installed if missing. * `NumPy `_ 1.9, 1.10, 1.11, 1.12, 1.13 * `Six `_ 1.9+ -CUDA support +In addition, you need to install `CUDA `_. +The following versions of CUDA can be used: 7.0, 7.5, 8.0 and 9.0. -* `CUDA `_ 7.0, 7.5, 8.0, 9.0 +Optional Libraries +~~~~~~~~~~~~~~~~~~ -cuDNN support +The following libraries are optional dependencies. +CuPy will enable these features only if they are installed. * `cuDNN `_ v4, v5, v5.1, v6, v7 - -NCCL support - -* `nccl `_ v1.3+ +* `NCCL `_ v1.3+ Install CuPy ------------ diff --git a/docs/source/reference/environment.rst b/docs/source/reference/environment.rst index d2c804eddcb..7907b090e80 100644 --- a/docs/source/reference/environment.rst +++ b/docs/source/reference/environment.rst @@ -6,7 +6,7 @@ Here are the environment variables CuPy uses. +------------------------------------+----------------------------------------------------+ | ``CUPY_CACHE_DIR`` | Path to the directory to store kernel cache. | -| | ``$(HOME)/.cupy.kernel_cache`` is used by default. | +| | ``${HOME}/.cupy/kernel_cache`` is used by default. | | | See :ref:`overview` for details. | +------------------------------------+----------------------------------------------------+ | ``CUPY_CACHE_SAVE_CUDA_SOURCE`` | If set to 1, CUDA source file will be saved along | diff --git a/tests/clpy_tests/manipulation_tests/test_join.py b/tests/clpy_tests/manipulation_tests/test_join.py index 982efad66ce..3db5fed9f70 100644 --- a/tests/clpy_tests/manipulation_tests/test_join.py +++ b/tests/clpy_tests/manipulation_tests/test_join.py @@ -173,10 +173,28 @@ def test_stack_value(self): @testing.with_requires('numpy>=1.10') @testing.numpy_clpy_array_equal() - def test_stack_with_axis(self, xp): + def test_stack_with_axis1(self, xp): a = testing.shaped_arange((2, 3), xp) return xp.stack((a, a), axis=1) + @testing.with_requires('numpy>=1.10') + @testing.numpy_clpy_array_equal() + def test_stack_with_axis2(self, xp): + a = testing.shaped_arange((2, 3), xp) + return xp.stack((a, a), axis=2) + + @testing.with_requires('numpy>=1.10') + @testing.numpy_clpy_raises() + def test_stack_with_axis_over(self, xp): + a = testing.shaped_arange((2, 3), xp) + try: + return xp.stack((a, a), axis=3) + except IndexError: + # For 'numpy<=1.12', catch both IndexError from NumPy and + # IndexOrValueError from CuPy. For 'numpy>=1.13', simply do not + # catch the AxisError. 
+ raise IndexError() + def test_stack_with_axis_value(self): a = testing.shaped_arange((2, 3), clpy) s = clpy.stack((a, a), axis=1) diff --git a/tests/clpy_tests/math_tests/test_matmul.py b/tests/clpy_tests/math_tests/test_matmul.py index 71d0f6deaa0..c80925bcbda 100644 --- a/tests/clpy_tests/math_tests/test_matmul.py +++ b/tests/clpy_tests/math_tests/test_matmul.py @@ -104,6 +104,30 @@ def test_clpy_matmul(self, xp, dtype1, dtype2): if ((dtype1, dtype2) in self.skip_dtypes or (dtype2, dtype1) in self.skip_dtypes): return xp.array([]) - x1 = testing.shaped_arange(self.shape_pair[0], xp, dtype1) - x2 = testing.shaped_arange(self.shape_pair[1], xp, dtype2) + shape1, shape2 = self.shape_pair + x1 = testing.shaped_arange(shape1, xp, dtype1) + x2 = testing.shaped_arange(shape2, xp, dtype2) return xp.matmul(x1, x2) + + +@testing.parameterize( + *testing.product({ + 'shape_pair': [ + ((5, 3, 1), (3, 1, 4)), + ((3, 2, 3), (3, 2, 4)), + ((3, 2), ()), + ((), (3, 2)), + ((), ()), + ((3, 2), (1,)), + ], + })) +@testing.gpu +class TestMatmulInvalidShape(unittest.TestCase): + + @testing.with_requires('numpy>=1.10') + @testing.numpy_clpy_raises(accept_error=ValueError) + def test_invalid_shape(self, xp): + shape1, shape2 = self.shape_pair + x1 = testing.shaped_arange(shape1, xp, numpy.float32) + x2 = testing.shaped_arange(shape2, xp, numpy.float32) + xp.matmul(x1, x2) diff --git a/tests/clpy_tests/random_tests/test_generator.py b/tests/clpy_tests/random_tests/test_generator.py index b171efb2565..357a5cc3497 100644 --- a/tests/clpy_tests/random_tests/test_generator.py +++ b/tests/clpy_tests/random_tests/test_generator.py @@ -1,5 +1,3 @@ -import mock -import operator import os import threading import unittest @@ -8,268 +6,308 @@ import six import clpy -from clpy import backend -import clpy.backend.opencl.random as clrand from clpy import core +from clpy import backend from clpy.random import generator from clpy import testing from clpy.testing import condition from clpy.testing import hypothesis -class FunctionSwitcher(object): +class RandomGeneratorTestCase(unittest.TestCase): - def __init__(self, f): - self.tmp = f - self.func_name = f.__name__ + target_method = None - def __enter__(self): - setattr(clrand, self.func_name, mock.Mock()) + def setUp(self): + self.rs = generator.RandomState(seed=testing.generate_seed()) - def __exit__(self, *_): - setattr(clrand, self.func_name, self.tmp) + def _get_generator_func(self, *args, **kwargs): + assert isinstance(self.target_method, str), ( + 'generate_method must be overridden') + f = getattr(self.rs, self.target_method) + return lambda: f(*args, **kwargs) + + def _generate_check_repro(self, func, seed=0): + # Sample a random array while checking reproducibility + self.rs.seed(seed) + x = func() + self.rs.seed(seed) + y = func() + testing.assert_array_equal( + x, y, + 'Randomly generated arrays with the same seed did not match') + return x + + def generate(self, *args, **kwargs): + # Pick one sample from generator. + # Reproducibility is checked by repeating seed-and-sample cycle twice. + func = self._get_generator_func(*args, **kwargs) + return self._generate_check_repro(func, seed=0) + + def generate_many(self, *args, **kwargs): + # Pick many samples from generator. + # Reproducibility is checked only for the first sample, + # because it's very slow to set seed every time. 
+ _count = kwargs.pop('_count', None) + assert _count is not None, '_count is required' + func = self._get_generator_func(*args, **kwargs) + + if _count == 0: + return [] + + vals = [self._generate_check_repro(func, seed=0)] + for i in range(1, _count): + vals.append(func()) + return vals @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu class TestRandomState(unittest.TestCase): - _multiprocess_can_split_ = True - args = (0.0, 1.0) - size = None - def setUp(self): self.rs = generator.RandomState(seed=testing.generate_seed()) - """ - def check_lognormal(self, clrand_func, dtype): - shape = core.get_size(self.size) - exp_size = six.moves.reduce(operator.mul, shape, 1) - if exp_size % 2 == 1: - exp_size += 1 - - clrand_func.return_value = clpy.zeros(exp_size, dtype=dtype) - out = self.rs.lognormal(self.args[0], self.args[1], self.size, dtype) - gen, _, mean, sigma = clrand_func.call_args[0] - self.assertIs(gen, self.rs._generator) - self.assertEqual(size, exp_size) - self.assertIs(mean, self.args[0]) - self.assertIs(sigma, self.args[1]) - self.assertEqual(out.shape, shape) - - def test_lognormal_float(self): - with FunctionSwitcher(clrand.generateLogNormalDouble): - self.check_lognormal(clrand.generateLogNormalDouble, float) - - def test_lognormal_float32(self): - with FunctionSwitcher(clrand.generateLogNormal): - self.check_lognormal(clrand.generateLogNormal, numpy.float32) - - def test_lognormal_float64(self): - with FunctionSwitcher(clrand.generateLogNormalDouble): - self.check_lognormal(clrand.generateLogNormalDouble, numpy.float64) - """ - - def check_normal(self, clrand_func, dtype): - shape = core.get_size(self.size) - exp_size = six.moves.reduce(operator.mul, shape, 1) - if exp_size % 2 == 1: - exp_size += 1 - - clrand_func.return_value = clpy.zeros(exp_size, dtype=dtype) - out = self.rs.normal(self.args[0], self.args[1], self.size, dtype) - gen, _, loc, scale = clrand_func.call_args[0] - self.assertIs(gen, self.rs._generator) - self.assertIs(loc, self.args[0]) - self.assertIs(scale, self.args[1]) - self.assertEqual(out.shape, shape) - - def test_normal_float32(self): - with FunctionSwitcher(clrand.generateNormal): - self.check_normal(clrand.generateNormal, numpy.float32) - - def test_normal_float64(self): - with FunctionSwitcher(clrand.generateNormalDouble): - self.check_normal(clrand.generateNormalDouble, numpy.float64) - - def check_random_sample(self, clrand_func, dtype): - out = self.rs.random_sample(self.size, dtype) - clrand_func.assert_called_once_with(self.rs._generator, out) + def check_seed(self, seed): + rs = self.rs - def test_random_sample_float32(self): - with FunctionSwitcher(clrand.generateUniform): - self.check_random_sample(clrand.generateUniform, numpy.float32) + rs.seed(seed) + xs1 = [rs.uniform() for _ in range(100)] - def test_random_sample_float64(self): - with FunctionSwitcher(clrand.generateUniformDouble): - self.check_random_sample( - clrand.generateUniformDouble, numpy.float64) + rs.seed(seed) + xs2 = [rs.uniform() for _ in range(100)] - def check_seed(self, clrand_func, seed): - self.rs.seed(seed) - call_args_list = clrand_func.call_args_list - self.assertEqual(1, len(call_args_list)) - call_args = call_args_list[0][0] - self.assertEqual(2, len(call_args)) - self.assertIs(self.rs._generator, call_args[0]) - self.assertEqual(numpy.uint64, call_args[1].dtype) + rs.seed(seed) + rs.seed(None) + xs3 = [rs.uniform() for _ in range(100)] - def test_seed_none(self): - with FunctionSwitcher(clrand.setPseudoRandomGeneratorSeed): - 
self.check_seed(clrand.setPseudoRandomGeneratorSeed, None) + # Random state must be reproducible + assert xs1 == xs2 + # Random state must be initialized randomly with seed=None + assert xs1 != xs3 @testing.for_int_dtypes() def test_seed_not_none(self, dtype): - with FunctionSwitcher(clrand.setPseudoRandomGeneratorSeed): - self.check_seed(clrand.setPseudoRandomGeneratorSeed, dtype(0)) + self.check_seed(dtype(0)) @testing.for_dtypes([numpy.complex_]) def test_seed_invalid_type_complex(self, dtype): with self.assertRaises(TypeError): - with FunctionSwitcher(clrand.setPseudoRandomGeneratorSeed): - self.check_seed(clrand.setPseudoRandomGeneratorSeed, dtype(0)) + self.rs.seed(dtype(0)) @testing.for_float_dtypes() def test_seed_invalid_type_float(self, dtype): with self.assertRaises(TypeError): - with FunctionSwitcher(clrand.setPseudoRandomGeneratorSeed): - self.check_seed(clrand.setPseudoRandomGeneratorSeed, dtype(0)) + self.rs.seed(dtype(0)) @testing.gpu -class TestRandomState2(TestRandomState): +@testing.parameterize(*[ + {'args': (0.0, 1.0), 'size': None}, + {'args': (10.0, 20.0), 'size': None}, + {'args': (0.0, 1.0), 'size': 10}, + {'args': (0.0, 1.0), 'size': (1, 2, 3)}, + {'args': (0.0, 1.0), 'size': 3}, + {'args': (0.0, 1.0), 'size': (3, 3)}, + {'args': (0.0, 1.0), 'size': ()}, +]) +@testing.fix_random() +class TestLogNormal(RandomGeneratorTestCase): - args = (10.0, 20.0) - size = None + target_method = 'lognormal' + def check_lognormal(self, dtype): + vals = self.generate_many( + self.args[0], self.args[1], self.size, dtype, _count=10) -@testing.gpu -class TestRandomState3(TestRandomState): + shape = core.get_size(self.size) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == dtype + assert val.shape == shape + assert (0 <= val).all() + # TODO(niboshi): Distribution test - args = (0.0, 1.0) - size = 10 + def test_lognormal_float(self): + self.check_lognormal(float) + + def test_lognormal_float32(self): + self.check_lognormal(numpy.float32) + + def test_lognormal_float64(self): + self.check_lognormal(numpy.float64) @testing.gpu -class TestRandomState4(TestRandomState): +@testing.parameterize(*[ + {'args': (0.0, 1.0), 'size': None}, + {'args': (10.0, 20.0), 'size': None}, + {'args': (0.0, 1.0), 'size': 10}, + {'args': (0.0, 1.0), 'size': (1, 2, 3)}, + {'args': (0.0, 1.0), 'size': 3}, + {'args': (0.0, 1.0), 'size': (3, 3)}, + {'args': (0.0, 1.0), 'size': ()}, +]) +@testing.fix_random() +class TestNormal(RandomGeneratorTestCase): - args = (0.0, 1.0) - size = (1, 2, 3) + target_method = 'normal' + def check_normal(self, dtype): + vals = self.generate_many( + self.args[0], self.args[1], self.size, dtype, _count=10) -@testing.gpu -class TestRandomState6(TestRandomState): + shape = core.get_size(self.size) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == dtype + assert val.shape == shape + # TODO(niboshi): Distribution test - args = (0.0, 1.0) - size = 3 + def test_normal_float32(self): + self.check_normal(numpy.float32) + + def test_normal_float64(self): + self.check_normal(numpy.float64) @testing.gpu -class TestRandomState7(TestRandomState): +@testing.parameterize(*[ + {'size': None}, + {'size': 10}, + {'size': (1, 2, 3)}, + {'size': 3}, + {'size': ()}, +]) +@testing.fix_random() +class TestRandomSample(unittest.TestCase): - args = (0.0, 1.0) - size = (3, 3) + def setUp(self): + self.rs = generator.RandomState(seed=testing.generate_seed()) + def check_random_sample(self, dtype): + vals = [self.rs.random_sample(self.size, dtype) for _ 
in range(10)] -@testing.gpu -class TestRandomState8(TestRandomState): + shape = core.get_size(self.size) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == dtype + assert val.shape == shape + assert (0 <= val).all() + assert (val < 1).all() + # TODO(niboshi): Distribution test - args = (0.0, 1.0) - size = () + def test_random_sample_float32(self): + self.check_random_sample(numpy.float32) + + def test_random_sample_float64(self): + self.check_random_sample(numpy.float64) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu class TestRandAndRandN(unittest.TestCase): def setUp(self): self.rs = generator.RandomState(seed=testing.generate_seed()) - def test_rand(self): - self.rs.random_sample = mock.Mock() - self.rs.rand(1, 2, 3, dtype=numpy.float32) - self.rs.random_sample.assert_called_once_with( - size=(1, 2, 3), dtype=numpy.float32) - def test_rand_invalid_argument(self): with self.assertRaises(TypeError): self.rs.rand(1, 2, 3, unnecessary='unnecessary_argument') - def test_randn(self): - self.rs.normal = mock.Mock() - self.rs.randn(1, 2, 3, dtype=numpy.float32) - self.rs.normal.assert_called_once_with( - size=(1, 2, 3), dtype=numpy.float32) - def test_randn_invalid_argument(self): with self.assertRaises(TypeError): self.rs.randn(1, 2, 3, unnecessary='unnecessary_argument') @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu -class TestInterval(unittest.TestCase): +class TestInterval(RandomGeneratorTestCase): - def setUp(self): - self.rs = clpy.random.get_random_state() - self.rs.seed(testing.generate_seed()) + target_method = 'interval' def test_zero(self): - numpy.testing.assert_array_equal( - self.rs.interval(0, (2, 3)).get(), numpy.zeros((2, 3))) + shape = (2, 3) + vals = self.generate_many(0, shape, _count=10) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == numpy.int32 + assert val.shape == shape + assert (val == 0).all() def test_shape_zero(self): - v = self.rs.interval(10, None) - self.assertEqual(v.dtype, numpy.int32) - self.assertEqual(v.shape, ()) + mx = 10 + vals = self.generate_many(mx, None, _count=10) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == numpy.int32 + assert val.shape == () + assert (0 <= val).all() + assert (val <= mx).all() + # TODO(niboshi): Distribution test def test_shape_one_dim(self): - v = self.rs.interval(10, 10) - self.assertEqual(v.dtype, numpy.int32) - self.assertEqual(v.shape, (10,)) + mx = 10 + size = 20 + vals = self.generate_many(mx, size, _count=10) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == numpy.int32 + assert val.shape == (size,) + assert (0 <= val).all() + assert (val <= mx).all() + # TODO(niboshi): Distribution test def test_shape_multi_dim(self): - v = self.rs.interval(10, (1, 2)) - self.assertEqual(v.dtype, numpy.int32) - self.assertEqual(v.shape, (1, 2)) + mx = 10 + shape = (1, 2) + vals = self.generate_many(mx, shape, _count=10) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == numpy.int32 + assert val.shape == shape + assert (0 <= val).all() + assert (val <= mx).all() + # TODO(niboshi): Distribution test def test_int32_range(self): - v = self.rs.interval(0x00000000, 2) - self.assertEqual(v.dtype, numpy.int32) + v = self.generate(0x00000000, 2) + assert v.dtype == numpy.int32 - v = self.rs.interval(0x7fffffff, 2) - self.assertEqual(v.dtype, numpy.int32) + v = self.generate(0x7fffffff, 2) + assert v.dtype == numpy.int32 def 
test_uint32_range(self): - v = self.rs.interval(0x80000000, 2) - self.assertEqual(v.dtype, numpy.uint32) + v = self.generate(0x80000000, 2) + assert v.dtype == numpy.uint32 - v = self.rs.interval(0xffffffff, 2) - self.assertEqual(v.dtype, numpy.uint32) + v = self.generate(0xffffffff, 2) + assert v.dtype == numpy.uint32 - @condition.repeat(3, 10) def test_bound_1(self): - vals = [self.rs.interval(10, (2, 3)).get() for _ in range(10)] + vals = self.generate_many(10, (2, 3), _count=10) for val in vals: - self.assertEqual(val.shape, (2, 3)) - self.assertEqual(min(_.min() for _ in vals), 0) - self.assertEqual(max(_.max() for _ in vals), 10) + assert isinstance(val, clpy.ndarray) + assert val.dtype == numpy.int32 + assert val.shape == (2, 3) + assert (0 <= val).all() + assert (val <= 10).all() - @condition.repeat(3, 10) def test_bound_2(self): - vals = [self.rs.interval(2, None).get() for _ in range(10)] - self.assertEqual(min(vals), 0) - self.assertEqual(max(vals), 2) + vals = self.generate_many(2, None, _count=20) + for val in vals: + assert isinstance(val, clpy.ndarray) + assert val.dtype == numpy.int32 + assert val.shape == () + assert (0 <= val).all() + assert (val <= 2).all() @condition.repeat(3, 10) def test_goodness_of_fit(self): mx = 5 trial = 100 - vals = [self.rs.interval(mx, None).get() - for _ in six.moves.xrange(trial)] + vals = self.generate_many(mx, None, _count=trial) + vals = [val.get() for val in vals] counts = numpy.histogram(vals, bins=numpy.arange(mx + 2))[0] expected = numpy.array([float(trial) / (mx + 1)] * (mx + 1)) self.assertTrue(hypothesis.chi_square_test(counts, expected)) @@ -277,7 +315,7 @@ def test_goodness_of_fit(self): @condition.repeat(3) def test_goodness_of_fit_2(self): mx = 5 - vals = self.rs.interval(mx, (5, 5)).get() + vals = self.generate(mx, (5, 5)).get() counts = numpy.histogram(vals, bins=numpy.arange(mx + 2))[0] expected = numpy.array([float(vals.size) / (mx + 1)] * (mx + 1)) self.assertTrue(hypothesis.chi_square_test(counts, expected)) @@ -292,16 +330,13 @@ def test_goodness_of_fit_2(self): {'a': numpy.array([0.0, 1.0, 2.0]), 'size': 2, 'p': [0.3, 0.3, 0.4]}, ) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu -class TestChoice1(unittest.TestCase): +class TestChoice1(RandomGeneratorTestCase): - def setUp(self): - self.rs = clpy.random.get_random_state() - self.rs.seed(testing.generate_seed()) + target_method = 'choice' def test_dtype_shape(self): - v = self.rs.choice(a=self.a, size=self.size, p=self.p) + v = self.generate(a=self.a, size=self.size, p=self.p) if isinstance(self.size, six.integer_types): expected_shape = (self.size,) else: @@ -315,29 +350,27 @@ def test_dtype_shape(self): @condition.repeat(3, 10) def test_bound(self): - vals = [self.rs.choice(a=self.a, size=self.size, p=self.p).get() - for _ in range(20)] + vals = self.generate_many( + a=self.a, size=self.size, p=self.p, _count=20) + vals = [val.get() for val in vals] size_ = self.size if isinstance(self.size, tuple) else (self.size,) for val in vals: self.assertEqual(val.shape, size_) - self.assertEqual(min(_.min() for _ in vals), 0) - self.assertEqual(max(_.max() for _ in vals), 2) + self.assertEqual(min(val.min() for val in vals), 0) + self.assertEqual(max(val.max() for val in vals), 2) @testing.parameterize( {'a': [0, 1, 2], 'size': 2, 'p': [0.3, 0.3, 0.4]}, ) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu -class TestChoice2(unittest.TestCase): +class TestChoice2(RandomGeneratorTestCase): - def setUp(self): - self.rs = 
clpy.random.get_random_state() - self.rs.seed(testing.generate_seed()) + target_method = 'choice' def test_dtype_shape(self): - v = self.rs.choice(a=self.a, size=self.size, p=self.p) + v = self.generate(a=self.a, size=self.size, p=self.p) if isinstance(self.size, six.integer_types): expected_shape = (self.size,) else: @@ -351,36 +384,34 @@ def test_dtype_shape(self): @condition.repeat(3, 10) def test_bound(self): - vals = [self.rs.choice(a=self.a, size=self.size, p=self.p).get() - for _ in range(20)] + vals = self.generate_many( + a=self.a, size=self.size, p=self.p, _count=20) + vals = [val.get() for val in vals] size_ = self.size if isinstance(self.size, tuple) else (self.size,) for val in vals: self.assertEqual(val.shape, size_) - self.assertEqual(min(_.min() for _ in vals), 0) - self.assertEqual(max(_.max() for _ in vals), 2) + self.assertEqual(min(val.min() for val in vals), 0) + self.assertEqual(max(val.max() for val in vals), 2) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu -class TestChoiceChi(unittest.TestCase): +class TestChoiceChi(RandomGeneratorTestCase): - def setUp(self): - self.rs = clpy.random.get_random_state() - self.rs.seed(testing.generate_seed()) + target_method = 'choice' @condition.repeat(3, 10) def test_goodness_of_fit(self): trial = 100 - vals = [self.rs.choice(3, 1, True, [0.3, 0.3, 0.4]).get() - for _ in six.moves.xrange(trial)] + vals = self.generate_many(3, 1, True, [0.3, 0.3, 0.4], _count=trial) + vals = [val.get() for val in vals] counts = numpy.histogram(vals, bins=numpy.arange(4))[0] expected = numpy.array([30, 30, 40]) self.assertTrue(hypothesis.chi_square_test(counts, expected)) @condition.repeat(3, 10) def test_goodness_of_fit_2(self): - vals = self.rs.choice(3, (5, 20), True, [0.3, 0.3, 0.4]).get() + vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get() counts = numpy.histogram(vals, bins=numpy.arange(4))[0] expected = numpy.array([30, 30, 40]) self.assertTrue(hypothesis.chi_square_test(counts, expected)) @@ -413,7 +444,6 @@ def test_choice_multinomial(self, xp, dtype): {'a': 3, 'size': 1, 'p': [0.1, 0.1, 0.7]}, ) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu class TestChoiceFailure(unittest.TestCase): @@ -432,16 +462,13 @@ def test_choice_invalid_value(self): {'a': numpy.array([0.0, 2.0, 4.0]), 'size': 2}, ) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu -class TestChoiceReplaceFalse(unittest.TestCase): +class TestChoiceReplaceFalse(RandomGeneratorTestCase): - def setUp(self): - self.rs = clpy.random.get_random_state() - self.rs.seed(testing.generate_seed()) + target_method = 'choice' def test_dtype_shape(self): - v = self.rs.choice(a=self.a, size=self.size, replace=False) + v = self.generate(a=self.a, size=self.size, replace=False) if isinstance(self.size, six.integer_types): expected_shape = (self.size,) else: @@ -455,7 +482,7 @@ def test_dtype_shape(self): @condition.repeat(3, 10) def test_bound(self): - val = self.rs.choice(a=self.a, size=self.size, replace=False).get() + val = self.generate(a=self.a, size=self.size, replace=False).get() size = self.size if isinstance(self.size, tuple) else (self.size,) self.assertEqual(val.shape, size) self.assertTrue((0 <= val).all()) @@ -463,12 +490,53 @@ def test_bound(self): val = numpy.asarray(val) self.assertEqual(numpy.unique(val).size, val.size) - def test_reproduce(self): - rs1 = clpy.random.RandomState(1) - v1 = rs1.choice(a=self.a, size=self.size, replace=False) - rs2 = clpy.random.RandomState(1) - v2 = 
rs2.choice(a=self.a, size=self.size, replace=False) - self.assertTrue((v1 == v2).all()) + +@testing.gpu +@testing.fix_random() +class TestGumbel(RandomGeneratorTestCase): + # TODO(niboshi): + # Test soundness of distribution. + # Currently only reproducibility is checked. + + target_method = 'gumbel' + + def test_gumbel_1(self): + self.generate() + + def test_gumbel_2(self): + self.generate(0.0, 1.0, size=(3, 2)) + + +@testing.gpu +@testing.fix_random() +class TestRandint(RandomGeneratorTestCase): + # TODO(niboshi): + # Test soundness of distribution. + # Currently only reproducibility is checked. + + target_method = 'randint' + + def test_randint_1(self): + self.generate(3) + + def test_randint_2(self): + self.generate(3, 4, size=(3, 2)) + + +@testing.gpu +@testing.fix_random() +class TestUniform(RandomGeneratorTestCase): + # TODO(niboshi): + # Test soundness of distribution. + # Currently only reproducibility is checked. + + target_method = 'uniform' + + def test_uniform_1(self): + self.generate() + + def test_uniform_2(self): + self.generate(-4.2, 2.4, size=(3, 2)) @testing.parameterize( @@ -476,7 +544,6 @@ def test_reproduce(self): {'a': [1, 2, 3], 'size': 5}, ) @testing.fix_random() -@testing.with_requires('numpy>=1.11.0') @testing.gpu class TestChoiceReplaceFalseFailure(unittest.TestCase): @@ -575,7 +642,7 @@ def test_get_random_state_no_clpy_no_chainer_seed(self): self._check_different(rvs0, rvs1) - def test_get_random_state_no_cupy_with_chainer_seed(self): + def test_get_random_state_no_clpy_with_chainer_seed(self): rvs0 = self._get_rvs(generator.RandomState(5)) os.environ.pop('CLPY_SEED', None) @@ -584,7 +651,7 @@ def test_get_random_state_no_cupy_with_chainer_seed(self): self._check_same(rvs0, rvs1) - def test_get_random_state_with_cupy_no_chainer_seed(self): + def test_get_random_state_with_clpy_no_chainer_seed(self): rvs0 = self._get_rvs(generator.RandomState(6)) os.environ['CLPY_SEED'] = '6' @@ -593,7 +660,7 @@ def test_get_random_state_with_cupy_no_chainer_seed(self): self._check_same(rvs0, rvs1) - def test_get_random_state_with_cupy_with_chainer_seed(self): + def test_get_random_state_with_clpy_with_chainer_seed(self): rvs0 = self._get_rvs(generator.RandomState(7)) os.environ['CLPY_SEED'] = '7' diff --git a/tests/clpy_tests/sorting_tests/test_search.py b/tests/clpy_tests/sorting_tests/test_search.py index d8bedec4a95..cd258c5f530 100644 --- a/tests/clpy_tests/sorting_tests/test_search.py +++ b/tests/clpy_tests/sorting_tests/test_search.py @@ -58,6 +58,12 @@ def test_argmax_axis2(self, xp, dtype): a = testing.shaped_random((2, 3, 4), xp, dtype) return a.argmax(axis=2) + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_clpy_allclose() + def test_argmax_tie(self, xp, dtype): + a = xp.array([0, 5, 2, 3, 4, 5], dtype) + return a.argmax() + @testing.for_all_dtypes(no_complex=True) @testing.numpy_clpy_raises(accept_error=ValueError) def test_argmax_zero_size(self, xp, dtype): @@ -124,6 +130,12 @@ def test_argmin_axis2(self, xp, dtype): a = testing.shaped_random((2, 3, 4), xp, dtype) return a.argmin(axis=2) + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_clpy_allclose() + def test_argmin_tie(self, xp, dtype): + a = xp.array([0, 1, 2, 3, 0, 5], dtype) + return a.argmin() + @testing.for_all_dtypes(no_complex=True) @testing.numpy_clpy_raises(accept_error=ValueError) def test_argmin_zero_size(self, xp, dtype): diff --git a/tests/clpy_tests/test_cudnn.py b/tests/clpy_tests/test_cudnn.py new file mode 100644 index 00000000000..4fbc22793fa --- /dev/null +++ 
b/tests/clpy_tests/test_cudnn.py @@ -0,0 +1,37 @@ +import unittest + +import numpy + +import cupy +try: + import cupy.cuda.cudnn as libcudnn + cudnn_enabled = True + modes = [ + libcudnn.CUDNN_ACTIVATION_SIGMOID, + libcudnn.CUDNN_ACTIVATION_RELU, + libcudnn.CUDNN_ACTIVATION_TANH, + ] + import cupy.cudnn +except ImportError: + cudnn_enabled = False + modes = [] +from cupy import testing + + +@testing.parameterize(*testing.product({ + 'dtype': [numpy.float32, numpy.float64], + 'mode': modes, +})) +@unittest.skipUnless(cudnn_enabled, 'cuDNN is not available') +class TestCudnnActivation(unittest.TestCase): + + def setUp(self): + self.x = testing.shaped_arange((3, 4), cupy, self.dtype) + self.y = testing.shaped_arange((3, 4), cupy, self.dtype) + self.g = testing.shaped_arange((3, 4), cupy, self.dtype) + + def test_activation_forward(self): + cupy.cudnn.activation_forward(self.x, self.mode) + + def test_activation_backward(self): + cupy.cudnn.activation_backward(self.x, self.y, self.g, self.mode) diff --git a/tests/clpy_tests/testing_tests/test_helper.py b/tests/clpy_tests/testing_tests/test_helper.py index 1f7e0b6ff66..02c3908a259 100644 --- a/tests/clpy_tests/testing_tests/test_helper.py +++ b/tests/clpy_tests/testing_tests/test_helper.py @@ -32,56 +32,122 @@ def test_unsigned_only(self): class TestCheckCupyNumpyError(unittest.TestCase): + tbs = { + clpy: 'xxxx', + numpy: 'yyyy' + } + def test_both_success(self): + @testing.helper.numpy_clpy_raises() + def dummy_both_success(self, xp): + pass + with self.assertRaises(AssertionError): - helper._check_clpy_numpy_error(self, None, None, None, None) + dummy_both_success(self) def test_clpy_error(self): - clpy_error = Exception() - clpy_tb = 'xxxx' - with six.assertRaisesRegex(self, AssertionError, clpy_tb): - helper._check_clpy_numpy_error(self, clpy_error, clpy_tb, - None, None) + @testing.helper.numpy_clpy_raises() + def dummy_clpy_error(self, xp): + if xp is clpy: + raise Exception(self.tbs.get(clpy)) + + with six.assertRaisesRegex(self, AssertionError, self.tbs.get(clpy)): + dummy_clpy_error(self) def test_numpy_error(self): - numpy_error = Exception() - numpy_tb = 'yyyy' - with six.assertRaisesRegex(self, AssertionError, numpy_tb): - helper._check_clpy_numpy_error(self, None, None, - numpy_error, numpy_tb) + @testing.helper.numpy_clpy_raises() + def dummy_numpy_error(self, xp): + if xp is numpy: + raise Exception(self.tbs.get(numpy)) + + with six.assertRaisesRegex(self, AssertionError, self.tbs.get(numpy)): + dummy_numpy_error(self) def test_clpy_numpy_different_error(self): - clpy_error = TypeError() - clpy_tb = 'xxxx' - numpy_error = ValueError() - numpy_tb = 'yyyy' + @testing.helper.numpy_clpy_raises() + def dummy_clpy_numpy_different_error(self, xp): + if xp is clpy: + raise TypeError(self.tbs.get(clpy)) + elif xp is numpy: + raise ValueError(self.tbs.get(numpy)) + # Use re.S mode to ignore new line characters - pattern = re.compile(clpy_tb + '.*' + numpy_tb, re.S) + pattern = re.compile( + self.tbs.get(clpy) + '.*' + self.tbs.get(numpy), re.S) with six.assertRaisesRegex(self, AssertionError, pattern): - helper._check_clpy_numpy_error(self, clpy_error, clpy_tb, - numpy_error, numpy_tb) + dummy_clpy_numpy_different_error(self) + + def test_clpy_derived_error(self): + @testing.helper.numpy_clpy_raises() + def dummy_clpy_derived_error(self, xp): + if xp is clpy: + raise ValueError(self.tbs.get(clpy)) + elif xp is numpy: + raise Exception(self.tbs.get(numpy)) + + dummy_clpy_derived_error(self) # Assert no exceptions + + def 
test_numpy_derived_error(self): + @testing.helper.numpy_clpy_raises() + def dummy_numpy_derived_error(self, xp): + if xp is clpy: + raise Exception(self.tbs.get(clpy)) + elif xp is numpy: + raise IndexError(self.tbs.get(numpy)) + + # NumPy errors may not derive from CuPy errors, i.e. CuPy errors should + # be at least as explicit as the NumPy error + pattern = re.compile( + self.tbs.get(clpy) + '.*' + self.tbs.get(numpy), re.S) + with six.assertRaisesRegex(self, AssertionError, pattern): + dummy_numpy_derived_error(self) def test_same_error(self): - clpy_error = Exception() - clpy_tb = 'xxxx' - numpy_error = Exception() - numpy_tb = 'yyyy' - # Nothing happens - helper._check_clpy_numpy_error(self, clpy_error, clpy_tb, - numpy_error, numpy_tb, - accept_error=Exception) + @testing.helper.numpy_clpy_raises(accept_error=Exception) + def dummy_same_error(self, xp): + raise Exception(self.tbs.get(xp)) + + dummy_same_error(self) + + def test_clpy_derived_unaccept_error(self): + @testing.helper.numpy_clpy_raises(accept_error=ValueError) + def dummy_clpy_derived_unaccept_error(self, xp): + if xp is clpy: + raise IndexError(self.tbs.get(clpy)) + elif xp is numpy: + raise Exception(self.tbs.get(numpy)) + + # Neither `IndexError` nor `Exception` is derived from `ValueError`, + # therefore expect an error + pattern = re.compile( + self.tbs.get(clpy) + '.*' + self.tbs.get(numpy), re.S) + with six.assertRaisesRegex(self, AssertionError, pattern): + dummy_clpy_derived_unaccept_error(self) + + def test_numpy_derived_unaccept_error(self): + @testing.helper.numpy_clpy_raises(accept_error=ValueError) + def dummy_numpy_derived_unaccept_error(self, xp): + if xp is clpy: + raise Exception(self.tbs.get(clpy)) + elif xp is numpy: + raise ValueError(self.tbs.get(numpy)) + + # `Exception` is not derived from `ValueError`, therefore expect an + # error + pattern = re.compile( + self.tbs.get(clpy) + '.*' + self.tbs.get(numpy), re.S) + with six.assertRaisesRegex(self, AssertionError, pattern): + dummy_numpy_derived_unaccept_error(self) def test_forbidden_error(self): - clpy_error = Exception() - clpy_tb = 'xxxx' - numpy_error = Exception() - numpy_tb = 'yyyy' - # Use re.S mode to ignore new line characters - pattern = re.compile(clpy_tb + '.*' + numpy_tb, re.S) + @testing.helper.numpy_clpy_raises(accept_error=False) + def dummy_forbidden_error(self, xp): + raise Exception(self.tbs.get(xp)) + + pattern = re.compile( + self.tbs.get(clpy) + '.*' + self.tbs.get(numpy), re.S) with six.assertRaisesRegex(self, AssertionError, pattern): - helper._check_clpy_numpy_error( - self, clpy_error, clpy_tb, - numpy_error, numpy_tb, accept_error=False) + dummy_forbidden_error(self) class NumPyCuPyDecoratorBase(object):