diff options
author | paveldyakov <70019067+paveldyakov@users.noreply.github.com> | 2021-03-03 10:10:38 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-03 10:10:38 +0300 |
commit | 39a58022a0eb67a8a8b63fdd46d9bfc3acaa9425 (patch) | |
tree | f045494b1f7ae1d84022fbb8173e0572033aeb27 | |
parent | Will add retry/try catch protection for UB18 test (#134) (diff) | |
download | llvm-project-39a58022a0eb67a8a8b63fdd46d9bfc3acaa9425.tar.gz llvm-project-39a58022a0eb67a8a8b63fdd46d9bfc3acaa9425.tar.bz2 llvm-project-39a58022a0eb67a8a8b63fdd46d9bfc3acaa9425.zip |
[RNG] Add optimizations (#138)
[RNG] Add optimizations
7 files changed, 330 insertions, 116 deletions
diff --git a/include/oneapi/dpl/internal/random_impl/discard_block_engine.h b/include/oneapi/dpl/internal/random_impl/discard_block_engine.h index eb891a71e5e6..c1b5ba8667ca 100644 --- a/include/oneapi/dpl/internal/random_impl/discard_block_engine.h +++ b/include/oneapi/dpl/internal/random_impl/discard_block_engine.h @@ -82,25 +82,42 @@ class discard_block_engine if (!__num_to_skip) return; - for (; __num_to_skip > 0; --__num_to_skip) - generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>(); + if (__num_to_skip < (used_block - n_)) + { + n_ += __num_to_skip; + engine_.discard(__num_to_skip); + } + else + { + unsigned long long __n_skip = + __num_to_skip + static_cast<unsigned long long>((__num_to_skip + n_) / used_block) * + static_cast<unsigned long long>(block_size - used_block); + // Check the oveflow case + if (__n_skip >= __num_to_skip) + { + n_ = (__num_to_skip - (used_block - n_)) % used_block; + engine_.discard(__n_skip); + } + else + { + for (; __num_to_skip > 0; --__num_to_skip) + operator()(); + } + } } // operator () returns bits of engine recurrence result_type operator()() { - result_type res = generate_internal<internal::type_traits_t<result_type>::num_elems>(); - - return res; + return generate_internal<internal::type_traits_t<result_type>::num_elems>(); } // operator () overload for result portion generation result_type - operator()(unsigned int __randoms_num) + operator()(unsigned int __random_nums) { - result_type res = generate_internal<internal::type_traits_t<result_type>::num_elems>(__randoms_num); - return res; + return generate_internal<internal::type_traits_t<result_type>::num_elems>(__random_nums); } // Property function @@ -154,25 +171,36 @@ class discard_block_engine generate_internal() { result_type __res; - for (int __i = 0; __i < _N; ++__i) + if (_N < (used_block - n_)) { - __res[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>(); + __res = engine_(); + n_ += _N; + } + else + { + for (int __i = 0; __i < _N; ++__i) + { + __res[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>(); + } } - return __res; } template <int _N> typename ::std::enable_if<(_N > 0), result_type>::type - generate_internal(unsigned int __randoms_num) + generate_internal(unsigned int __random_nums) { - result_type __res; - for (unsigned int __i = 0; __i < __randoms_num; ++__i) + if (__random_nums >= _N) + return operator()(); + + result_type __part_vec; + + for (unsigned int __i = 0; __i < __random_nums; ++__i) { - __res[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>(); + __part_vec[__i] = generate_internal_scalar<internal::type_traits_t<result_type>::num_elems>(); } - return __res; + return __part_vec; } _Engine engine_; diff --git a/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h b/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h index 96efa8ea94da..d5a7d418fda0 100644 --- a/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h +++ b/include/oneapi/dpl/internal/random_impl/linear_congruential_engine.h @@ -75,9 +75,9 @@ class linear_congruential_engine // Skipping sequence if (__num_to_skip == 0) return; - constexpr bool flag = (increment == 0) && (modulus < ::std::numeric_limits<::std::uint32_t>::max()) && - (multiplier < ::std::numeric_limits<::std::uint32_t>::max()); - skip_seq<internal::type_traits_t<result_type>::num_elems, flag>(__num_to_skip); + constexpr bool __flag = (increment == 0) && (modulus < ::std::numeric_limits<::std::uint32_t>::max()) && + (multiplier < ::std::numeric_limits<::std::uint32_t>::max()); + skip_seq<internal::type_traits_t<result_type>::num_elems, __flag>(__num_to_skip); } // operator () returns bits of engine recurrence @@ -94,9 +94,9 @@ class linear_congruential_engine // operator () overload for result portion generation result_type - operator()(unsigned int __randoms_num) + operator()(unsigned int __random_nums) { - return result_portion_internal<internal::type_traits_t<result_type>::num_elems>(__randoms_num); + return result_portion_internal<internal::type_traits_t<result_type>::num_elems>(__random_nums); } private: @@ -143,7 +143,7 @@ class linear_congruential_engine state_[0] = mod_scalar(state_[0]); - for (int __i = 1u; __i < _N; __i++) + for (int __i = 1u; __i < _N; ++__i) state_[__i] = mod_scalar(state_[__i - 1u]); } @@ -154,9 +154,7 @@ class linear_congruential_engine ::std::uint64_t __a2; ::std::uint64_t __mod = static_cast<::std::uint64_t>(modulus); ::std::uint64_t __a = static_cast<::std::uint64_t>(multiplier); - scalar_type __r; - - __r = 1; + scalar_type __r = 1; do { @@ -182,7 +180,7 @@ class linear_congruential_engine typename ::std::enable_if<(_N == 0) && (_FLAG == false)>::type skip_seq(unsigned long long __num_to_skip) { - for (unsigned long long __i = 0; __i < __num_to_skip; ++__i) + for (; __num_to_skip > 0; --__num_to_skip) state_ = mod_scalar(state_); } @@ -190,7 +188,7 @@ class linear_congruential_engine typename ::std::enable_if<(_N == 1) && (_FLAG == false)>::type skip_seq(unsigned long long __num_to_skip) { - for (unsigned long long __i = 0; __i < __num_to_skip; ++__i) + for (; __num_to_skip > 0; --__num_to_skip) state_[0] = mod_scalar(state_[0]); } @@ -198,11 +196,11 @@ class linear_congruential_engine typename ::std::enable_if<(_N > 1) && (_FLAG == false)>::type skip_seq(unsigned long long __num_to_skip) { - for (unsigned long long __i = 0; __i < __num_to_skip; ++__i) + for (; __num_to_skip > 0; --__num_to_skip) { - for (int __j = 0; __j < (_N - 1); ++__j) + for (int __i = 0; __i < (_N - 1); ++__i) { - state_[__j] = state_[__j + 1]; + state_[__i] = state_[__i + 1]; } state_[_N - 1] = mod_scalar(state_[_N - 2]); } @@ -232,24 +230,24 @@ class linear_congruential_engine { ::std::uint64_t __mod = modulus, __inc = increment; ::std::uint64_t __mult = pow_mult_n(__num_to_skip); - for (unsigned int __i = 0; __i < _N; ++__i) + for (int __i = 0; __i < _N; ++__i) state_[__i] = static_cast<scalar_type>(((__mult * static_cast<::std::uint64_t>(state_[__i])) % __mod)); } // result_portion implementation template <int _N> typename ::std::enable_if<(_N > 0), result_type>::type - result_portion_internal(unsigned int __randoms_num) + result_portion_internal(unsigned int __random_nums) { result_type __part_vec; - if (__randoms_num < 1) - return __part_vec; - unsigned int __num_to_gen = (__randoms_num > _N) ? _N : __randoms_num; - for (unsigned int __i = 0; __i < __num_to_gen; ++__i) + if (__random_nums >= _N) + return operator()(); + + for (unsigned int __i = 0; __i < __random_nums; ++__i) __part_vec[__i] = state_[__i]; - discard(__num_to_gen); + discard(__random_nums); return __part_vec; } diff --git a/include/oneapi/dpl/internal/random_impl/normal_distribution.h b/include/oneapi/dpl/internal/random_impl/normal_distribution.h index 60ef897cfc4c..08615a6c3b95 100644 --- a/include/oneapi/dpl/internal/random_impl/normal_distribution.h +++ b/include/oneapi/dpl/internal/random_impl/normal_distribution.h @@ -118,23 +118,16 @@ class normal_distribution template <class _Engine> result_type - operator()(_Engine& __engine, unsigned int __randoms_num) + operator()(_Engine& __engine, unsigned int __random_nums) { - return operator()<_Engine>(__engine, param_type(mean_, stddev_), __randoms_num); + return operator()<_Engine>(__engine, param_type(mean_, stddev_), __random_nums); } template <class _Engine> result_type - operator()(_Engine& __engine, const param_type& __params, unsigned int __randoms_num) + operator()(_Engine& __engine, const param_type& __params, unsigned int __random_nums) { - result_type __part_vec; - if (__randoms_num < 1) - return __part_vec; - - int __portion = (__randoms_num > size_of_type_) ? size_of_type_ : __randoms_num; - - __part_vec = result_portion_internal<size_of_type_, _Engine>(__engine, __params, __portion); - return __part_vec; + return result_portion_internal<size_of_type_, _Engine>(__engine, __params, __random_nums); } private: @@ -161,14 +154,14 @@ class normal_distribution // Callback function template <typename _Type = float> - scalar_type + inline scalar_type callback() { return ((scalar_type*)(internal::gaussian_sp_table))[1]; } template <> - scalar_type + inline scalar_type callback<double>() { return ((scalar_type*)(internal::gaussian_dp_table))[1]; @@ -176,14 +169,14 @@ class normal_distribution // Get 2 * pi function template <typename _Type = float> - scalar_type + inline scalar_type pi2() { return ((scalar_type*)(internal::gaussian_sp_table))[0]; } template <> - scalar_type + inline scalar_type pi2<double>() { return ((scalar_type*)(internal::gaussian_dp_table))[0]; @@ -194,9 +187,10 @@ class normal_distribution typename ::std::enable_if<(_Ndistr != 0), result_type>::type generate(_Engine& __engine, const param_type __params) { - return generate_vec_internal(__engine, __params, _Ndistr); + return generate_vec<_Ndistr, _Engine>(__engine, __params); } + // Specialization of the scalar generation template <int _Ndistr, class _Engine> typename ::std::enable_if<(_Ndistr == 0), result_type>::type generate(_Engine& __engine, const param_type __params) @@ -232,14 +226,86 @@ class normal_distribution return __res; } - // Implementation for the generate vector function + // Specialization of the vector generation with size = [1; 3] + template <int __N, class _Engine> + typename ::std::enable_if<(__N <= 3), result_type>::type + generate_vec(_Engine& __engine, const param_type __params) + { + return generate_n_elems<_Engine>(__engine, __params, __N); + } + + // Specialization of the vector generation with size = [4; 8; 16] + template <int __N, class _Engine> + typename ::std::enable_if<(__N > 3), result_type>::type + generate_vec(_Engine& __engine, const param_type __params) + { + uniform_result_type __u; + scalar_type __mean = __params.first, __stddev = __params.second; + result_type __res; + + constexpr unsigned int __vec_size = __N / 2; + sycl::vec<scalar_type, __vec_size> __sin, __cos; + sycl::vec<scalar_type, __vec_size> __u1_transformed; + + __u = uniform_real_distribution_(__engine, + param_type(static_cast<scalar_type>(0.0), static_cast<scalar_type>(1.0)), __N); + + sycl::vec<scalar_type, __vec_size> __u1 = __u.even(); + sycl::vec<scalar_type, __vec_size> __u2 = __u.odd(); + + // Calculate sycl::log with callback + __u1_transformed = + select(sycl::log(__u1), sycl::vec<scalar_type, __vec_size>{callback<scalar_type>()}, + sycl::isequal(__u1, sycl::vec<scalar_type, __vec_size>{static_cast<scalar_type>(0.0)})); + + // Get sincos + __sin = sycl::sincos(pi2<scalar_type>() * __u2, &__cos); + + if (!flag_) + { + __u1_transformed = sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed); + __res.even() = __u1_transformed * __sin * __stddev + __mean; + __res.odd() = __u1_transformed * __cos * __stddev + __mean; + + // Flag is still false as code-branch for 4/8/16 vector sizes + } + else + { + __res[0] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * saved_ln_) * + sycl::cos(pi2<scalar_type>() * saved_u2_)); + + for (int __i = 0, __j = 0; __i < __N; __i += 2, ++__j) + { + __res[__i + 1] = + (sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed[__j]) * __sin[__j]) * __stddev + + __mean; + __res[__i + 2] = + (sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed[__j]) * __cos[__j]) * __stddev + + __mean; + } + + __res[__N - 1] = (sycl::sqrt(static_cast<scalar_type>(-2.0) * __u1_transformed[__vec_size - 1]) * + __sin[__vec_size - 1]) * + __stddev + + __mean; + + saved_ln_ = __u1_transformed[__vec_size - 1]; + saved_u2_ = __u2[__vec_size - 1]; + + // Flag is still true as code-branch for 4/8/16 vector sizes + } + return __res; + } + + // Implementation for the N vector's elements generation template <class _Engine> result_type - generate_vec_internal(_Engine& __engine, const param_type __params, unsigned int __N) + generate_n_elems(_Engine& __engine, const param_type __params, unsigned int __N) { uniform_result_type __u; scalar_type __u1, __u2, __ln; + scalar_type __sin, __cos; scalar_type __mean = __params.first, __stddev = __params.second; result_type __res; @@ -253,11 +319,12 @@ class normal_distribution { __u1 = __u[__i]; __u2 = __u[__i + 1]; + + __sin = sycl::sincos(pi2<scalar_type>() * __u2, &__cos); + __ln = (__u1 == static_cast<scalar_type>(0.0)) ? callback<scalar_type>() : sycl::log(__u1); - __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * - sycl::sin(pi2<scalar_type>() * __u2)); - __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * - sycl::cos(pi2<scalar_type>() * __u2)); + __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __sin); + __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __cos); } if (__tail) { @@ -286,13 +353,13 @@ class normal_distribution for (unsigned int __i = 1; __i < (__N - __tail); __i += 2) { + __sin = sycl::sincos(pi2<scalar_type>() * __u2, &__cos); + __u1 = __u[__i - 1]; __u2 = __u[__i]; __ln = (__u1 == static_cast<scalar_type>(0.0)) ? callback<scalar_type>() : sycl::log(__u1); - __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * - sycl::sin(pi2<scalar_type>() * __u2)); - __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * - sycl::cos(pi2<scalar_type>() * __u2)); + __res[__i] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __sin); + __res[__i + 1] = __mean + __stddev * (sycl::sqrt(-static_cast<scalar_type>(2.0) * __ln) * __cos); } if (__tail) { @@ -315,7 +382,14 @@ class normal_distribution typename ::std::enable_if<(_Ndistr != 0), result_type>::type result_portion_internal(_Engine& __engine, const param_type __params, unsigned int __N) { - return generate_vec_internal(__engine, __params, __N); + result_type __part_vec; + if (__N == 0) + return __part_vec; + else if (__N >= _Ndistr) + return operator()(__engine); + + __part_vec = generate_n_elems(__engine, __params, __N); + return __part_vec; } }; diff --git a/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h b/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h index 48831f699881..918e394c6872 100644 --- a/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h +++ b/include/oneapi/dpl/internal/random_impl/subtract_with_carry_engine.h @@ -84,18 +84,14 @@ class subtract_with_carry_engine result_type operator()() { - result_type res = generate_internal<internal::type_traits_t<result_type>::num_elems>(); - - return res; + return generate_internal<internal::type_traits_t<result_type>::num_elems>(); } // operator () overload for result portion generation result_type - operator()(unsigned int __randoms_num) + operator()(unsigned int __random_nums) { - result_type __res = generate_internal<internal::type_traits_t<result_type>::num_elems>(__randoms_num); - - return __res; + return result_portion_internal<internal::type_traits_t<result_type>::num_elems>(__random_nums); } private: @@ -142,12 +138,14 @@ class subtract_with_carry_engine } else { - x_[i_] = max() - x_[__id_2] - c_ + x_[__id_1] + 1u; + x_[__id] = max() - x_[__id_2] - c_ + x_[__id_1] + 1u; c_ = 1; } + i_++; if (i_ >= long_lag) i_ = 0; + return x_[__id]; }; @@ -171,17 +169,22 @@ class subtract_with_carry_engine return __res; } + // result_portion implementation template <int _N> typename ::std::enable_if<(_N > 0), result_type>::type - generate_internal(unsigned int __randoms_num) + result_portion_internal(unsigned int __random_nums) { - result_type __res; - for (unsigned int __i = 0; __i < __randoms_num; ++__i) + result_type __part_vec; + + if (__random_nums >= _N) + return operator()(); + + for (unsigned int __i = 0; __i < __random_nums; ++__i) { - __res[__i] = generate_internal_scalar(); + __part_vec[__i] = generate_internal_scalar(); } - return __res; + return __part_vec; } scalar_type x_[long_lag]; diff --git a/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h b/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h index 3954ce411764..9e4b6530ba03 100644 --- a/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h +++ b/include/oneapi/dpl/internal/random_impl/uniform_int_distribution.h @@ -104,23 +104,16 @@ class uniform_int_distribution // Generation by portion template <class _Engine> result_type - operator()(_Engine& __engine, unsigned int __randoms_num) + operator()(_Engine& __engine, unsigned int __random_nums) { - return operator()<_Engine>(__engine, param_type(a_, b_), __randoms_num); + return operator()<_Engine>(__engine, param_type(a_, b_), __random_nums); } template <class _Engine> result_type - operator()(_Engine& __engine, const param_type& __params, unsigned int __randoms_num) + operator()(_Engine& __engine, const param_type& __params, unsigned int __random_nums) { - result_type __part_vec; - if (__randoms_num < 1) - return __part_vec; - - unsigned int __portion = (__randoms_num > size_of_type_) ? size_of_type_ : __randoms_num; - - __part_vec = result_portion_internal<size_of_type_, _Engine>(__engine, __params, __portion); - return __part_vec; + return result_portion_internal<size_of_type_, _Engine>(__engine, __params, __random_nums); } private: @@ -150,7 +143,11 @@ class uniform_int_distribution __engine, ::std::pair<double, double>(static_cast<double>(__params.first), static_cast<double>(__params.second) + 1.0)); - return __res.template convert<scalar_type, sycl::rounding_mode::rte>(); + result_type __res_ret; + for (int __i = 0; __i < _Ndistr; ++__i) + __res_ret[__i] = static_cast<scalar_type>(__res[__i]); + + return __res_ret; } template <int _Ndistr, class _Engine> @@ -169,13 +166,22 @@ class uniform_int_distribution typename ::std::enable_if<(_Ndistr != 0), result_type>::type result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N) { + result_type __part_vec; + if (__N == 0) + return __part_vec; + else if (__N >= _Ndistr) + return operator()(__engine); + RealType __res = uniform_real_distribution_(__engine, ::std::pair<double, double>(static_cast<double>(__params.first), static_cast<double>(__params.second) + 1.0), __N); - return __res.template convert<scalar_type, sycl::rounding_mode::rte>(); + for (unsigned int __i = 0; __i < __N; ++__i) + __part_vec[__i] = static_cast<scalar_type>(__res[__i]); + + return __part_vec; } }; diff --git a/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h b/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h index 3c4bf1803084..8aa4ea2aa831 100644 --- a/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h +++ b/include/oneapi/dpl/internal/random_impl/uniform_real_distribution.h @@ -98,33 +98,23 @@ class uniform_real_distribution result_type operator()(_Engine& __engine, const param_type& __params) { - result_type __res = - generate<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems, _Engine>( - __engine, __params); - return __res; + return generate<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems, _Engine>( + __engine, __params); } template <class _Engine> result_type - operator()(_Engine& __engine, unsigned int __randoms_num) + operator()(_Engine& __engine, unsigned int __random_nums) { - return operator()<_Engine>(__engine, param_type(a_, b_), __randoms_num); + return operator()<_Engine>(__engine, param_type(a_, b_), __random_nums); } template <class _Engine> result_type - operator()(_Engine& __engine, const param_type& __params, unsigned int __randoms_num) + operator()(_Engine& __engine, const param_type& __params, unsigned int __random_nums) { - result_type __part_vec; - if (__randoms_num < 1) - return __part_vec; - - unsigned int __portion = (__randoms_num > size_of_type_) ? size_of_type_ : __randoms_num; - - __part_vec = - result_portion_internal<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems, - _Engine>(__engine, __params, __portion); - return __part_vec; + return result_portion_internal<size_of_type_, internal::type_traits_t<typename _Engine::result_type>::num_elems, + _Engine>(__engine, __params, __random_nums); } private: @@ -145,7 +135,11 @@ class uniform_real_distribution generate(_Engine& __engine, const param_type& __params) { auto __engine_output = __engine(); - auto __res = __engine_output.template convert<scalar_type, sycl::rounding_mode::rte>(); + result_type __res; + + for (int __i = 0; __i < _Ndistr; ++__i) + __res[__i] = static_cast<scalar_type>(__engine_output[__i]); + __res = ((__res - __engine.min()) / (1 + static_cast<scalar_type>(__engine.max() - __engine.min()))) * (__params.second - __params.first) + __params.first; @@ -200,7 +194,7 @@ class uniform_real_distribution { sycl::vec<scalar_type, _Ndistr> __res; int __i; - int __tail_size = _Ndistr % _Negnine; + constexpr int __tail_size = _Ndistr % _Negnine; for (__i = 0; __i < _Ndistr; __i += _Negnine) { auto __engine_output = __engine(); @@ -248,11 +242,11 @@ class uniform_real_distribution // Implementation for result_portion function template <int _Ndistr, int _Negnine, class _Engine> typename ::std::enable_if<((_Ndistr <= _Negnine) & (_Ndistr != 0)), result_type>::type - result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N) + generate_n_elems(_Engine& __engine, const param_type& __params, unsigned int __N) { auto __engine_output = __engine(__N); result_type __res; - for (unsigned int __i = 0; __i < __N; ++__i) + for (int __i = 0; __i < __N; ++__i) { __res[__i] = static_cast<scalar_type>(__engine_output[__i]); __res[__i] = @@ -266,15 +260,15 @@ class uniform_real_distribution template <int _Ndistr, int _Negnine, class _Engine> typename ::std::enable_if<((_Ndistr > _Negnine) & (_Negnine != 0)), result_type>::type - result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N) + generate_n_elems(_Engine& __engine, const param_type& __params, unsigned int __N) { result_type __res; - unsigned int __i; + int __i; if (_Negnine >= __N) { auto __engine_output = __engine(__N); - for (unsigned int __i = 0; __i < __N; ++__i) + for (__i = 0; __i < __N; ++__i) { __res[__i] = static_cast<scalar_type>(__engine_output[__i]); __res[__i] = @@ -321,10 +315,10 @@ class uniform_real_distribution template <int _Ndistr, int _Negnine, class _Engine> typename ::std::enable_if<((_Ndistr > _Negnine) & (_Negnine == 0)), result_type>::type - result_portion_internal(_Engine& __engine, const param_type& __params, unsigned int __N) + generate_n_elems(_Engine& __engine, const param_type& __params, unsigned int __N) { result_type __res; - for (unsigned int __i = 0; __i < __N; ++__i) + for (int __i = 0; __i < __N; ++__i) { __res[__i] = static_cast<scalar_type>(__engine()); __res[__i] = @@ -335,6 +329,21 @@ class uniform_real_distribution return __res; } + + // Implementation for result_portion function + template <int _Ndistr, int _Negnine, class _Engine> + typename ::std::enable_if<(_Ndistr != 0), result_type>::type + result_portion_internal(_Engine& __engine, const param_type __params, unsigned int __N) + { + result_type __part_vec; + if (__N == 0) + return __part_vec; + else if (__N >= _Ndistr) + return operator()(__engine); + + __part_vec = generate_n_elems<_Ndistr, _Negnine, _Engine>(__engine, __params, __N); + return __part_vec; + } }; } // namespace dpl diff --git a/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp b/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp index cf28e6f869f8..4725de219ae2 100644 --- a/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp +++ b/test/rng_testsuite/statistics_tests/normal_distribution_test.pass.cpp @@ -215,6 +215,72 @@ int test_portion(oneapi::dpl::internal::element_type_t<RealType> mean, oneapi::d } template<class RealType, class UIntType> +int test_flag(oneapi::dpl::internal::element_type_t<RealType> mean, oneapi::dpl::internal::element_type_t<RealType> stddev, int nsamples) { + + sycl::queue queue(sycl::default_selector{}); + + // memory allocation + std::vector<oneapi::dpl::internal::element_type_t<RealType>> std_samples(nsamples); + std::vector<oneapi::dpl::internal::element_type_t<RealType>> dpstd_samples(nsamples); + + constexpr int num_elems = oneapi::dpl::internal::type_traits_t<RealType>::num_elems == 0 ? 1 : oneapi::dpl::internal::type_traits_t<RealType>::num_elems; + constexpr int num_to_skip = (num_elems + 1) % 2 ? (num_elems + 2) : (num_elems + 1); + + // dpstd generation + { + sycl::buffer<oneapi::dpl::internal::element_type_t<RealType>, 1> dpstd_buffer(dpstd_samples.data(), nsamples); + + queue.submit([&](sycl::handler &cgh) { + auto dpstd_acc = dpstd_buffer.template get_access<sycl::access::mode::write>(cgh); + + cgh.parallel_for<>(sycl::range<1>(nsamples / (num_elems + 1)), + [=](sycl::item<1> idx) { + + unsigned long long offset = idx.get_linear_id() * num_to_skip; + oneapi::dpl::linear_congruential_engine<UIntType, a, c, m> engine(seed, offset); + oneapi::dpl::normal_distribution<RealType> distr(mean, stddev); + + // Generate the first element + auto res_1 = distr(engine, 1); + + // Generate the rest elements + auto res = distr(engine); + + dpstd_acc[idx * (num_elems + 1)] = res_1[0]; + for(int i = 0; i < num_elems; ++i) + dpstd_acc[idx * (num_elems + 1) + i + 1] = res[i]; + }); + }); + queue.wait(); + } + + // std generation + generate_std<oneapi::dpl::internal::element_type_t<UIntType>, oneapi::dpl::internal::element_type_t<RealType>> + (num_elems + 1, nsamples, mean, stddev, std_samples); + + // comparison + int err = 0; + for(int i = 0; i < nsamples; ++i) { + if (abs(std_samples[i] - dpstd_samples[i]) > eps) { + std::cout << "\nError: std_sample[" << i << "] = " << std_samples[i] << ", dpstd_samples[" << i << "] = " << dpstd_samples[i]; + err++; + } + } + + // statistics check + err += statistics_check(nsamples, mean, stddev, dpstd_samples); + + if(err) { + std::cout << "\tFailed" << std::endl; + } + else { + std::cout << "\tPassed" << std::endl; + } + + return err; +} + +template<class RealType, class UIntType> int tests_set(int nsamples) { constexpr int nparams = 2; @@ -254,6 +320,20 @@ int tests_set_portion(std::int32_t nsamples, unsigned int part) { return 0; } +template<class RealType, class UIntType> +int tests_set_flag(int nsamples) { + + int err; + // Test for all non-zero parameters + std::cout << "normal_distribution test<type>, mean = " << 0.0 << ", stddev = " << 1.0 << + ", nsamples = " << nsamples << ", flag = true, vec_size = " << oneapi::dpl::internal::type_traits_t<RealType>::num_elems; + err = test_flag<RealType, UIntType>(0.0, 1.0, nsamples); + if (err) + return 1; + + return 0; +} + #endif // _ONEDPL_BACKEND_SYCL int main() { @@ -353,6 +433,22 @@ int main() { return 1; } + + // testing flag = true case + std::cout << "----------------------------------------------------------------------" << std::endl; + std::cout << "Float vector sizes = [1, 2, 3, 4, 8, 16], flag = true" << std::endl; + std::cout << "----------------------------------------------------------------------" << std::endl; + err = tests_set_flag<sycl::vec<float, 1>, sycl::vec<std::uint32_t, 1>>(160); + err = tests_set_flag<sycl::vec<float, 2>, sycl::vec<std::uint32_t, 2>>(99); + err = tests_set_flag<sycl::vec<float, 3>, sycl::vec<std::uint32_t, 3>>(160); + err = tests_set_flag<sycl::vec<float, 4>, sycl::vec<std::uint32_t, 4>>(100); + err = tests_set_flag<sycl::vec<float, 8>, sycl::vec<std::uint32_t, 8>>(99); + err = tests_set_flag<sycl::vec<float, 16>, sycl::vec<std::uint32_t, 16>>(170); + if(err) { + std::cout << "Test FAILED" << std::endl; + return 1; + } + #else std::cout << "\tTest is skipped for non-SYCL backend" << std::endl; #endif // _ONEDPL_BACKEND_SYCL |