![]() |
Eigen-unsupported
3.3.3
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Mehdi Goli Codeplay Software Ltd. 00005 // Ralph Potter Codeplay Software Ltd. 00006 // Luke Iwanski Codeplay Software Ltd. 00007 // Cummins Chris PhD student at The University of Edinburgh. 00008 // Contact: <eigen@codeplay.com> 00009 // 00010 // This Source Code Form is subject to the terms of the Mozilla 00011 // Public License v. 2.0. If a copy of the MPL was not distributed 00012 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00013 00014 /***************************************************************** 00015 * TensorSyclRun.h 00016 * 00017 * \brief: 00018 * Schedule_kernel invoke an specialised version of kernel struct. The 00019 * specialisation is based on the data dimension in sycl buffer 00020 * 00021 *****************************************************************/ 00022 00023 #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 00024 #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 00025 00026 namespace Eigen { 00027 namespace TensorSycl { 00032 template <typename Expr, typename Dev> 00033 void run(Expr &expr, Dev &dev) { 00034 Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev); 00035 const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); 00036 if (needs_assign) { 00037 typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr; 00038 auto functors = internal::extractFunctors(evaluator); 00039 00040 size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; 00041 dev.m_queue.submit([&](cl::sycl::handler &cgh) { 00042 00043 // create a tuple of accessors from Evaluator 00044 auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); 00045 const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; 00046 size_t GRange=range; 00047 if (tileSize>GRange) tileSize=GRange; 00048 else if(GRange>tileSize){ 00049 size_t xMode = GRange % tileSize; 00050 if (xMode != 0) GRange += (tileSize - xMode); 00051 } 00052 // run the kernel 00053 cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { 00054 typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr; 00055 auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); 00056 auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice()); 00057 if (itemID.get_global_linear_id() < range) { 00058 device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id())); 00059 } 00060 }); 00061 }); 00062 dev.m_queue.throw_asynchronous(); 00063 } 00064 00065 evaluator.cleanup(); 00066 } 00067 } // namespace TensorSycl 00068 } // namespace Eigen 00069 00070 #endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP