From fbeecd1598168ae8fad3994236cf450ff4f0e280 Mon Sep 17 00:00:00 2001 From: tbbdev Date: Fri, 29 Sep 2017 16:10:50 +0300 Subject: [PATCH] Committing Parallel STL 20170726 open source release --- .gitattributes | 45 + .gitignore | 86 + CHANGES | 55 + LICENSE | 201 +++ README.md | 43 +- doc/Release_Notes.txt | 102 ++ examples/convex_hull/Makefile | 59 + examples/convex_hull/Makefile.windows | 48 + examples/convex_hull/convex_hull.cpp | 168 ++ examples/convex_hull/msvs/convex_hull.sln | 28 + examples/convex_hull/msvs/convex_hull.vcxproj | 162 ++ examples/convex_hull/readme.html | 399 +++++ examples/convex_hull/utils.h | 78 + .../convex_hull.xcodeproj/project.pbxproj | 294 ++++ examples/dot_product/Makefile | 59 + examples/dot_product/Makefile.windows | 48 + examples/dot_product/dot_product.cpp | 56 + examples/dot_product/msvs/dot_product.sln | 28 + examples/dot_product/msvs/dot_product.vcxproj | 159 ++ examples/dot_product/readme.html | 392 +++++ .../dot_product.xcodeproj/project.pbxproj | 293 ++++ examples/gamma_correction/Makefile | 59 + examples/gamma_correction/Makefile.windows | 48 + .../gamma_correction/gamma_correction.cpp | 98 ++ .../msvs/gamma_correction.sln | 28 + .../msvs/gamma_correction.vcxproj | 163 ++ examples/gamma_correction/readme.html | 403 +++++ examples/gamma_correction/utils.cpp | 107 ++ examples/gamma_correction/utils.h | 105 ++ .../project.pbxproj | 304 ++++ include/pstl/_internal/algorithm_impl.h | 1527 +++++++++++++++++ include/pstl/_internal/common.h | 153 ++ .../pstl/_internal/execution_policy_impl.h | 138 ++ include/pstl/_internal/memory_impl.h | 415 +++++ include/pstl/_internal/numeric_impl.h | 213 +++ include/pstl/_internal/parallel_impl_tbb.h | 667 +++++++ include/pstl/_internal/pstl_config.h | 122 ++ include/pstl/_internal/simd_impl.h | 391 +++++ include/pstl/algorithm | 888 ++++++++++ include/pstl/execution | 132 ++ include/pstl/iterators.h | 210 +++ include/pstl/memory | 138 ++ include/pstl/numeric | 187 ++ linux/pstlvars.csh | 70 + 
linux/pstlvars.sh | 64 + mac/pstlvars.csh | 31 + mac/pstlvars.sh | 31 + windows/pstlvars.bat | 70 + 48 files changed, 9564 insertions(+), 1 deletion(-) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 CHANGES create mode 100644 LICENSE create mode 100644 doc/Release_Notes.txt create mode 100644 examples/convex_hull/Makefile create mode 100644 examples/convex_hull/Makefile.windows create mode 100644 examples/convex_hull/convex_hull.cpp create mode 100644 examples/convex_hull/msvs/convex_hull.sln create mode 100644 examples/convex_hull/msvs/convex_hull.vcxproj create mode 100644 examples/convex_hull/readme.html create mode 100644 examples/convex_hull/utils.h create mode 100644 examples/convex_hull/xcode/convex_hull.xcodeproj/project.pbxproj create mode 100644 examples/dot_product/Makefile create mode 100644 examples/dot_product/Makefile.windows create mode 100644 examples/dot_product/dot_product.cpp create mode 100644 examples/dot_product/msvs/dot_product.sln create mode 100644 examples/dot_product/msvs/dot_product.vcxproj create mode 100644 examples/dot_product/readme.html create mode 100644 examples/dot_product/xcode/dot_product.xcodeproj/project.pbxproj create mode 100644 examples/gamma_correction/Makefile create mode 100644 examples/gamma_correction/Makefile.windows create mode 100644 examples/gamma_correction/gamma_correction.cpp create mode 100644 examples/gamma_correction/msvs/gamma_correction.sln create mode 100644 examples/gamma_correction/msvs/gamma_correction.vcxproj create mode 100644 examples/gamma_correction/readme.html create mode 100644 examples/gamma_correction/utils.cpp create mode 100644 examples/gamma_correction/utils.h create mode 100644 examples/gamma_correction/xcode/gamma_correction.xcodeproj/project.pbxproj create mode 100644 include/pstl/_internal/algorithm_impl.h create mode 100644 include/pstl/_internal/common.h create mode 100644 include/pstl/_internal/execution_policy_impl.h create mode 100644 
include/pstl/_internal/memory_impl.h create mode 100644 include/pstl/_internal/numeric_impl.h create mode 100644 include/pstl/_internal/parallel_impl_tbb.h create mode 100644 include/pstl/_internal/pstl_config.h create mode 100644 include/pstl/_internal/simd_impl.h create mode 100644 include/pstl/algorithm create mode 100644 include/pstl/execution create mode 100644 include/pstl/iterators.h create mode 100644 include/pstl/memory create mode 100644 include/pstl/numeric create mode 100644 linux/pstlvars.csh create mode 100644 linux/pstlvars.sh create mode 100644 mac/pstlvars.csh create mode 100644 mac/pstlvars.sh create mode 100644 windows/pstlvars.bat diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..039edb3a9a5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,45 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto + +# Explicitly declare text files you want to always be normalized and converted +# to native line endings on checkout. +*.c text +*.h text +*.cpp text +*.def text +*.rc text +*.i text +*.sh text +*.csh text +*.mk text +*.java text +*.csv text +*.lst text +*.asm text +*.cfg text +*.css text +*.inc text +*.js text +*.rb text +*.strings text +*.txt text +*export.lst text +*.xml text +*.py text +*.md text +*.classpath text +*.cproject text +*.project text +*.properties text +*.java text +*.gradle text + +# Declare files that will always have CRLF line endings on checkout. +*.sln text eol=crlf +*.bat text eol=crlf + +# Denote all files that are truly binary and should not be modified. 
+*.png binary +*.jpg binary +*.ico binary +*.spir binary diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..0182b1411bf --- /dev/null +++ b/.gitignore @@ -0,0 +1,86 @@ +# Ignore the debug and release directories created with Makefile builds # +######################################################################### +build/*_debug/ +build/*_release/ + +# Compiled source # +################### +*.com +*.class +*.dll +*.lib +*.pdb +*.exe +*.o +*.so +*.so.1 +*.so.2 +*.dylib +*.a +*.obj +*.pyc + +*.orig +*.raw +*.sample +*.slo +*.swp +*.config +*.la +*.lai +*.lo +*.nhdr +*.nii.gz +*.nrrd + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.tgz +*.zip + +# Logs and databases # +###################### +*.log +*.sql +*.sqlite + +# OS generated files # +###################### +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# IDE generated files # +###################### +/.ninja_deps +/.ninja_log +/build.ninja +/rules.ninja +*~ +.emacs.desktop + +# Build system generated files # +################################ +CMakeCache.txt +CMakeFiles/ + +# Other # +######### +.clang_complete +.idea +.svn +crash* +*.tmp diff --git a/CHANGES b/CHANGES new file mode 100644 index 00000000000..330c0602b80 --- /dev/null +++ b/CHANGES @@ -0,0 +1,55 @@ +------------------------------------------------------------------------ +Parallel STL release within Intel(R) Parallel Studio XE 2018 + +Features / APIs: + +- Aligned the implementation with the draft N4659 of the C++ standard. + In particular, inner_product no longer supports execution policies. +- reduce and transform_reduce support unseq and par_unseq execution + policies if std::plus<> is used for reduction. +- Added counting_iterator and zip_iterator to support advanced use cases. + To use, include pstl/iterators.h header file. 
+- Added macro PSTL_USE_PARALLEL_POLICIES to control usage of parallel + execution policies (par, par_unseq); by default these are enabled. + +Examples: + +- Added dot_product and convex_hull samples to demonstrate Parallel STL + usage. + +------------------------------------------------------------------------ +Parallel STL beta update release + +Features / APIs: + +- sequenced_policy is supported by all C++17 algorithms. +- Added other execution policies support for inner_product algorithm. + +------------------------------------------------------------------------ +Parallel STL beta initial release + +Features / APIs: + +- Implemented C++17 execution policies: sequenced_policy, + parallel_policy, parallel_unsequenced_policy. +- Implemented unsequenced_policy based on the ISO C++ working group + paper P0076R3. +- sequenced_policy is supported by all except a few C++17 algorithms. +- Other execution policies are supported by the following algorithms: + adjacent_find, all_of, any_of, copy, copy_if, copy_n, count, + count_if, equal, exclusive_scan, fill, fill_n, find, find_if, + find_if_not, for_each, for_each_n, generate, generate_n, + inclusive_scan, none_of, is_sorted, is_sorted_until, reduce, + remove_copy, remove_copy_if, sort, stable_sort, transform, + transform_exclusive_scan, transform_inclusive_scan, + transform_reduce, unique_copy. + +Documentation: + +- Initial set of documents: Getting Started Guide, Release Notes. + +Examples: + +- Added gamma_correction sample to demonstrate Parallel STL usage. + +------------------------------------------------------------------------ diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index cdda0cf2609..9d3bfedc237 100644 --- a/README.md +++ b/README.md @@ -1 +1,42 @@ -test commit \ No newline at end of file +# Parallel STL +[![Stable release](https://img.shields.io/badge/version-20170726-green.svg)](https://github.com/intel/parallelstl/releases/tag/20170726) +[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE) + +Parallel STL is an implementation of the C++ standard library algorithms with support for execution policies, +as specified in the working draft N4659 for the next version of the C++ standard, commonly called C++17. +The implementation also supports the unsequenced execution policy specified in the ISO* C++ working group paper P0076R3. 
+ +Parallel STL offers a portable implementation of threaded and vectorized execution of standard C++ algorithms, optimized and validated for Intel(R) 64 processors. +For sequential execution, it relies on an available implementation of the C++ standard library. + +## Prerequisites +To use Parallel STL, you must have the following software installed: +* C++ compiler with: + * Support for C++11 + * Support for OpenMP* 4.0 SIMD constructs +* Intel(R) Threading Building Blocks (Intel(R) TBB) which is available to download in the GitHub [repository](https://github.com/01org/tbb/) + +## Release Information +Here are the latest [Changes](CHANGES) and [Release Notes](doc/Release_Notes.txt) (contains system requirements and known issues). + +## License +Parallel STL is licensed under [Apache License Version 2.0](LICENSE). + +## Documentation +[Getting Started](https://software.intel.com/en-us/get-started-with-pstl) with Parallel STL. + +## Support +Please report issues and suggestions via +[GitHub issues](https://github.com/intel/parallelstl/issues) or start a topic on the +[Intel(R) TBB forum](http://software.intel.com/en-us/forums/intel-threading-building-blocks/). + +## How to Contribute +Please, read the instructions on the official [Intel(R) TBB open source site](https://www.threadingbuildingblocks.org/submit-contribution). + +## Engineering team contacts +* [E-mail us.](mailto:inteltbbdevelopers@intel.com) + +------------------------------------------------------------------------ +Intel and the Intel logo are trademarks of Intel Corporation or its subsidiaries in the U.S. and/or other countries. + +\* Other names and brands may be claimed as the property of others. 
diff --git a/doc/Release_Notes.txt b/doc/Release_Notes.txt new file mode 100644 index 00000000000..c57a683b066 --- /dev/null +++ b/doc/Release_Notes.txt @@ -0,0 +1,102 @@ +------------------------------------------------------------------------ + Parallel STL +------------------------------------------------------------------------ + +System Requirements +------------------- + +Parallel STL is available as a part of Intel(R) Parallel Studio XE 2018 + and Intel(R) System Studio 2018. + + Parallel STL distributions are validated and officially supported for +the hardware, software, operating systems and compilers listed here. + +Hardware - Recommended + + Microsoft* Windows* Systems + Intel(R) Core(TM) processor family + Intel(R) Xeon(R) processor family + Intel(R) Xeon Phi(TM) processor family + Linux* Systems + Intel(R) Core(TM) processor family + Intel(R) Xeon(R) processor family + Intel(R) Xeon Phi(TM) processor family + macOS* Systems + Intel(R) Core(TM) processor family + Android* Systems + Intel(R) Atom(TM) processor family + +Hardware - Supported + + Intel(R) Pentium(R) 4 processor family + Intel(R) Xeon Phi(TM) coprocessor + Intel(R) Atom(TM) processor family + Non Intel(R) processors compatible with the above processors + +Software - Minimum Requirements + + Supported operating system (see below) + Supported compiler (see below) + +Software - Recommended + + Intel(R) Parallel Studio XE 2017, 2018 + Intel(R) System Studio 2017, 2018 + +Software - Supported Operating Systems + + Systems with Microsoft* Windows* operating systems + Microsoft* Windows* 10 + Microsoft* Windows* 8.1 + Microsoft* Windows* 7 SP1 + Microsoft* Windows* Server 2016 + Microsoft* Windows* Server 2012 R2 + Microsoft* Windows* Server 2008 R2 SP1 + Windows* Embedded 10 + Windows* Embedded 8.1 + Systems with Linux* operating systems + CentOS 7.1 + Debian* 8, 9 + Fedora* 24, 25 + Red Hat* Enterprise Linux* 7 + SuSE* Linux* Enterprise Server 12 + Ubuntu* 14.04 LTS, 16.04 LTS, 17.04 + Yocto 
2.2, 2.3 + Systems with OS X* and macOS* operating systems + OS X* 10.10, 10.11 + macOS* 10.12 + Systems with Android* operating systems + Android* 5.x, 6.x, 7.x + +Software - Supported Compilers + + Intel(R) C++ Compiler 16, 17 and 18 version + Microsoft* Visual C++ 14.0 (Microsoft* Visual Studio* 2015, + Windows* OS only) + Microsoft* Visual C++ 14.1 (Microsoft* Visual Studio* 2017, + Windows* OS only) + Microsoft* Windows* Software Development Kit for Windows* 10 + For each supported Linux* operating system, the standard gcc + version provided with that operating system is supported + GNU Compilers (gcc) 4.8 - 7.1 + Clang* 3.3 - 3.8 + Xcode* 6.3 - 8.3 + Android* NDK r13 - r15b + +Known Issues or limitations + + unseq and par_unseq policies only have effect with compilers that + support '#pragma omp simd' or '#pragma simd' + Parallel and vector execution is only supported for a subset of + algorithms if random access iterators are provided, while for + the rest execution will remain serial. + Depending on a compiler, zip_iterator may not work with + unseq and par_unseq policies. + +------------------------------------------------------------------------ + +Intel, the Intel logo, Intel Core, Intel Atom, Xeon, Intel Xeon Phi, and +Pentium are trademarks of Intel Corporation in the U.S. and/or other +countries. + +* Other names and brands may be claimed as the property of others. diff --git a/examples/convex_hull/Makefile b/examples/convex_hull/Makefile new file mode 100644 index 00000000000..4395c9d3ad5 --- /dev/null +++ b/examples/convex_hull/Makefile @@ -0,0 +1,59 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# + +# GNU Makefile that builds and runs example. +run_cmd= +PROG=convex_hull.exe +ARGS= + +CXXFLAGS += -D__PSTL_USE_TBB -std=c++11 +# The C++ compiler +ifneq (,$(shell which icc 2>/dev/null)) +CXX=icc + +ifneq (, $(filter $(target), mic)) +CXXFLAGS += -mmic +else +CXXFLAGS += -xHOST +endif +CXXFLAGS += -qopenmp-simd +endif # which icc + +ifeq ($(shell uname), Linux) +LIBS+= -lrt +else ifeq ($(shell uname), Darwin) +override CXXFLAGS += -Wl,-rpath,$(TBBROOT)/lib +endif + +all: release test + +release: *.cpp + $(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(PROG) $^ -ltbb $(LIBS) + +debug: *.cpp + $(CXX) -O0 -g -DTBB_USE_DEBUG=1 $(CXXFLAGS) -o $(PROG) $^ -ltbb_debug $(LIBS) + +clean: + $(RM) $(PROG) *.o *.d + +test: + $(run_cmd) ./$(PROG) $(ARGS) + +perf_build: release + +perf_run: test diff --git a/examples/convex_hull/Makefile.windows b/examples/convex_hull/Makefile.windows new file mode 100644 index 00000000000..d93572cfb9d --- /dev/null +++ b/examples/convex_hull/Makefile.windows @@ -0,0 +1,48 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# + +# Common Makefile that builds and runs example. + +# Just specify your program basename +PROG=convex_hull +ARGS= + +# Trying to find if icl.exe is set +CXX1 = $(TBB_CXX)- +CXX2 = $(CXX1:icl.exe-=icl.exe) +CXX = $(CXX2:-=cl.exe) + +# The C++ compiler options +MYCXXFLAGS = /TP /QxHOST /Qopenmp-simd /EHsc /W3 /nologo /D _CONSOLE /D _MBCS /D WIN32 /D _CRT_SECURE_NO_DEPRECATE $(CXXFLAGS) +MYLDFLAGS =/INCREMENTAL:NO /NOLOGO /DEBUG /FIXED:NO $(LDFLAGS) + +all: release test +release: + $(CXX) *.cpp /MD /O2 /D NDEBUG $(MYCXXFLAGS) /link tbb.lib $(LIBS) $(MYLDFLAGS) /OUT:$(PROG).exe +debug: + $(CXX) *.cpp /MDd /Od /Zi /D TBB_USE_DEBUG /D _DEBUG $(MYCXXFLAGS) /link tbb_debug.lib $(LIBS) $(MYLDFLAGS) /OUT:$(PROG).exe +clean: + @cmd.exe /C del $(PROG).exe *.obj *.?db *.manifest +test: + $(PROG) $(ARGS) +compiler_check: + @$(CXX) >nul 2>&1 || echo "$(CXX) command not found. Check if CXX=$(CXX) is set properly" + +perf_build: release + +perf_run: test diff --git a/examples/convex_hull/convex_hull.cpp b/examples/convex_hull/convex_hull.cpp new file mode 100644 index 00000000000..e25efaf8819 --- /dev/null +++ b/examples/convex_hull/convex_hull.cpp @@ -0,0 +1,168 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +/* + This file contains the Parallel STL-based implementation of quickhull algortihm. 
+ Quickhull algorithm description: + 1. Initial phase + 1) Find two points that are guaranteed to belong to the convex hull. Min and max points in X can be used for it. + 2) Divide the initial set of points into two subsets by the line formed by the two points from the previous step. + These subsets will be processed recursively. + + 2. Iteration Phase + 1) Divide the current subset by the dividing line [p1,p2] into right and left subsets. + 2) A new point (p) of the convex hull is found as the farthest point of the right subset from the dividing line. + 3) If the right subset has more than 1 point, repeat the iteration phase with the right subset and dividing lines [p1,p] and [p,p2]. + + The implementation is based on the std::copy_if, std::max_element and std::minmax_element algorithms of Parallel STL. + Each of the algorithms uses the par_unseq policy. For the policy to have an effect, the problem size should be big enough. + By default, the problem size is set to 5M points. With a point set of fewer than 500K points, the par_unseq policy could be inefficient. + Correctness of the convex hull is checked by the std::all_of algorithm with counting iterators. 
+*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +typedef util::point point_t; +typedef std::vector< point_t > pointVec_t; +const size_t grain_size = 500000; //subsets with at most this many points are processed with the unseq policy + +template +void find_hull_points(Policy exec, Iterator first, Iterator last, pointVec_t &H, point_t p1, point_t p2); + +//Iteration phase based on the divide and conquer technique: handles segments [p1, fp] and [fp, p2] over the range [first, last) and appends the hull points found to H +template +void divide_and_conquer(Policy exec, Iterator first, Iterator last, pointVec_t &H, point_t p1, point_t fp, point_t p2) { + pointVec_t H1; + //decomposes the work and combines the results + if (size_t(last - first) > grain_size){ //the given policy is used only for big sets; for small set size parallel policy could be inefficient + find_hull_points(exec, first, last, H, p1, fp); + find_hull_points(exec, first, last, H1, fp, p2); + } + else { + find_hull_points(pstl::execution::unseq, first, last, H, p1, fp); + find_hull_points(pstl::execution::unseq, first, last, H1, fp, p2); + } + H.insert(H.end(), H1.cbegin(), H1.cend()); +} + +//Find points of the convex hull on the right side of the segment [p1, p2] and append them to H +template +void find_hull_points(Policy exec, Iterator first, Iterator last, pointVec_t &H, point_t p1, point_t p2) { + + pointVec_t P_reduced(last - first); + + //Find points from the range [first, last-1] that are on the right side of the segment [p1,p2] + Iterator end = std::copy_if(exec, first, last, P_reduced.begin(), + [&p1, &p2](const point_t& pnt) { + return cross_product(p1, p2, pnt) > 0; + }); + + if ((end - P_reduced.cbegin()) < 2) { + //Fewer than two points remain: add p1 and the remaining point (if any) into the hull + H.push_back(p1); + H.insert(H.end(), P_reduced.cbegin(), end); + } + else { + //Find the farthest point from the segment [p1,p2], it will be in the convex hull + auto far_point = *std::max_element(exec, P_reduced.cbegin(), end, + [&p1, &p2](const point_t & pnt1, const point_t & pnt2) { + double how_far1 = cross_product(p1, p2, pnt1); + double how_far2 = cross_product(p1, p2, pnt2); + return how_far1 == how_far2 ? 
pnt1 < pnt2 : how_far1 < how_far2; + }); + + //Repeat for segments [p1, far_point] and [far_point, p2] with points from [P_reduced.cbegin(), end-1] + divide_and_conquer(exec, P_reduced.cbegin(), end, H, p1, far_point, p2); + } +} + +//Quickhull algorithm +//The algorithm is based on the divide and conquer technique; the hull points are appended to hull +void quickhull(const pointVec_t &points, pointVec_t &hull) { + if (points.size() < 2) { + hull.insert(hull.end(), points.cbegin(), points.cend()); + return; + } + //Find the leftmost and rightmost points, they will be in the convex hull + auto minmaxx = std::minmax_element(pstl::execution::par_unseq, points.cbegin(), points.cend()); + + pointVec_t H; + point_t p1 = *minmaxx.first; + point_t p2 = *minmaxx.second; + //Divide the set of points into two subsets (right of [p1,p2] and right of [p2,p1]), which will be processed recursively + divide_and_conquer(pstl::execution::par_unseq, points.cbegin(), points.cend(), hull, p1, p2, p1); +} + +// Check if a polygon is convex: every three consecutive vertices (with wrap-around) must have a negative cross product +bool is_convex(const pointVec_t & points) { + return std::all_of(pstl::execution::par_unseq, + pstl::counting_iterator(size_t(0)), + pstl::counting_iterator(points.size()), + [&points](size_t i) { + point_t p0(points[i]); + point_t p1(points[(i + 1) % points.size()]); + point_t p2(points[(i + 2) % points.size()]); + return (cross_product(p0, p1, p2) < 0); + }); +} + +int main(int argc, char* argv[]) { + + const size_t numberOfPoints = 5000000; + const std::string output_file("ConvexHull.csv"); + + pointVec_t points(numberOfPoints); + pointVec_t hull; + + //initialize set of points + std::generate(pstl::execution::par, points.begin(), points.end(), util::random_point); + std::cout << "Points were initialized. 
Number of the points " << points.size() << std::endl; + + using ms = std::chrono::milliseconds; + + auto tm_start = std::chrono::high_resolution_clock::now(); + //execution of the quickhull algorithm (only this call is timed) + quickhull(points, hull); + auto tm_end = std::chrono::high_resolution_clock::now(); + + std::cout << "Computational time " << std::chrono::duration_cast (tm_end - tm_start).count() << "ms" + << " Points in the hull: " << hull.size() << " The convex hull is " << (is_convex(hull) ? "correct" : "incorrect") << std::endl; + + //writing the results, one point per line + std::ofstream fout(output_file); + if (fout.is_open()) { + for (auto p : hull) + fout << p << std::endl; + std::cout << "The convex hull has been stored to a file " << output_file << std::endl; + } + else { + std::cout << "Cannot open a file " << output_file << " to store result" << std::endl; + } + + return 0; +} diff --git a/examples/convex_hull/msvs/convex_hull.sln b/examples/convex_hull/msvs/convex_hull.sln new file mode 100644 index 00000000000..a421dcceed0 --- /dev/null +++ b/examples/convex_hull/msvs/convex_hull.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.23107.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "convex_hull", "convex_hull.vcxproj", "{33020498-816E-4A1D-A073-B0E4834AC979}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x64.ActiveCfg = Debug|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x64.Build.0 = Debug|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x86.ActiveCfg = Debug|Win32 + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x86.Build.0 = Debug|Win32 + 
{33020498-816E-4A1D-A073-B0E4834AC979}.Release|x64.ActiveCfg = Release|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x64.Build.0 = Release|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x86.ActiveCfg = Release|Win32 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/examples/convex_hull/msvs/convex_hull.vcxproj b/examples/convex_hull/msvs/convex_hull.vcxproj new file mode 100644 index 00000000000..d073fde9521 --- /dev/null +++ b/examples/convex_hull/msvs/convex_hull.vcxproj @@ -0,0 +1,162 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {33020498-816E-4A1D-A073-B0E4834AC979} + Win32Proj + convex_hull + 8.1 + + + + Application + true + Intel C++ Compiler 17.0 + Unicode + true + + + Application + false + Intel C++ Compiler 17.0 + true + Unicode + true + + + Application + true + Intel C++ Compiler 17.0 + Unicode + true + + + Application + false + Intel C++ Compiler 17.0 + true + Unicode + true + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + + + Console + true + + + + + + + Level3 + Disabled + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + Disable + + + Console + true + true + true + + + + + + + + + + + + \ No newline at end of file diff --git a/examples/convex_hull/readme.html b/examples/convex_hull/readme.html new file mode 100644 index 00000000000..ea2bc5aa9f5 --- /dev/null +++ 
b/examples/convex_hull/readme.html @@ -0,0 +1,399 @@ + + + + + + Parallel STL. Convex hull sample + + + + + +

+ This example demonstrates computing the convex hull of a set of points in the plane using the quickhull algorithm. + See https://en.wikipedia.org/wiki/Quickhull for more information. +

+ The example generates a set of random 2D points with std::generate and then runs the quickhull algorithm on it. + The leftmost and rightmost points are found by std::minmax_element. + On each step, the points on the right side of the oriented line are copied by std::copy_if and the farthest point is found by std::max_element. + Correctness of the convex hull is checked by the std::all_of algorithm with counting iterators. + The output of the example application is a CSV file with the points of the convex hull. +

+ +
+
System Requirements
+ +
+

+ For the most up-to-date system requirements, see the release notes. +

+
+
+ +
+
Files
+ +
+
+
convex_hull.cpp +
Implementation of the quickhull algorithm based on Parallel STL. +
utils.h +
Utility code: a point class template and a random point generator. +
Makefile +
Makefile for building the example. +
+
+
+ +
+
Directories
+ +
+
+
msvs +
Contains a Microsoft* Visual Studio* IDE workspace for building and running the example (Windows* OS systems only). +
+
+
+ +
+
Build instructions
+ +
+

To use Parallel STL, set up the environment by calling the pstlvars script (if you use a command line) or set the %PSTLROOT% environment variable pointing to the <pstl_installdir> folder (in Microsoft* Visual Studio* IDE on Windows* OS).

+

Use the Makefile to build the example on the command line.

+

Use the msvs/convex_hull.sln project file to build the example in the Microsoft* Visual Studio* IDE (Windows* systems only).

+
+
+ + +
+
Usage
+ +
+
+
convex_hull or convex_hull.exe +
Writes the resulting convex hull to the file ConvexHull.csv. +
+
+
+ +
+
+ +
+
Legal Information
+ +
+

+ Intel and the Intel logo are trademarks of Intel Corporation in the U.S. and/or other countries. +
* Other names and brands may be claimed as the property of others. +
© 2017, Intel Corporation +

+
+
+ + + diff --git a/examples/convex_hull/utils.h b/examples/convex_hull/utils.h new file mode 100644 index 00000000000..6fa0a9c8009 --- /dev/null +++ b/examples/convex_hull/utils.h @@ -0,0 +1,78 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#include +#include +#include + +namespace util { + + template + struct point { //2D point with coordinates of type T + T x; + T y; + point() {} //leaves x and y uninitialized + point(T xx, T yy) : x(xx), y(yy) {} + point(const point& p) : x(p.x), y(p.y) {} + + bool operator ==(const point &p2) const { + return (this->x == p2.x && this->y == p2.y); + } + bool operator !=(const point &p2) const { + return !(*(this) == p2); + } + bool operator < (const point & p2) const { //lexicographic order: by x, then by y + return (this->x == p2.x ? 
this->y < p2.y : this->x < p2.x); + } + }; + + template + T cross_product(const point& start, const point& end1, const point& end2) { //z-component of (end1 - start) x (end2 - start) + return ((end1.x - start.x)*(end2.y - start.y) - (end2.x - start.x)*(end1.y - start.y)); + } + + template + std::ostream& operator <<(std::ostream& ostr, const point& _p) { //writes the point as "x,y" + return ostr << _p.x << ',' << _p.y; + } + + template + std::istream& operator >>(std::istream& istr, point& _p) { //_p is taken by reference so that the extracted coordinates reach the caller + return istr >> _p.x >> _p.y; + } + + // The variable is declared out of the scope of random_point() to avoid code generation issues with some compilers + thread_local static std::default_random_engine rd; + + template + point random_point() { //coordinates are drawn from [-rand_max, rand_max]; when x*x + y*y exceeds rand_max, both are divided by that sum + const int rand_max = 10000; + std::uniform_int_distribution dist(-rand_max, rand_max); + T x = dist(rd); + T y = dist(rd); + const double r = x*x + y*y; + if (r > rand_max) { + x /= r; + y /= r; + } + return point(x, y); + } + +} diff --git a/examples/convex_hull/xcode/convex_hull.xcodeproj/project.pbxproj b/examples/convex_hull/xcode/convex_hull.xcodeproj/project.pbxproj new file mode 100644 index 00000000000..864918b4f39 --- /dev/null +++ b/examples/convex_hull/xcode/convex_hull.xcodeproj/project.pbxproj @@ -0,0 +1,294 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 8491CF201F2F817000DA6075 /* convex_hull.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8491CF1E1F2F817000DA6075 /* convex_hull.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8491CEF81F2A221C00DA6075 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 8491CEFA1F2A221C00DA6075 /* convex_hull */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = convex_hull; sourceTree = BUILT_PRODUCTS_DIR; }; + 8491CF1E1F2F817000DA6075 /* convex_hull.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convex_hull.cpp; path = ../convex_hull.cpp; sourceTree = ""; }; + 8491CF1F1F2F817000DA6075 /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utils.h; path = ../utils.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8491CEF71F2A221C00DA6075 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 8491CEF11F2A221C00DA6075 = { + isa = PBXGroup; + children = ( + 8491CF1E1F2F817000DA6075 /* convex_hull.cpp */, + 8491CF1F1F2F817000DA6075 /* utils.h */, + 8491CEFB1F2A221C00DA6075 /* Products */, + ); + sourceTree = ""; + }; + 8491CEFB1F2A221C00DA6075 /* Products */ = { + isa = PBXGroup; + children = ( + 8491CEFA1F2A221C00DA6075 /* convex_hull */, + ); + name = Products; + sourceTree = ""; + }; +/* End PBXGroup section */ + 
+/* Begin PBXNativeTarget section */ + 8491CEF91F2A221C00DA6075 /* convex_hull */ = { + isa = PBXNativeTarget; + buildConfigurationList = 8491CF011F2A221C00DA6075 /* Build configuration list for PBXNativeTarget "convex_hull" */; + buildPhases = ( + 8491CEF61F2A221C00DA6075 /* Sources */, + 8491CEF71F2A221C00DA6075 /* Frameworks */, + 8491CEF81F2A221C00DA6075 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = convex_hull; + productName = convex_hull; + productReference = 8491CEFA1F2A221C00DA6075 /* convex_hull */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 8491CEF21F2A221C00DA6075 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0800; + ORGANIZATIONNAME = tbb; + TargetAttributes = { + 8491CEF91F2A221C00DA6075 = { + CreatedOnToolsVersion = 8.0; + ProvisioningStyle = Automatic; + }; + }; + }; + buildConfigurationList = 8491CEF51F2A221C00DA6075 /* Build configuration list for PBXProject "convex_hull" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 8491CEF11F2A221C00DA6075; + productRefGroup = 8491CEFB1F2A221C00DA6075 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8491CEF91F2A221C00DA6075 /* convex_hull */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 8491CEF61F2A221C00DA6075 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 8491CF201F2F817000DA6075 /* convex_hull.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 8491CEFF1F2A221C00DA6075 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + 
CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVES = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 10.10; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + }; + name = Debug; + }; + 8491CF001F2A221C00DA6075 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVES = YES; + CLANG_WARN_UNREACHABLE_CODE = 
YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 10.10; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = macosx; + }; + name = Release; + }; + 8491CF021F2A221C00DA6075 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD)"; + GCC_VERSION = com.intel.compilers.icc.latest; + HEADER_SEARCH_PATHS = ( + "$(TBBROOT)/include", + /opt/intel/tbb/include, + "$(PSTLROOT)/include", + /opt/intel/pstl/include, + ); + ICC_CXX_LANG_DIALECT = "c++11"; + ICC_OTHER_CPLUSPLUSFLAGS = "$(ICC_OTHER_CFLAGS) $(OTHER_CPLUSPLUSFLAGS) -qopenmp-simd -xHOST"; + ICC_TBB = YES; + LD_RUNPATH_SEARCH_PATHS = "$(TBBROOT)/lib /opt/intel/tbb/lib"; + LIBRARY_SEARCH_PATHS = ( + "$(TBBROOT)/lib", + /opt/intel/tbb/lib, + ); + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-DTBB_USE_DEBUG=1", + "-D__PSTL_USE_TBB", + ); + OTHER_LDFLAGS = "-ltbb_debug"; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 8491CF031F2A221C00DA6075 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD)"; + GCC_GENERATE_DEBUGGING_SYMBOLS = NO; + GCC_OPTIMIZATION_LEVEL = 2; + GCC_VERSION = com.intel.compilers.icc.latest; + HEADER_SEARCH_PATHS = ( + "$(TBBROOT)/include", + /opt/intel/tbb/include, + "$(PSTLROOT)/include", + /opt/intel/pstl/include, + ); + ICC_CXX_LANG_DIALECT = "c++11"; + ICC_OTHER_CPLUSPLUSFLAGS = "$(ICC_OTHER_CFLAGS) $(OTHER_CPLUSPLUSFLAGS) -qopenmp-simd -xHOST"; + ICC_TBB = YES; + LD_RUNPATH_SEARCH_PATHS = "$(TBBROOT)/lib 
/opt/intel/tbb/lib"; + LIBRARY_SEARCH_PATHS = ( + "$(TBBROOT)/lib", + /opt/intel/tbb/lib, + ); + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-DNDEBUG", + "-D__PSTL_USE_TBB", + ); + OTHER_LDFLAGS = "-ltbb"; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 8491CEF51F2A221C00DA6075 /* Build configuration list for PBXProject "convex_hull" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 8491CEFF1F2A221C00DA6075 /* Debug */, + 8491CF001F2A221C00DA6075 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 8491CF011F2A221C00DA6075 /* Build configuration list for PBXNativeTarget "convex_hull" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 8491CF021F2A221C00DA6075 /* Debug */, + 8491CF031F2A221C00DA6075 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 8491CEF21F2A221C00DA6075 /* Project object */; +} diff --git a/examples/dot_product/Makefile b/examples/dot_product/Makefile new file mode 100644 index 00000000000..5b7dd467241 --- /dev/null +++ b/examples/dot_product/Makefile @@ -0,0 +1,59 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# + +# GNU Makefile that builds and runs example. 
+run_cmd= +PROG=dot_product.exe +ARGS= + +CXXFLAGS += -D__PSTL_USE_TBB -std=c++11 +# The C++ compiler +ifneq (,$(shell which icc 2>/dev/null)) +CXX=icc + +ifneq (, $(filter $(target), mic)) +CXXFLAGS += -mmic +else +CXXFLAGS += -xHOST +endif +CXXFLAGS += -qopenmp-simd +endif # which icc + +ifeq ($(shell uname), Linux) +LIBS+= -lrt +else ifeq ($(shell uname), Darwin) +override CXXFLAGS += -Wl,-rpath,$(TBBROOT)/lib +endif + +all: release test + +release: *.cpp + $(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(PROG) $^ -ltbb $(LIBS) + +debug: *.cpp + $(CXX) -O0 -g -DTBB_USE_DEBUG=1 $(CXXFLAGS) -o $(PROG) $^ -ltbb_debug $(LIBS) + +clean: + $(RM) $(PROG) *.o *.d + +test: + $(run_cmd) ./$(PROG) $(ARGS) + +perf_build: release + +perf_run: test diff --git a/examples/dot_product/Makefile.windows b/examples/dot_product/Makefile.windows new file mode 100644 index 00000000000..9fe78efa671 --- /dev/null +++ b/examples/dot_product/Makefile.windows @@ -0,0 +1,48 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# + +# Common Makefile that builds and runs example. 
+ +# Just specify your program basename +PROG=dot_product +ARGS= + +# Trying to find if icl.exe is set +CXX1 = $(TBB_CXX)- +CXX2 = $(CXX1:icl.exe-=icl.exe) +CXX = $(CXX2:-=cl.exe) + +# The C++ compiler options +MYCXXFLAGS = /TP /QxHOST /Qopenmp-simd /EHsc /W3 /nologo /D _CONSOLE /D _MBCS /D WIN32 /D _CRT_SECURE_NO_DEPRECATE $(CXXFLAGS) +MYLDFLAGS =/INCREMENTAL:NO /NOLOGO /DEBUG /FIXED:NO $(LDFLAGS) + +all: release test +release: + $(CXX) *.cpp /MD /O2 /D NDEBUG $(MYCXXFLAGS) /link tbb.lib $(LIBS) $(MYLDFLAGS) /OUT:$(PROG).exe +debug: + $(CXX) *.cpp /MDd /Od /Zi /D TBB_USE_DEBUG /D _DEBUG $(MYCXXFLAGS) /link tbb_debug.lib $(LIBS) $(MYLDFLAGS) /OUT:$(PROG).exe +clean: + @cmd.exe /C del $(PROG).exe *.obj *.?db *.manifest +test: + $(PROG) $(ARGS) +compiler_check: + @$(CXX) >nul 2>&1 || echo "$(CXX) command not found. Check if CXX=$(CXX) is set properly" + +perf_build: release + +perf_run: test diff --git a/examples/dot_product/dot_product.cpp b/examples/dot_product/dot_product.cpp new file mode 100644 index 00000000000..6ade3e3273a --- /dev/null +++ b/examples/dot_product/dot_product.cpp @@ -0,0 +1,56 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +/* + This file contains an implementation of the dot product based on the std::transform_reduce algorithm of Parallel STL +*/ + +#include +#include +#include +#include +#include +#include + +double random_number_generator() { + // a thread-local random engine gives each thread its own generator state, so the function can be called concurrently + thread_local static std::default_random_engine rd; + std::uniform_real_distribution dist(0, 1); + return dist(rd); +} + +int main(int argc, char* argv[]) { + + const size_t size = 10000000; + + std::vector v1(size), v2(size); + + //fill both vectors with random numbers using the par policy + std::generate(pstl::execution::par, v1.begin(), v1.end(), random_number_generator); + std::generate(pstl::execution::par, v2.begin(), v2.end(), random_number_generator); + + //the dot product calculation: element pairs are multiplied (std::multiplies) and the products are summed (std::plus) starting from .0 + double res = std::transform_reduce(pstl::execution::par_unseq, v1.cbegin(), v1.cend(), v2.cbegin(), .0, + std::plus(), std::multiplies()); + + std::cout << "The dot product is: " << res << std::endl; + + return 0; +} diff --git a/examples/dot_product/msvs/dot_product.sln b/examples/dot_product/msvs/dot_product.sln new file mode 100644 index 00000000000..6d55a51f5a2 --- /dev/null +++ b/examples/dot_product/msvs/dot_product.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.23107.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dot_product", "dot_product.vcxproj", "{33020498-816E-4A1D-A073-B0E4834AC979}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x64.ActiveCfg = Debug|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x64.Build.0 = Debug|x64 + 
{33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x86.ActiveCfg = Debug|Win32 + {33020498-816E-4A1D-A073-B0E4834AC979}.Debug|x86.Build.0 = Debug|Win32 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x64.ActiveCfg = Release|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x64.Build.0 = Release|x64 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x86.ActiveCfg = Release|Win32 + {33020498-816E-4A1D-A073-B0E4834AC979}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/examples/dot_product/msvs/dot_product.vcxproj b/examples/dot_product/msvs/dot_product.vcxproj new file mode 100644 index 00000000000..88c897950b2 --- /dev/null +++ b/examples/dot_product/msvs/dot_product.vcxproj @@ -0,0 +1,159 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {33020498-816E-4A1D-A073-B0E4834AC979} + Win32Proj + dot_product + 8.1 + + + + Application + true + Intel C++ Compiler 17.0 + Unicode + true + + + Application + false + Intel C++ Compiler 17.0 + true + Unicode + true + + + Application + true + Intel C++ Compiler 17.0 + Unicode + true + + + Application + false + Intel C++ Compiler 17.0 + true + Unicode + true + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + + + Console + true + + + + + + + Level3 + Disabled + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(PSTLROOT)\include + Disable + + + Console + true + true + true + + + + + + + + + \ No newline at end of file diff --git 
a/examples/dot_product/readme.html b/examples/dot_product/readme.html new file mode 100644 index 00000000000..70856fc7c16 --- /dev/null +++ b/examples/dot_product/readme.html @@ -0,0 +1,392 @@ + + + + + + Parallel STL. Dot product sample + + + + + +

+ This example demonstrates computing the dot product of two vectors. + See https://en.wikipedia.org/wiki/Dot_product for more information. +

+ The example computes the dot product (std::transform_reduce) of two vectors filled with random floating-point numbers (std::generate). +

+ +
+
System Requirements
+ +
+

+ For the most up-to-date system requirements, see the release notes. +

+
+
+ +
+
Files
+ +
+
+
dot_product.cpp +
Implementation of the dot product based on Parallel STL. +
Makefile +
Makefile for building the example. +
+
+
+ +
+
Directories
+ +
+
+
msvs +
Contains a Microsoft* Visual Studio* IDE workspace for building and running the example (Windows* OS systems only). +
+
+
+ +
+
Build instructions
+ +
+

To use Parallel STL, set up the environment by calling the pstlvars script (if you use a command line) or set the %PSTLROOT% environment variable pointing to the <pstl_installdir> folder (in Microsoft* Visual Studio* IDE on Windows* OS).

+

Use the Makefile to build the example on the command line.

+

Use the msvs/dot_product.sln project file to build the example in the Microsoft* Visual Studio* IDE (Windows* systems only).

+
+
+ + +
+
Usage
+ +
+
+
dot_product or dot_product.exe +
+
+
+ +
+
+ +
+
Legal Information
+ +
+

+ Intel and the Intel logo are trademarks of Intel Corporation in the U.S. and/or other countries. +
* Other names and brands may be claimed as the property of others. +
© 2017, Intel Corporation +

+
+
+ + + diff --git a/examples/dot_product/xcode/dot_product.xcodeproj/project.pbxproj b/examples/dot_product/xcode/dot_product.xcodeproj/project.pbxproj new file mode 100644 index 00000000000..9436108e330 --- /dev/null +++ b/examples/dot_product/xcode/dot_product.xcodeproj/project.pbxproj @@ -0,0 +1,293 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 8491CF1D1F2F810A00DA6075 /* dot_product.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8491CF1C1F2F810A00DA6075 /* dot_product.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8491CF0E1F2F521E00DA6075 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 8491CF101F2F521E00DA6075 /* dot_product */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = dot_product; sourceTree = BUILT_PRODUCTS_DIR; }; + 8491CF1C1F2F810A00DA6075 /* dot_product.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = dot_product.cpp; path = ../dot_product.cpp; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8491CF0D1F2F521E00DA6075 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 8491CF071F2F521E00DA6075 = { + isa = PBXGroup; + children = ( + 8491CF1C1F2F810A00DA6075 /* dot_product.cpp */, + 8491CF111F2F521E00DA6075 /* Products */, + ); + sourceTree = ""; + }; + 8491CF111F2F521E00DA6075 /* Products */ = { + isa = PBXGroup; + children = ( + 
8491CF101F2F521E00DA6075 /* dot_product */, + ); + name = Products; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8491CF0F1F2F521E00DA6075 /* dot_product */ = { + isa = PBXNativeTarget; + buildConfigurationList = 8491CF171F2F521E00DA6075 /* Build configuration list for PBXNativeTarget "dot_product" */; + buildPhases = ( + 8491CF0C1F2F521E00DA6075 /* Sources */, + 8491CF0D1F2F521E00DA6075 /* Frameworks */, + 8491CF0E1F2F521E00DA6075 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = dot_product; + productName = dot_product; + productReference = 8491CF101F2F521E00DA6075 /* dot_product */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 8491CF081F2F521E00DA6075 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0800; + ORGANIZATIONNAME = tbb; + TargetAttributes = { + 8491CF0F1F2F521E00DA6075 = { + CreatedOnToolsVersion = 8.0; + ProvisioningStyle = Automatic; + }; + }; + }; + buildConfigurationList = 8491CF0B1F2F521E00DA6075 /* Build configuration list for PBXProject "dot_product" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 8491CF071F2F521E00DA6075; + productRefGroup = 8491CF111F2F521E00DA6075 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8491CF0F1F2F521E00DA6075 /* dot_product */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 8491CF0C1F2F521E00DA6075 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 8491CF1D1F2F810A00DA6075 /* dot_product.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 8491CF151F2F521E00DA6075 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = 
{ + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVES = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_VERSION = com.intel.compilers.icc.latest; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(PSTLROOT)/include", + /opt/intel/pstl/include, + "$(TBBROOT)/include", + /opt/intel/tbb/include, + ); + ICC_CXX_LANG_DIALECT = "c++11"; + ICC_OTHER_CPLUSPLUSFLAGS = "$(ICC_OTHER_CFLAGS) $(OTHER_CPLUSPLUSFLAGS) -xHOST -qopenmp-simd"; + ICC_TBB = YES; + LD_RUNPATH_SEARCH_PATHS = "$(TBBROOT)/lib /opt/intel/tbb/lib"; + LIBRARY_SEARCH_PATHS = ( + "$(TBBROOT)/lib", + /opt/intel/tbb/lib, + ); + MACOSX_DEPLOYMENT_TARGET = 10.10; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-DTBB_USE_DEBUG=1", + "-D__PSTL_USE_TBB", + ); + OTHER_LDFLAGS = "-ltbb_debug"; + SDKROOT = macosx; + }; + name = Debug; + }; + 
8491CF161F2F521E00DA6075 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVES = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_GENERATE_DEBUGGING_SYMBOLS = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 2; + GCC_VERSION = com.intel.compilers.icc.latest; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(PSTLROOT)/include", + /opt/intel/pstl/include, + "$(TBBROOT)/include", + /opt/intel/tbb/include, + ); + ICC_CXX_LANG_DIALECT = "c++11"; + ICC_DEBUG = NO; + ICC_OTHER_CPLUSPLUSFLAGS = "$(ICC_OTHER_CFLAGS) $(OTHER_CPLUSPLUSFLAGS) -xHOST -qopenmp-simd"; + ICC_TBB = YES; + LD_RUNPATH_SEARCH_PATHS = "$(TBBROOT)/lib /opt/intel/tbb/lib"; + LIBRARY_SEARCH_PATHS = ( + "$(TBBROOT)/lib", + /opt/intel/tbb/lib, + ); + MACOSX_DEPLOYMENT_TARGET = 10.10; + MTL_ENABLE_DEBUG_INFO = NO; + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-DNDEBUG", + "-D__PSTL_USE_TBB", + ); + OTHER_LDFLAGS = "-ltbb"; + SDKROOT = macosx; + }; + 
name = Release; + }; + 8491CF181F2F521E00DA6075 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + MACOSX_DEPLOYMENT_TARGET = 10.10; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 8491CF191F2F521E00DA6075 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + MACOSX_DEPLOYMENT_TARGET = 10.10; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 8491CF0B1F2F521E00DA6075 /* Build configuration list for PBXProject "dot_product" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 8491CF151F2F521E00DA6075 /* Debug */, + 8491CF161F2F521E00DA6075 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 8491CF171F2F521E00DA6075 /* Build configuration list for PBXNativeTarget "dot_product" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 8491CF181F2F521E00DA6075 /* Debug */, + 8491CF191F2F521E00DA6075 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 8491CF081F2F521E00DA6075 /* Project object */; +} diff --git a/examples/gamma_correction/Makefile b/examples/gamma_correction/Makefile new file mode 100644 index 00000000000..d315e485c63 --- /dev/null +++ b/examples/gamma_correction/Makefile @@ -0,0 +1,59 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# + +# GNU Makefile that builds and runs example. +run_cmd= +PROG=gamma_correction.exe +ARGS= + +CXXFLAGS += -D__PSTL_USE_TBB -std=c++11 +# The C++ compiler +ifneq (,$(shell which icc 2>/dev/null)) +CXX=icc + +ifneq (, $(filter $(target), mic)) +CXXFLAGS += -mmic +else +CXXFLAGS += -xHOST +endif +CXXFLAGS += -qopenmp-simd +endif # which icc + +ifeq ($(shell uname), Linux) +LIBS+= -lrt +else ifeq ($(shell uname), Darwin) +override CXXFLAGS += -Wl,-rpath,$(TBBROOT)/lib +endif + +all: release test + +release: *.cpp + $(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(PROG) $^ -ltbb $(LIBS) + +debug: *.cpp + $(CXX) -O0 -g -DTBB_USE_DEBUG=1 $(CXXFLAGS) -o $(PROG) $^ -ltbb_debug $(LIBS) + +clean: + $(RM) $(PROG) *.o *.d + +test: + $(run_cmd) ./$(PROG) $(ARGS) + +perf_build: release + +perf_run: test diff --git a/examples/gamma_correction/Makefile.windows b/examples/gamma_correction/Makefile.windows new file mode 100644 index 00000000000..58a85428400 --- /dev/null +++ b/examples/gamma_correction/Makefile.windows @@ -0,0 +1,48 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# + +# Common Makefile that builds and runs example. 
+ +# Just specify your program basename +PROG=gamma_correction +ARGS= + +# Trying to find if icl.exe is set +CXX1 = $(TBB_CXX)- +CXX2 = $(CXX1:icl.exe-=icl.exe) +CXX = $(CXX2:-=cl.exe) + +# The C++ compiler options +MYCXXFLAGS = /TP /QxHOST /Qopenmp-simd /EHsc /W3 /nologo /D _CONSOLE /D _MBCS /D WIN32 /D _CRT_SECURE_NO_DEPRECATE $(CXXFLAGS) +MYLDFLAGS =/INCREMENTAL:NO /NOLOGO /DEBUG /FIXED:NO $(LDFLAGS) + +all: release test +release: + $(CXX) *.cpp /MD /O2 /D NDEBUG $(MYCXXFLAGS) /link tbb.lib $(LIBS) $(MYLDFLAGS) /OUT:$(PROG).exe +debug: + $(CXX) *.cpp /MDd /Od /Zi /D TBB_USE_DEBUG /D _DEBUG $(MYCXXFLAGS) /link tbb_debug.lib $(LIBS) $(MYLDFLAGS) /OUT:$(PROG).exe +clean: + @cmd.exe /C del $(PROG).exe *.obj *.?db *.manifest +test: + $(PROG) $(ARGS) +compiler_check: + @$(CXX) >nul 2>&1 || echo "$(CXX) command not found. Check if CXX=$(CXX) is set properly" + +perf_build: release + +perf_run: test diff --git a/examples/gamma_correction/gamma_correction.cpp b/examples/gamma_correction/gamma_correction.cpp new file mode 100644 index 00000000000..cb8bbad4b8e --- /dev/null +++ b/examples/gamma_correction/gamma_correction.cpp @@ -0,0 +1,98 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#include +#include +#include + +#include "pstl/algorithm" +#include "pstl/execution" +#include "utils.h" + +//! fractal class +class fractal { +public: + //! Constructor + fractal(int x, int y): my_size{x, y} {} + //! 
One pixel calculation routine + double calcOnePixel(int x, int y); + +private: + //! Size of the fractal area + const int my_size[2]; + //! Fractal properties + double cx = -0.7436; + const double cy = 0.1319; + const double magn = 2000000.0; + const int max_iter = 1000; +}; + +double fractal::calcOnePixel(int x0, int y0) { + double fx0 = double(x0) - double(my_size[0]) / 2; + double fy0 = double(y0) - double(my_size[1]) / 2; + fx0 = fx0 / magn + cx; + fy0 = fy0 / magn + cy; + + double res = 0, x = 0, y = 0; + for(int iter = 0; x*x + y*y <= 4 && iter < max_iter; ++iter) { + const double val = x*x - y*y + fx0; + y = 2*x*y + fy0, x = val; + res += exp(-sqrt(x*x+y*y)); + } + + return res; +} + +template +void applyGamma(Rows& image, double g) { + typedef decltype(image[0]) Row; + typedef decltype(image[0][0]) Pixel; + const int w = image[1] - image[0]; + + //execution STL algorithms with execution policies - std::execution::par and std::execution::unseq + std::for_each(std::execution::par, image.begin(), image.end(), [g, w](Row& r) { + std::transform(std::execution::unseq, r, r+w, r, [g](Pixel& p) { + double v = 0.3*p.bgra[2] + 0.59*p.bgra[1] + 0.11*p.bgra[0]; //RGB Luminance value + assert(v > 0); + double res = pow(v, g); + if(res > 255) + res = 255; + return image::pixel(res, res, res); + }); + }); +} + +int main(int argc, char* argv[]) { + + //create a fractal image + image img(800, 800); + fractal fr(img.width(), img.height()); + img.fill([&fr](int x, int y) { return fr.calcOnePixel(x, y); }); + img.write("image_1.bmp"); + + //apply gamma + applyGamma(img.rows(), 1.1); + + //write result to disk + img.write("image_1_gamma.bmp"); + std::cout<<"done"< + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {33020498-816E-4A1D-A073-B0E4834AC979} + Win32Proj + gamma_correction + 8.1 + + + + Application + true + Intel C++ Compiler 17.0 + Unicode + true + + + Application + false + Intel C++ Compiler 17.0 + true + Unicode + true + + + 
Application + true + Intel C++ Compiler 17.0 + Unicode + true + + + Application + false + Intel C++ Compiler 17.0 + true + Unicode + true + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\include;$(PSTLROOT)\include + + + Console + true + + + + + + + Level3 + Disabled + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\include;$(PSTLROOT)\include + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\include;$(PSTLROOT)\include + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\include;$(PSTLROOT)\include + Disable + + + Console + true + true + true + + + + + + + + + + + + + \ No newline at end of file diff --git a/examples/gamma_correction/readme.html b/examples/gamma_correction/readme.html new file mode 100644 index 00000000000..b56158eef5e --- /dev/null +++ b/examples/gamma_correction/readme.html @@ -0,0 +1,403 @@ + + + + + + Parallel STL. Gamma_correction sample + + + + + +

+ This example demonstrates gamma correction — a nonlinear operation used to encode and decode the luminance of each image pixel. + See https://en.wikipedia.org/wiki/Gamma_correction for more information. +

+ The example creates a fractal image in memory and performs gamma correction on it. + The output of the example application is a BMP image with corrected luminance. +

+ This example uses C++11 lambda expressions. Specifying a compiler option such as -std=c++11 or similar might be necessary in order to build the example. + For more information, please refer to the documentation for the compiler you use. +

+ +
+
System Requirements
+ +
+

+ For the most up-to-date system requirements, see the release notes. +

+
+
+ +
+
Files
+ +
+
+
gamma_correction.cpp +
Implementation of the gamma correction algorithm based on Parallel STL. +
utils.cpp +
Utility code (class image) to write an image to disk as a BMP file. +
utils.h +
Declaration of the image class. +
Makefile +
Makefile for building the example. +
+
+
+ +
+
Directories
+ +
+
+
msvs +
Contains a Microsoft* Visual Studio* IDE workspace for building and running the example (Windows* OS systems only). +
xcode +
Contains an Xcode* IDE workspace for building and running the example (macOS* systems only). +
+
+
+ +
+
Build instructions
+ +
+

To use Parallel STL, set up the environment by calling the pstlvars script (if you use a command line) or set the %PSTLROOT% environment variable pointing to the <pstl_installdir> folder (in Microsoft* Visual Studio* IDE on Windows* OS).

+

Use the Makefile to build the example on the command line.

+

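Use the msvs/gamma_correction.sln project file to build the example in the Microsoft* Visual Studio* IDE (Windows* systems only). Use the xcode/gamma_correction.xcodeproj project to build the example in the Xcode* IDE (macOS* systems only).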

+
+
+ + +
+
Usage
+ +
+
+
gamma_correction or gamma_correction.exe +
Outputs the original fractal image (image_1.bmp) and the fractal image with corrected luminance (image_1_gamma.bmp). +
+
+
+ +
+
+ +
+
Legal Information
+ +
+

+ Intel and the Intel logo are trademarks of Intel Corporation in the U.S. and/or other countries. +
* Other names and brands may be claimed as the property of others. +
© 2017, Intel Corporation +

+
+
+ + + diff --git a/examples/gamma_correction/utils.cpp b/examples/gamma_correction/utils.cpp new file mode 100644 index 00000000000..7786bc1c2f7 --- /dev/null +++ b/examples/gamma_correction/utils.cpp @@ -0,0 +1,107 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#include +#include +#include +#include +#include "utils.h" + +void image::reset(int w, int h) { //(re)allocates the pixel storage and fills both BMP headers for a w x h image + + if(w <= 0 || h <= 0) { + std::cout << "Warning: Invalid image size.\n"; + return; + } + + my_width = w, my_height = h; + + //reset raw data + my_data.resize(my_width*my_height); + my_rows.resize(my_height); + + //reset rows + for(int i = 0; i < my_rows.size(); ++i) + my_rows[i] = &my_data[0]+i*my_width; + + my_padSize = (4-(w*sizeof(my_data[0]))%4)%4; + int sizeData = w*h*sizeof(my_data[0]) + h*my_padSize; + int sizeAll = sizeData + (sizeof(file) - sizeof(file.sizeRest)) + sizeof(info); //'sizeRest' is never written to disk, so it must not be counted in the file size + + //BITMAPFILEHEADER + file.sizeRest = 14; + file.type = 0x4d42; //same as 'BM' in ASCII + file.size = sizeAll; + file.reserved = 0; + file.offBits = 54; //14 bytes of file header + 40 bytes of info header + + //BITMAPINFOHEADER + info.size = 40; + info.width = w; + info.height = h; + info.planes = 1; + info.bitCount = 32; + info.compression = 0; + info.sizeImage = sizeData; + info.yPelsPerMeter = 0; + info.xPelsPerMeter = 0; + info.clrUsed = 0; + info.clrImportant = 0; +} + +image::image(int w, int h) { + reset(w, h); +} + +void image::fill(std::uint8_t r, std::uint8_t g, std::uint8_t b, int x, int y) { + if(my_data.empty()) + return; + + 
assert(my_data.size() == my_width*my_height); + assert(my_rows.size() == my_height); + + if(x < 0 && y < 0) //fill whole image + std::fill(my_data.begin(), my_data.end(), pixel(b, g, r)); + else { + auto& bgra = my_data[my_width*x + y].bgra; + bgra[3] = 0, bgra[2] = r, bgra[1] = g, bgra[0] = b; + } +} + +void image::write(const char* fname) const { //writes the file header, the info header and the raw pixel data to 'fname' + + if(my_data.empty()) { + std::cout << "Warning: An image is empty.\n"; + return; + } + + assert(my_width > 0 && my_height > 0); + + std::ofstream stream(fname, std::ios::binary); //binary mode: header and pixel bytes must not undergo newline translation + + assert(file.sizeRest == sizeof(file)-sizeof(file.sizeRest)); + stream.write((char*)&file.type, file.sizeRest); //skip the leading 'sizeRest' field, it is not part of the BMP format + + assert(info.size == sizeof(info)); + stream.write((char*)&info, info.size); + + assert(info.sizeImage == my_data.size() * sizeof(my_data[0])); + stream.write((char*)my_data[0].bgra, my_data.size()*sizeof(my_data[0])); +} diff --git a/examples/gamma_correction/utils.h b/examples/gamma_correction/utils.h new file mode 100644 index 00000000000..92dc948b576 --- /dev/null +++ b/examples/gamma_correction/utils.h @@ -0,0 +1,105 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +#include +#include + +class image { +public: + union pixel { + std::uint8_t bgra[4]; + std::uint32_t value; + pixel() {} + template pixel(T b, T g, T r) { + bgra[0] = (std::uint8_t)b, bgra[1] = (std::uint8_t)g, bgra[2] = (std::uint8_t)r, bgra[3] = 0; + } + }; +public: + image(int w = 1920, int h = 1080); + + int width() const { return my_width; } + int height() const { return my_height; } + + void write(const char* fname) const; + void fill(std::uint8_t r, std::uint8_t g, std::uint8_t b, int x = -1, int y = -1); + + template + void fill(F f) { + + if(my_data.empty()) + reset(my_width, my_height); + + int i = -1; + int w = this->my_width; + std::for_each(my_data.begin(), my_data.end(), [&i, w, f](image::pixel& p) { + ++i; + int x = i / w, y = i % w; + auto val = f(x, y); + if(val > 255) + val = 255; + p = image::pixel(val, val, val); + }); + } + + std::vector& rows() { return my_rows; } + +private: + void reset(int w, int h); + +private: + //don't allow copying + image(const image&); + void operator=(const image&); + +private: + int my_width; + int my_height; + int my_padSize; + + std::vector my_data; //raw raster data + std::vector my_rows; + + //data structures 'file' and 'info' are used to store an image as a BMP file + //for more details see https://en.wikipedia.org/wiki/BMP_file_format + using BITMAPFILEHEADER = struct { + std::uint16_t sizeRest; // field is not from specification, + // was added for alignment. 
store size of rest of the fields + std::uint16_t type; + std::uint32_t size; + std::uint32_t reserved; + std::uint32_t offBits; + }; + BITMAPFILEHEADER file; + + using BITMAPINFOHEADER = struct { + std::uint32_t size; + std::int32_t width; + std::int32_t height; + std::uint16_t planes; + std::uint16_t bitCount; + std::uint32_t compression; + std::uint32_t sizeImage; + std::int32_t xPelsPerMeter; + std::int32_t yPelsPerMeter; + std::uint32_t clrUsed; + std::uint32_t clrImportant; + }; + BITMAPINFOHEADER info; +}; diff --git a/examples/gamma_correction/xcode/gamma_correction.xcodeproj/project.pbxproj b/examples/gamma_correction/xcode/gamma_correction.xcodeproj/project.pbxproj new file mode 100644 index 00000000000..c3af899f3f7 --- /dev/null +++ b/examples/gamma_correction/xcode/gamma_correction.xcodeproj/project.pbxproj @@ -0,0 +1,304 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 84D4756B1C3CB6DF0088D54A /* gamma_correction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 84D475681C3CB6DF0088D54A /* gamma_correction.cpp */; }; + 84D4756C1C3CB6DF0088D54A /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 84D475691C3CB6DF0088D54A /* utils.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 84D4755C1C3CB69C0088D54A /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 84D4755E1C3CB69C0088D54A /* gamma_correction */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = gamma_correction; sourceTree = BUILT_PRODUCTS_DIR; }; + 84D475681C3CB6DF0088D54A /* gamma_correction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; name = gamma_correction.cpp; path = ../gamma_correction.cpp; sourceTree = ""; }; + 84D475691C3CB6DF0088D54A /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = utils.cpp; path = ../utils.cpp; sourceTree = ""; }; + 84D4756A1C3CB6DF0088D54A /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utils.h; path = ../utils.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 84D4755B1C3CB69C0088D54A /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 84D475551C3CB69C0088D54A = { + isa = PBXGroup; + children = ( + 84D475681C3CB6DF0088D54A /* gamma_correction.cpp */, + 84D475691C3CB6DF0088D54A /* utils.cpp */, + 84D4756A1C3CB6DF0088D54A /* utils.h */, + 84D4755F1C3CB69C0088D54A /* Products */, + ); + sourceTree = ""; + }; + 84D4755F1C3CB69C0088D54A /* Products */ = { + isa = PBXGroup; + children = ( + 84D4755E1C3CB69C0088D54A /* gamma_correction */, + ); + name = Products; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 84D4755D1C3CB69C0088D54A /* gamma_correction */ = { + isa = PBXNativeTarget; + buildConfigurationList = 84D475651C3CB69C0088D54A /* Build configuration list for PBXNativeTarget "gamma_correction" */; + buildPhases = ( + 84D4755A1C3CB69C0088D54A /* Sources */, + 84D4755B1C3CB69C0088D54A /* Frameworks */, + 84D4755C1C3CB69C0088D54A /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = gamma_correction; + productName = gamma_correction; + productReference = 84D4755E1C3CB69C0088D54A /* gamma_correction */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 
84D475561C3CB69C0088D54A /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0800; + ORGANIZATIONNAME = tbb; + TargetAttributes = { + 84D4755D1C3CB69C0088D54A = { + CreatedOnToolsVersion = 8.0; + ProvisioningStyle = Automatic; + }; + }; + }; + buildConfigurationList = 84D475591C3CB69C0088D54A /* Build configuration list for PBXProject "gamma_correction" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 84D475551C3CB69C0088D54A; + productRefGroup = 84D4755F1C3CB69C0088D54A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 84D4755D1C3CB69C0088D54A /* gamma_correction */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 84D4755A1C3CB69C0088D54A /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 84D4756B1C3CB6DF0088D54A /* gamma_correction.cpp in Sources */, + 84D4756C1C3CB6DF0088D54A /* utils.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 84D475631C3CB69C0088D54A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVES = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = 
NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_VERSION = ""; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(PSTLROOT)/include", + /opt/intel/pstl/include, + "$(TBBROOT)/include", + /opt/intel/tbb/include, + ); + ICC_CXX_LANG_DIALECT = "c++11"; + ICC_TBB = YES; + LD_RUNPATH_SEARCH_PATHS = "$(TBBROOT)/lib /opt/intel/tbb/lib"; + LIBRARY_SEARCH_PATHS = ( + "$(TBBROOT)/lib", + /opt/intel/tbb/lib, + ); + MACOSX_DEPLOYMENT_TARGET = 10.10; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + OTHER_LDFLAGS = "-ltbb"; + SDKROOT = macosx; + }; + name = Debug; + }; + 84D475641C3CB69C0088D54A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVES = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + 
GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_VERSION = ""; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(PSTLROOT)/include", + /opt/intel/pstl/include, + "$(TBBROOT)/include", + /opt/intel/tbb/include, + ); + ICC_CXX_LANG_DIALECT = "c++11"; + ICC_TBB = YES; + LD_RUNPATH_SEARCH_PATHS = "$(TBBROOT)/lib /opt/intel/tbb/lib"; + LIBRARY_SEARCH_PATHS = ( + "$(TBBROOT)/lib", + /opt/intel/tbb/lib, + ); + MACOSX_DEPLOYMENT_TARGET = 10.10; + MTL_ENABLE_DEBUG_INFO = NO; + OTHER_LDFLAGS = "-ltbb"; + SDKROOT = macosx; + }; + name = Release; + }; + 84D475661C3CB69C0088D54A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD)"; + CLANG_WARN_DOCUMENTATION_COMMENTS = NO; + GCC_VERSION = com.intel.compilers.icc.latest; + ICC_OTHER_CPLUSPLUSFLAGS = "$(ICC_OTHER_CFLAGS) $(OTHER_CPLUSPLUSFLAGS) -qopenmp-simd -xHOST"; + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-DTBB_USE_DEBUG", + "-D__PSTL_USE_TBB", + ); + OTHER_LDFLAGS = "-ltbb_debug"; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 84D475671C3CB69C0088D54A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD)"; + CLANG_WARN_DOCUMENTATION_COMMENTS = NO; + GCC_GENERATE_DEBUGGING_SYMBOLS = NO; + GCC_OPTIMIZATION_LEVEL = 2; + GCC_VERSION = com.intel.compilers.icc.latest; + ICC_DEBUG = NO; + ICC_OPTLEVEL = speed; + ICC_OTHER_CPLUSPLUSFLAGS = "$(ICC_OTHER_CFLAGS) $(OTHER_CPLUSPLUSFLAGS) -qopenmp-simd -xHOST"; + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-D__PSTL_USE_TBB", + ); + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 84D475591C3CB69C0088D54A /* Build configuration list for 
PBXProject "gamma_correction" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84D475631C3CB69C0088D54A /* Debug */, + 84D475641C3CB69C0088D54A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 84D475651C3CB69C0088D54A /* Build configuration list for PBXNativeTarget "gamma_correction" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84D475661C3CB69C0088D54A /* Debug */, + 84D475671C3CB69C0088D54A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 84D475561C3CB69C0088D54A /* Project object */; +} diff --git a/include/pstl/_internal/algorithm_impl.h b/include/pstl/_internal/algorithm_impl.h new file mode 100644 index 00000000000..4e4a07708a5 --- /dev/null +++ b/include/pstl/_internal/algorithm_impl.h @@ -0,0 +1,1527 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +#ifndef __PSTL_algorithm_impl_H +#define __PSTL_algorithm_impl_H + +#include +#include +#include +#include +#include + +#include "execution_policy_impl.h" + +namespace __icp_algorithm { +//------------------------------------------------------------------------ +// forward +//------------------------------------------------------------------------ +template +void parallel_for(Index first, Index last, F f); +template +Index parallel_first(Index first, Index last, Brick f); +template +bool parallel_or(Index first, Index last, Brick f); +template +void parallel_strict_scan(Index n, T initial, R reduce, C combine, S scan, A apex); +template +void parallel_stable_sort(RandomAccessIterator xs, RandomAccessIterator xe, Compare comp, LeafSort leaf_sort); + +//------------------------------------------------------------------------ +// any_of +//------------------------------------------------------------------------ + +template +bool brick_any_of( const InputIterator first, const InputIterator last, Pred pred, /*is_vector=*/std::false_type ) noexcept { + return std::any_of(first,last,pred); +}; + +template +bool brick_any_of( const InputIterator first, const InputIterator last, Pred pred, /*is_vector=*/std::true_type ) noexcept { + return simd_or( first, last-first, pred ); +}; + + +template +bool pattern_any_of( InputIterator first, InputIterator last, Pred pred, IsVector is_vector, /*parallel=*/std::false_type ) noexcept { + return brick_any_of(first,last,pred,is_vector); +} + +template +bool pattern_any_of( InputIterator first, InputIterator last, Pred pred, IsVector is_vector, /*parallel=*/std::true_type ) { + return parallel_or( first, last, + [pred, is_vector](InputIterator i, InputIterator j) {return brick_any_of(i, j, pred, is_vector);} ); +} + + +//------------------------------------------------------------------------ +// walk1 (pseudo) +// +// walk1 evaluates f(x) for each x drawn from [first,last) 
+//------------------------------------------------------------------------
+template<class Iterator, class Function>
+void brick_walk1( Iterator first, Iterator last, Function f, /*vector=*/std::false_type ) noexcept {
+    for(; first!=last; ++first )
+        f(*first);
+}
+
+template<class T, class Function>
+void brick_walk1( T* __restrict first, T* __restrict last, Function f, /*vector=*/std::false_type ) noexcept {
+    for(; first!=last; ++first )
+        f(*first);
+}
+
+template<class Iterator, class Function>
+void brick_walk1( Iterator first, Iterator last, Function f, /*vector=*/std::true_type ) noexcept {
+    simd_walk_1(first, last-first, f);
+}
+
+
+template<class Iterator, class Function, class IsVector>
+void pattern_walk1( Iterator first, Iterator last, Function f, IsVector is_vector, /*parallel=*/std::false_type ) noexcept {
+    brick_walk1( first, last, f, is_vector );
+}
+
+template<class Iterator, class Function, class IsVector>
+void pattern_walk1( Iterator first, Iterator last, Function f, IsVector is_vector, /*parallel=*/std::true_type ) {
+    parallel_for( first, last, [f,is_vector](Iterator i, Iterator j) {
+        brick_walk1(i,j,f,is_vector);
+    });
+}
+
+
+// [alg.foreach]
+// for_each_n with no policy: applies f to n successive elements, returns the iterator past the last one visited
+template<class InputIterator, class Size, class Function>
+InputIterator for_each_n(InputIterator first, Size n, Function f) {
+    for( ; n > 0; ++first, --n )
+        f(*first);
+    return first;
+}
+
+//------------------------------------------------------------------------
+// walk1_n: evaluates f(x) for the n elements starting at first
+//------------------------------------------------------------------------
+template<class InputIterator, class Size, class Function>
+InputIterator brick_walk1_n(InputIterator first, Size n, Function f,
+    /*IsVectorTag=*/std::false_type ) {
+    return for_each_n( first, n, f ); // calling serial version
+}
+
+template<class RandomAccessIterator, class Size, class Function>
+RandomAccessIterator brick_walk1_n( RandomAccessIterator first, Size n, Function f,
+    /*vectorTag=*/std::true_type ) noexcept(noexcept(f(first[0]))) {
+    RandomAccessIterator last = first + n;
+    RandomAccessIterator begin = first < last ? first : last; // orders the two ends so the loop below always counts upward
+    RandomAccessIterator end = first < last ? last : first;
+    Size positive_n = end - begin;
+__PSTL_PRAGMA_SIMD
+    for( Size i = 0; i < positive_n; ++i )
+        f( begin[i] );
+    return end;
+}
+
+template<class InputIterator, class Size, class Function, class IsVector>
+InputIterator pattern_walk1_n( InputIterator first, Size n, Function f, IsVector is_vector, /*is_parallel=*/std::false_type ) noexcept {
+    return brick_walk1_n(first, n, f, is_vector);
+}
+
+template<class RandomAccessIterator, class Size, class Function, class IsVector>
+RandomAccessIterator pattern_walk1_n( RandomAccessIterator first, Size n, Function f, IsVector is_vector, /*is_parallel=*/std::true_type ) {
+    RandomAccessIterator last = first + n;
+    parallel_for( first, last,
+        [ &f, is_vector ]( RandomAccessIterator first, RandomAccessIterator last ) {
+            brick_walk1_n( first, last - first, f, is_vector );
+    } );
+    return last;
+}
+
+
+//------------------------------------------------------------------------
+// walk2 (pseudo)
+//
+// walk2 evaluates f(x,y) for (x,y) drawn from [first1,last1) and [first2,...)
+//------------------------------------------------------------------------
+template<class Iterator1, class Iterator2, class Function>
+Iterator2 brick_walk2( Iterator1 first1, Iterator1 last1, Iterator2 first2, Function f, /*vector=*/std::false_type ) noexcept {
+    for(; first1!=last1; ++first1, ++first2 )
+        f(*first1,*first2);
+    return first2;
+}
+
+template<class Iterator1, class Iterator2, class Function>
+Iterator2 brick_walk2( Iterator1 first1, Iterator1 last1, Iterator2 first2, Function f, /*vector=*/std::true_type) noexcept {
+    return simd_walk_2(first1, last1-first1, first2, f);
+}
+
+
+template<class Iterator1, class Iterator2, class Function, class IsVector>
+Iterator2 pattern_walk2( Iterator1 first1, Iterator1 last1, Iterator2 first2, Function f, IsVector is_vector, /*parallel=*/std::false_type ) noexcept {
+    return brick_walk2(first1,last1,first2,f,is_vector);
+}
+
+template<class Iterator1, class Iterator2, class Function, class IsVector>
+Iterator2 pattern_walk2(Iterator1 first1, Iterator1 last1, Iterator2 first2, Function f, IsVector is_vector, /*parallel=*/std::true_type ) {
+    parallel_for(
+        first1, last1,
+        [f,first1,first2,is_vector](Iterator1 i, Iterator1 j) {
+            brick_walk2(i,j,first2+(i-first1),f,is_vector); // second range is offset by the chunk's distance from first1
+        }
+    );
+    return first2+(last1-first1);
+}
+
+
+//------------------------------------------------------------------------
+// walk3 (pseudo)
+//
+// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
+//------------------------------------------------------------------------
+template<class Iterator1, class Iterator2, class Iterator3, class Function>
+Iterator3 brick_walk3( Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator3 first3, Function f, /*vector=*/std::false_type ) noexcept {
+    for(; first1!=last1; ++first1, ++first2, ++first3 )
+        f(*first1, *first2, *first3);
+    return first3;
+}
+
+template<class Iterator1, class Iterator2, class Iterator3, class Function>
+Iterator3 brick_walk3( Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator3 first3, Function f, /*vector=*/std::true_type) noexcept {
+    return simd_walk_3(first1, last1-first1, first2, first3, f);
+}
+
+
+template<class Iterator1, class Iterator2, class Iterator3, class Function, class IsVector>
+Iterator3 pattern_walk3( Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator3 first3, Function f, IsVector is_vector, /*parallel=*/std::false_type ) noexcept {
+    return brick_walk3(first1, last1, first2, first3, f, is_vector);
+}
+
+template<class Iterator1, class Iterator2, class Iterator3, class Function, class IsVector>
+Iterator3 pattern_walk3(Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator3 first3, Function f, IsVector is_vector, /*parallel=*/std::true_type ) {
+    parallel_for(
+        first1, last1,
+        [f, first1, first2, first3, is_vector](Iterator1 i, Iterator1 j) {
+            brick_walk3(i, j, first2+(i-first1), first3+(i-first1), f, is_vector);
+        }
+    );
+    return first3+(last1-first1);
+}
+
+
+//------------------------------------------------------------------------
+// find_if: first position in [first,last) where pred holds
+//------------------------------------------------------------------------
+template<class InputIterator, class Predicate>
+InputIterator brick_find_if(InputIterator first, InputIterator last, Predicate pred, /*is_vector=*/std::false_type) noexcept {
+    return std::find_if(first, last, pred);
+}
+
+template<class InputIterator, class Predicate>
+InputIterator brick_find_if(InputIterator first, InputIterator last, Predicate pred, /*is_vector=*/std::true_type) noexcept {
+    return simd_first(first, last-first, pred);
+}
+
+template<class InputIterator, class Predicate, class IsVector>
+InputIterator pattern_find_if( InputIterator first, InputIterator last, Predicate pred, IsVector is_vector, /*is_parallel=*/std::false_type ) noexcept {
+    return brick_find_if(first,last,pred,is_vector);
+}
+
+template<class InputIterator, class Predicate, class IsVector>
+InputIterator pattern_find_if( InputIterator first, InputIterator last, Predicate pred, IsVector is_vector, /*is_parallel=*/std::true_type ) {
+    return parallel_first( first, last, [pred,is_vector](InputIterator i, InputIterator j) {
+        return brick_find_if(i,j,pred,is_vector);
+    });
+}
+
+//------------------------------------------------------------------------
+// find_end (search_serial is shared with search: b_first selects first vs. last occurrence)
+//------------------------------------------------------------------------
+template<class ForwardIt1, class ForwardIt2, class BinaryPredicate>
+ForwardIt1 search_serial(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, BinaryPredicate p, bool b_first) {
+    if(s_first == s_last)
+        return b_first ? first : last; // empty pattern: std::search yields first, std::find_end yields last
+
+    ForwardIt1 result = last;
+    for(; first != last; ++first) {
+        auto it1 = first;
+        auto it2 = s_first;
+        for(; it2 != s_last && it1 != last; ++it2, ++it1) {
+            if(!p(*it1, *it2))
+                break;
+        }
+        if(it2 == s_last) {//subsequence was found
+            result = first;
+            if(b_first) //first occurrence semantic
+                break;
+        }
+    }
+    return result;
+}
+
+template<class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
+ForwardIterator1 brick_find_end(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, /*is_vector=*/std::false_type) noexcept {
+    return search_serial(first, last, s_first, s_last, pred, false);
+}
+
+template<class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
+ForwardIterator1 brick_find_end(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, /*is_vector=*/std::true_type) noexcept {
+    return simd_search(first, last, s_first, s_last, pred, false);
+}
+
+template<class ForwardIterator1, class ForwardIterator2, class BinaryPredicate, class IsVector>
+ForwardIterator1 pattern_find_end(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept {
+    return brick_find_end(first, last, s_first, s_last, pred, is_vector);
+}
+
+template<class ForwardIterator1, class ForwardIterator2, class BinaryPredicate, class IsVector>
+ForwardIterator1 pattern_find_end(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial");
+    return brick_find_end(first, last, s_first, s_last, pred, is_vector);
+}
+
+//------------------------------------------------------------------------
+// find_first_of
+//------------------------------------------------------------------------
+template<class InputIterator, class ForwardIterator, class BinaryPredicate>
+InputIterator brick_find_first_of(InputIterator first, InputIterator last, ForwardIterator s_first, ForwardIterator s_last, BinaryPredicate pred, /*is_vector=*/std::false_type) noexcept {
+    return std::find_first_of(first, last, s_first, s_last, pred);
+}
+
+template<class InputIterator, class ForwardIterator, class BinaryPredicate>
+InputIterator brick_find_first_of(InputIterator first, InputIterator last, ForwardIterator s_first, ForwardIterator s_last, BinaryPredicate pred, /*is_vector=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::find_first_of(first, last, s_first, s_last, pred);
+}
+
+template<class InputIterator, class ForwardIterator, class BinaryPredicate, class IsVector>
+InputIterator pattern_find_first_of(InputIterator first, InputIterator last, ForwardIterator s_first, ForwardIterator s_last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept {
+    return brick_find_first_of(first, last, s_first, s_last, pred, is_vector);
+}
+
+template<class InputIterator, class ForwardIterator, class BinaryPredicate, class IsVector>
+InputIterator pattern_find_first_of(InputIterator first, InputIterator last, ForwardIterator s_first, ForwardIterator s_last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial");
+    return brick_find_first_of(first, last, s_first, s_last, pred, is_vector);
+}
+
+//------------------------------------------------------------------------
+// search
+//------------------------------------------------------------------------ +template +ForwardIterator1 brick_search(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, /*vector=*/std::false_type) noexcept { + return search_serial(first, last, s_first, s_last, pred, true); +} + +template +ForwardIterator1 brick_search(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, /*vector=*/std::true_type) noexcept { + return simd_search(first, last, s_first, s_last, pred, true); +} + +template +ForwardIterator1 pattern_search(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_search(first, last, s_first, s_last, pred, is_vector); +} + +template +ForwardIterator1 pattern_search(ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_search(first, last, s_first, s_last, pred, is_vector); +} +//------------------------------------------------------------------------ +// search_n +//------------------------------------------------------------------------ +template +ForwardIterator brick_search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value, BinaryPredicate pred, /*vector=*/std::false_type) noexcept { + return std::search_n(first, last, count, value, pred); +} + +template +ForwardIterator brick_search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value, BinaryPredicate pred, /*vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::search_n(first, last, 
count, value, pred); +} + +template +ForwardIterator pattern_search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_search_n(first, last, count, value, pred, is_vector); +} + +template +ForwardIterator pattern_search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_search_n(first, last, count, value, pred, is_vector); +} + +//------------------------------------------------------------------------ +// copy_n +//------------------------------------------------------------------------ + +template +OutputIterator brick_copy_n(InputIterator first, Size n, OutputIterator result, /*vector=*/std::false_type) noexcept { + return std::copy_n(first, n, result); +} + +template +OutputIterator brick_copy_n(InputIterator first, Size n, OutputIterator result, /*vector=*/std::true_type) noexcept { + return simd_copy_n(first, n, result); +} + +template +OutputIterator pattern_copy_n(InputIterator first, Size n, OutputIterator result, IsVector is_vector, /*parallel=*/std::false_type) noexcept { + return brick_copy_n(first, n, result, is_vector); +} + +template +OutputIterator pattern_copy_n(InputIterator first, Size n, OutputIterator result, IsVector is_vector, /*parallel=*/std::true_type) { + parallel_for( + Size(0), n, + [first,result,is_vector](Size i, Size j) { + brick_copy_n(first+i, j-i, result+i, is_vector); + } + ); + return result+n; +} + +//------------------------------------------------------------------------ +// copy +//------------------------------------------------------------------------ +template +OutputIterator brick_copy(InputIterator first, InputIterator last, OutputIterator result, /*vector=*/std::false_type) noexcept { + return 
std::copy(first, last, result); +} + +template +OutputIterator brick_copy(InputIterator first, InputIterator last, OutputIterator result, /*vector=*/std::true_type) noexcept { + return brick_copy_n(first, last - first, result, std::true_type()); +} + +template +OutputIterator pattern_copy(InputIterator first, InputIterator last, OutputIterator result, IsVector is_vector, /*parallel=*/std::false_type) noexcept { + return brick_copy(first, last, result, is_vector); +} + +template +OutputIterator pattern_copy(InputIterator first, InputIterator last, OutputIterator result, IsVector is_vector, /*parallel=*/std::true_type) { + return pattern_copy_n(first, last - first, result, is_vector, std::true_type()); +} + +//------------------------------------------------------------------------ +// copy_if +//------------------------------------------------------------------------ +template +OutputIterator brick_copy_if(InputIterator first, InputIterator last, OutputIterator result, UnaryPredicate pred, /*vector=*/std::false_type) noexcept { + return std::copy_if(first, last, result, pred); +} + +template +OutputIterator brick_copy_if(InputIterator first, InputIterator last, OutputIterator result, UnaryPredicate pred, /*vector=*/std::true_type) noexcept { +#if (__PSTL_MONOTONIC_PRESENT) + return simd_copy_if(first, last-first, result, pred); +#else + return std::copy_if(first, last, result, pred); +#endif +} + +// TODO: Try to use transform_reduce for combining brick_copy_if_phase1 on IsVector. 
+template +DifferenceType brick_calc_mask_1(InputIterator first, InputIterator last, bool* __restrict mask, UnaryPredicate pred, /*vector=*/std::false_type) noexcept { + DifferenceType count = 0; + for (; first != last; ++first, ++mask) { + *mask = pred(*first); + count += *mask; + } + return count; +} + +template +DifferenceType brick_calc_mask_1(InputIterator first, InputIterator last, bool* __restrict mask, UnaryPredicate pred, /*vector=*/std::true_type) noexcept { + return simd_calc_mask_1(first, last-first, mask, pred); +} + +template +void brick_copy_by_mask(InputIterator first, InputIterator last, OutputIterator result, bool* mask, /*vector=*/std::false_type ) noexcept { + for(;first!=last; ++first, ++mask) { + if( *mask ) { + *result = *first; + ++result; + } + } +} + +template +void brick_copy_by_mask(InputIterator first, InputIterator last, OutputIterator result, bool* __restrict mask, /*vector=*/std::true_type) noexcept { +#if (__PSTL_MONOTONIC_PRESENT) + simd_copy_by_mask(first, last-first, result, mask); +#else + brick_copy_by_mask(first, last, result, mask, std::false_type()); +#endif + +} + +template +OutputIterator pattern_copy_if(InputIterator first, InputIterator last, OutputIterator result, UnaryPredicate pred, IsVector is_vector, /*parallel=*/std::false_type) noexcept { + return brick_copy_if(first, last, result, pred, is_vector); +} + +template +OutputIterator pattern_copy_if(InputIterator first, InputIterator last, OutputIterator result, UnaryPredicate pred, IsVector is_vector, /*parallel=*/std::true_type) { + typedef typename std::iterator_traits::difference_type difference_type; + difference_type n = last-first; + if( difference_type(1) < n ) { + raw_buffer mask_buf(n*sizeof(bool)); + if( mask_buf ) { + bool* mask = static_cast(mask_buf.get()); + difference_type m; + parallel_strict_scan( n, difference_type(0), + [=](difference_type i, difference_type len) { // Reduce + return brick_calc_mask_1(first+i, first+(i+len), + mask + i, + pred, + 
is_vector); + }, + std::plus(), // Combine + [=](difference_type i, difference_type len, difference_type initial) { // Scan + brick_copy_by_mask(first+i, first+(i+len), + result+initial, + mask + i, + is_vector); + }, + [&m](difference_type total) {m=total;}); + return result + m; + } + } + // Out of memory or trivial sequence - use serial algorithm + return brick_copy_if(first, last, result, pred, is_vector); +} + +//------------------------------------------------------------------------ +// unique +//------------------------------------------------------------------------ + +template +ForwardIterator brick_unique(ForwardIterator first, ForwardIterator last, BinaryPredicate pred, /*is_vector=*/std::false_type) noexcept { + return std::unique(first, last, pred); +} + +template +ForwardIterator brick_unique(ForwardIterator first, ForwardIterator last, BinaryPredicate pred, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::unique(first, last, pred); +} + +template +ForwardIterator pattern_unique(ForwardIterator first, ForwardIterator last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_unique(first, last, pred, is_vector); +} + +template +ForwardIterator pattern_unique(ForwardIterator first, ForwardIterator last, BinaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_unique(first, last, pred, is_vector); +} + +//------------------------------------------------------------------------ +// unique_copy +//------------------------------------------------------------------------ + +template +OutputIterator brick_unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate pred, /*vector=*/std::false_type) noexcept { + return std::unique_copy(first, last, result, pred); +} + 
+template<class InputIterator, class OutputIterator, class BinaryPredicate>
+OutputIterator brick_unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate pred, /*vector=*/std::true_type) noexcept {
+#if (__PSTL_MONOTONIC_PRESENT)
+    return simd_unique_copy(first, last-first, result, pred);
+#else
+    return std::unique_copy(first, last, result, pred);
+#endif
+}
+
+template<class InputIterator, class OutputIterator, class BinaryPredicate, class IsVector>
+OutputIterator pattern_unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate pred, IsVector is_vector, /*parallel=*/std::false_type) noexcept {
+    return brick_unique_copy(first, last, result, pred, is_vector);
+}
+
+template<class DifferenceType, class InputIterator, class BinaryPredicate>
+DifferenceType brick_calc_mask_2(InputIterator first, InputIterator last, bool* __restrict mask, BinaryPredicate pred, /*vector=*/std::false_type) noexcept {
+    DifferenceType count = 0;
+    for (; first != last; ++first, ++mask) {
+        *mask = !pred(*first, *(first-1)); // reads the predecessor, so first must not be the start of the sequence
+        count += *mask;
+    }
+    return count;
+}
+
+template<class DifferenceType, class InputIterator, class BinaryPredicate>
+DifferenceType brick_calc_mask_2(InputIterator first, InputIterator last, bool* __restrict mask, BinaryPredicate pred, /*vector=*/std::true_type) noexcept {
+    return simd_calc_mask_2(first, last-first, mask, pred);
+}
+
+template<class InputIterator, class OutputIterator, class BinaryPredicate, class IsVector>
+OutputIterator pattern_unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate pred, IsVector is_vector, /*parallel=*/std::true_type) {
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+    difference_type n = last-first;
+    if( difference_type(2)<n ) { // NOTE(review): this guard and the next three lines were lost in extraction; rebuilt by analogy with pattern_copy_if - confirm
+        raw_buffer mask_buf(n*sizeof(bool));
+        if( mask_buf ) {
+            bool* mask = static_cast<bool*>(mask_buf.get());
+            difference_type m;
+            parallel_strict_scan( n, difference_type(0),
+                [=](difference_type i, difference_type len) -> difference_type {          // Reduce
+                    difference_type extra = 0;
+                    if( i==0 ) {
+                        // Special boundary case
+                        mask[i] = true;
+                        if( --len==0 ) return 1;
+                        ++i;
+                        ++extra;
+                    }
+                    return brick_calc_mask_2<difference_type>(
+                        first+i, first+(i+len),
+                        mask + i,
+                        pred,
+                        is_vector) + extra;
+                },
+                std::plus<difference_type>(),                                             // Combine
+                [=](difference_type i, difference_type len, difference_type initial) {    // Scan
+                    // Phase 2 is same as for pattern_copy_if
+                    brick_copy_by_mask(
+                        first+i, first+(i+len),
+                        result+initial,
+                        mask + i,
+                        is_vector);
+                },
+                [&m](difference_type total) {m=total;});
+            return result + m;
+        }
+    }
+    // Out of memory or trivial sequence - use serial algorithm
+    return brick_unique_copy(first, last, result, pred, is_vector);
+}
+
+//------------------------------------------------------------------------
+// swap_ranges
+//------------------------------------------------------------------------
+
+template<class ForwardIterator1, class ForwardIterator2>
+ForwardIterator2 brick_swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, /*is_vector=*/std::false_type) noexcept {
+    return std::swap_ranges(first1, last1, first2);
+}
+
+template<class ForwardIterator1, class ForwardIterator2>
+ForwardIterator2 brick_swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, /*is_vector=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::swap_ranges(first1, last1, first2);
+}
+
+template<class ForwardIterator1, class ForwardIterator2, class IsVector>
+ForwardIterator2 pattern_swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept {
+    return brick_swap_ranges(first1, last1, first2, is_vector);
+}
+
+template<class ForwardIterator1, class ForwardIterator2, class IsVector>
+ForwardIterator2 pattern_swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial");
+    return brick_swap_ranges(first1, last1, first2, is_vector);
+}
+
+//------------------------------------------------------------------------
+// replace
+//------------------------------------------------------------------------
+
+template<class ForwardIterator, class UnaryPredicate, class T>
+void brick_replace_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, const T& new_value, /*is_vector=*/std::false_type) noexcept {
+    std::replace_if(first, last, pred, new_value);
+}
+
+template<class ForwardIterator, class UnaryPredicate, class T>
+void brick_replace_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, const T& new_value, /*is_vector=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    brick_replace_if(first, last, pred, new_value, std::false_type());
+}
+
+template<class ForwardIterator, class UnaryPredicate, class T, class IsVector>
+void pattern_replace_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, const T& new_value, IsVector is_vector, /*parallel=*/std::false_type) noexcept {
+    brick_replace_if(first, last, pred, new_value, is_vector);
+}
+
+template<class ForwardIterator, class UnaryPredicate, class T, class IsVector>
+void pattern_replace_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, const T& new_value, IsVector is_vector, /*parallel=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial");
+    brick_replace_if(first, last, pred, new_value, is_vector);
+}
+
+//------------------------------------------------------------------------
+// reverse
+//------------------------------------------------------------------------
+
+template<class BidirectionalIterator>
+void brick_reverse(BidirectionalIterator first, BidirectionalIterator last,/*is_vector=*/std::false_type) noexcept {
+    std::reverse(first, last);
+}
+
+template<class BidirectionalIterator>
+void brick_reverse(BidirectionalIterator first, BidirectionalIterator last,/*is_vector=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    brick_reverse(first, last, std::false_type());
+}
+
+template<class BidirectionalIterator, class IsVector>
+void pattern_reverse(BidirectionalIterator first, BidirectionalIterator last, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept {
+    brick_reverse(first, last, is_vector);
+}
+
+template<class BidirectionalIterator, class IsVector>
+void pattern_reverse(BidirectionalIterator first, BidirectionalIterator last, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept {
+    __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial");
+    brick_reverse(first, last, is_vector);
+}
+
+//------------------------------------------------------------------------ +// reverse_copy +//------------------------------------------------------------------------ + +template +OutputIterator brick_reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator d_first, /*is_vector=*/std::false_type) noexcept { + return std::reverse_copy(first, last, d_first); +} + +template +OutputIterator brick_reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator d_first, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return brick_reverse_copy(first, last, d_first, std::false_type()); +} + +template +OutputIterator pattern_reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator d_first, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_reverse_copy(first, last, d_first, is_vector); +} + +template +OutputIterator pattern_reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator d_first, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_reverse_copy(first, last, d_first, is_vector); +} + +//------------------------------------------------------------------------ +// rotate +//------------------------------------------------------------------------ + +template +ForwardIterator brick_rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last, /*is_vector=*/std::false_type) noexcept { + +#if __PSTL_CPP11_STD_ROTATE_BROKEN + std::rotate(first, middle, last); + return std::next(first, std::distance(middle, last)); +#else + return std::rotate(first, middle, last); +#endif +} + +template +ForwardIterator brick_rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last, /*is_vector=*/std::true_type) noexcept { + 
__PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return brick_rotate(first, middle, last, std::false_type()); +} + +template +ForwardIterator pattern_rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_rotate(first, middle, last, is_vector); +} + +template +ForwardIterator pattern_rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_rotate(first, middle, last, is_vector); +} + +//------------------------------------------------------------------------ +// rotate_copy +//------------------------------------------------------------------------ + +template +OutputIterator brick_rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result, /*is_vector=*/std::false_type) noexcept { + return std::rotate_copy(first, middle, last, result); +} + +template +OutputIterator brick_rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::rotate_copy(first, middle, last, result); +} + +template +OutputIterator pattern_rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_rotate_copy(first, middle, last, result, is_vector); +} + +template +OutputIterator pattern_rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + 
return brick_rotate_copy(first, middle, last, result, is_vector); +} + +//------------------------------------------------------------------------ +// is_partitioned +//------------------------------------------------------------------------ + +template +bool brick_is_partitioned(InputIterator first, InputIterator last, UnaryPredicate pred, /*is_vector=*/std::false_type) noexcept +{ + return std::is_partitioned(first, last, pred); +} + +template +bool brick_is_partitioned(InputIterator first, InputIterator last, UnaryPredicate pred, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return brick_is_partitioned(first, last, pred, std::false_type()); +} + +template +bool pattern_is_partitioned(InputIterator first, InputIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_is_partitioned(first, last, pred, is_vector); +} + + +template +bool pattern_is_partitioned(InputIterator first, InputIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_is_partitioned(first, last, pred, is_vector); +} + +//------------------------------------------------------------------------ +// partition +//------------------------------------------------------------------------ + +template +ForwardIterator brick_partition(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, /*is_vector=*/std::false_type) noexcept { + return std::partition(first, last, pred); +} + +template +ForwardIterator brick_partition(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::partition(first, last, pred); +} + +template +ForwardIterator pattern_partition(ForwardIterator 
first, ForwardIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_partition(first, last, pred, is_vector); +} + +template +ForwardIterator pattern_partition(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_partition(first, last, pred, is_vector); +} + +//------------------------------------------------------------------------ +// stable_partition +//------------------------------------------------------------------------ + +template +BidirectionalIterator brick_stable_partition(BidirectionalIterator first, BidirectionalIterator last, UnaryPredicate pred, /*is_vector=*/std::false_type) noexcept { + return std::stable_partition(first, last, pred); +} + +template +BidirectionalIterator brick_stable_partition(BidirectionalIterator first, BidirectionalIterator last, UnaryPredicate pred, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::stable_partition(first, last, pred); +} + +template +BidirectionalIterator pattern_stable_partition(BidirectionalIterator first, BidirectionalIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallelization=*/std::false_type) noexcept { + return brick_stable_partition(first, last, pred, is_vector); +} + +template +BidirectionalIterator pattern_stable_partition(BidirectionalIterator first, BidirectionalIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallelization=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_stable_partition(first, last, pred, is_vector); +} + +//------------------------------------------------------------------------ +// partition_copy 
+//------------------------------------------------------------------------ + +template +std::pair +brick_partition_copy(InputIterator first, InputIterator last, OutputIterator1 out_true, OutputIterator2 out_false, UnaryPredicate pred, /*is_vector=*/std::false_type) noexcept { + return std::partition_copy(first, last, out_true, out_false, pred); +} + +template +std::pair +brick_partition_copy(InputIterator first, InputIterator last, OutputIterator1 out_true, OutputIterator2 out_false, UnaryPredicate pred, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return brick_partition_copy(first, last, out_true, out_false, pred, std::false_type()); +} + + +template +std::pair +pattern_partition_copy(InputIterator first, InputIterator last, OutputIterator1 out_true, OutputIterator2 out_false, UnaryPredicate pred, IsVector is_vector,/*is_parallelization=*/std::false_type) noexcept { + return brick_partition_copy(first, last, out_true, out_false, pred, is_vector); +} + +template +std::pair +pattern_partition_copy(InputIterator first, InputIterator last, OutputIterator1 out_true, OutputIterator2 out_false, UnaryPredicate pred, IsVector is_vector, /*is_parallelization=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_partition_copy(first, last, out_true, out_false, pred, is_vector); +} + +//------------------------------------------------------------------------ +// sort +//------------------------------------------------------------------------ + +template +void pattern_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp, IsVector /*is_vector*/, /*is_parallel=*/std::false_type, IsMoveConstructible) noexcept { + std::sort(first, last, comp); +} + + +template +void pattern_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp, IsVector /*is_vector*/, /*is_parallel=*/std::true_type, 
/*is_move_constructible=*/std::true_type ) { + parallel_stable_sort(first, last, comp, + [](RandomAccessIterator first, RandomAccessIterator last, Compare comp) { + std::sort(first, last, comp); + }); +} + +//------------------------------------------------------------------------ +// stable_sort +//------------------------------------------------------------------------ + +template +void pattern_stable_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp, IsVector /*is_vector*/, /*is_parallel=*/std::false_type) noexcept { + std::stable_sort(first, last, comp); +} + +template +void pattern_stable_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp, IsVector /*is_vector*/, /*is_parallel=*/std::true_type) { + parallel_stable_sort(first, last, comp, + [](RandomAccessIterator first, RandomAccessIterator last, Compare comp) { + std::stable_sort(first, last, comp); + }); +} + +//------------------------------------------------------------------------ +// partial_sort +//------------------------------------------------------------------------ + +template +void brick_partial_sort(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last, Compare comp, /*is_vector=*/std::false_type) noexcept { + std::partial_sort(first, middle, last, comp); +} + +template +void brick_partial_sort(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last, Compare comp, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + std::partial_sort(first, middle, last, comp); +} + +template +void pattern_partial_sort(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + brick_partial_sort(first, middle, last, comp, is_vector); +} + +template +void pattern_partial_sort(RandomAccessIterator first, RandomAccessIterator middle, 
RandomAccessIterator last, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + brick_partial_sort(first, middle, last, comp, is_vector); +} + +//------------------------------------------------------------------------ +// partial_sort_copy +//------------------------------------------------------------------------ + +template +RandomAccessIterator brick_partial_sort_copy(InputIterator first, InputIterator last, RandomAccessIterator d_first, RandomAccessIterator d_last, Compare comp, /*is_vector*/std::false_type) noexcept { + return std::partial_sort_copy(first, last, d_first, d_last, comp); +} + +template +RandomAccessIterator brick_partial_sort_copy(InputIterator first, InputIterator last, RandomAccessIterator d_first, RandomAccessIterator d_last, Compare comp, /*is_vector*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::partial_sort_copy(first, last, d_first, d_last, comp); +} + +template +RandomAccessIterator pattern_partial_sort_copy(InputIterator first, InputIterator last, RandomAccessIterator d_first, RandomAccessIterator d_last, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_partial_sort_copy(first, last, d_first, d_last, comp, is_vector); +} + +template +RandomAccessIterator pattern_partial_sort_copy(InputIterator first, InputIterator last, RandomAccessIterator d_first, RandomAccessIterator d_last, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_partial_sort_copy(first, last, d_first, d_last, comp, is_vector); +} + +//------------------------------------------------------------------------ +// equal +//------------------------------------------------------------------------ + +template +bool 
brick_equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate p, /* IsVector = */ std::false_type) noexcept { + return std::equal(first1, last1, first2, p); +} + +template +bool brick_equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate p, /* is_vector = */ std::true_type) noexcept { + return simd_first(first1, last1-first1, first2, __icp_algorithm::not_pred(p)) == last1; +} + +template +bool pattern_equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate p, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_equal(first1, last1, first2, p, is_vector); +} + +//------------------------------------------------------------------------ +// count +//------------------------------------------------------------------------ +template +typename std::iterator_traits::difference_type +brick_count(InputIterator first, InputIterator last, Predicate pred, /* is_vector = */ std::true_type) noexcept { + return simd_count(first, last-first, pred); +} + +template +typename std::iterator_traits::difference_type +brick_count(InputIterator first, InputIterator last, Predicate pred, /* is_vector = */ std::false_type) noexcept { + return std::count_if(first, last, pred); +} + +template +typename std::iterator_traits::difference_type +pattern_count(InputIterator first, InputIterator last, Predicate pred, /* is_parallel */ std::false_type, IsVector vec) noexcept { + return brick_count(first, last, pred, vec); +} + +//------------------------------------------------------------------------ +// adjacent_find +//------------------------------------------------------------------------ +template +ForwardIt brick_adjacent_find(ForwardIt first, ForwardIt last, BinaryPredicate pred, /* IsVector = */ std::true_type, bool or_semantic) noexcept { + return simd_adjacent_find(first, last, pred, or_semantic); +} + +template +ForwardIt brick_adjacent_find(ForwardIt 
first, ForwardIt last, BinaryPredicate pred, /* IsVector = */ std::false_type, bool or_semantic) noexcept { + return std::adjacent_find(first, last, pred); +} + +template +ForwardIt pattern_adjacent_find(ForwardIt first, ForwardIt last, BinaryPredicate pred, /* is_parallel */ std::false_type, IsVector vec, bool or_semantic) noexcept { + return brick_adjacent_find(first, last, pred, vec, or_semantic); +} + +//------------------------------------------------------------------------ +// nth_element +//------------------------------------------------------------------------ + +template +void brick_nth_element(RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last, Compare comp, /* is_vector = */ std::false_type) noexcept { + std::nth_element(first, nth, last, comp); +} + +template +void brick_nth_element(RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last, Compare comp, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + std::nth_element(first, nth, last, comp); +} + +template +void pattern_nth_element(RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + brick_nth_element(first, nth, last, comp, is_vector); +} + +template +void pattern_nth_element(RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + brick_nth_element(first, nth, last, comp, is_vector); +} + +//------------------------------------------------------------------------ +// fill, fill_n +//------------------------------------------------------------------------ +template +void brick_fill(ForwardIterator first, ForwardIterator last, const T& value, /* is_vector = */ std::true_type) 
noexcept { + simd_fill_n(first, last-first, value); +} + +template +void brick_fill(ForwardIterator first, ForwardIterator last, const T& value, /* is_vector = */std::false_type) noexcept { + std::fill(first, last, value); +} + +template +void pattern_fill(ForwardIterator first, ForwardIterator last, const T& value, /*is_parallel=*/std::false_type, IsVector vec) noexcept { + brick_fill(first, last, value, vec); +} + +template +OutputIterator brick_fill_n(OutputIterator first, Size count, const T& value, /* is_vector = */ std::true_type) noexcept { // vectorized overload: forwards to simd_fill_n and returns its result + return simd_fill_n(first, count, value); +} + +template +OutputIterator brick_fill_n(OutputIterator first, Size count, const T& value, /* is_vector = */ std::false_type) noexcept { // serial overload: forwards to std::fill_n + return std::fill_n(first, count, value); +} + +template +OutputIterator pattern_fill_n(OutputIterator first, Size count, const T& value, /*is_parallel=*/std::false_type, IsVector vec) noexcept { + return brick_fill_n(first, count, value, vec); +} + +//------------------------------------------------------------------------ +// generate, generate_n +//------------------------------------------------------------------------ +template +void brick_generate(ForwardIterator first, ForwardIterator last, Generator g, /* is_vector = */ std::true_type) noexcept { + simd_generate_n(first, last-first, g); +} + +template +void brick_generate(ForwardIterator first, ForwardIterator last, Generator g, /* is_vector = */std::false_type) noexcept { + std::generate(first, last, g); +} + +template +void pattern_generate(ForwardIterator first, ForwardIterator last, Generator g, /*is_parallel=*/std::false_type, IsVector vec) noexcept { + brick_generate(first, last, g, vec); +} + +template +OutputIterator brick_generate_n(OutputIterator first, Size count, Generator g, /* is_vector = */ std::true_type) noexcept { + return simd_generate_n(first, count, g); +} + +template +OutputIterator brick_generate_n(OutputIterator first, Size count, Generator g, /* is_vector = 
*/ std::false_type) noexcept { + return std::generate_n(first, count, g); +} + +template +OutputIterator pattern_generate_n(OutputIterator first, Size count, Generator g, /*is_parallel=*/std::false_type, IsVector vec) noexcept { + return brick_generate_n(first, count, g, vec); +} + +//------------------------------------------------------------------------ +// remove +//------------------------------------------------------------------------ + +template +ForwardIterator brick_remove_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, /* is_vector = */ std::false_type) noexcept { + return std::remove_if(first, last, pred); +} + +template +ForwardIterator brick_remove_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::remove_if(first, last, pred); +} + +template +ForwardIterator pattern_remove_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallel*/ std::false_type) noexcept { + return brick_remove_if(first, last, pred, is_vector); +} + +template +ForwardIterator pattern_remove_if(ForwardIterator first, ForwardIterator last, UnaryPredicate pred, IsVector is_vector, /*is_parallel*/ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_remove_if(first, last, pred, is_vector); +} + +//------------------------------------------------------------------------ +// merge +//------------------------------------------------------------------------ + +template +OutputIterator brick_merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator d_first, Compare comp, /* is_vector = */ std::false_type) noexcept { + return std::merge(first1, last1, first2, last2, d_first, comp); +} + +template +OutputIterator brick_merge(InputIterator1 
first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator d_first, Compare comp, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::merge(first1, last1, first2, last2, d_first, comp); +} + +template +OutputIterator pattern_merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator d_first, Compare comp, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_merge(first1, last1, first2, last2, d_first, comp, is_vector); +} + +template +OutputIterator pattern_merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator d_first, Compare comp, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_merge(first1, last1, first2, last2, d_first, comp, is_vector); +} + +//------------------------------------------------------------------------ +// inplace_merge +//------------------------------------------------------------------------ +template +void brick_inplace_merge(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last, Compare comp, /* is_vector = */ std::false_type) noexcept { + std::inplace_merge(first, middle, last, comp); +} + +template +void brick_inplace_merge(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last, Compare comp, /* is_vector = */ std::true_type) noexcept { // no vectorized variant: same std::inplace_merge call as the serial overload + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + std::inplace_merge(first, middle, last, comp); +} + +template +void pattern_inplace_merge(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + brick_inplace_merge(first, 
middle, last, comp, is_vector); +} + +template +void pattern_inplace_merge(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + brick_inplace_merge(first, middle, last, comp, is_vector); +} + +//------------------------------------------------------------------------ +// includes +//------------------------------------------------------------------------ + +template +bool brick_includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, /* IsVector = */ std::false_type) noexcept { + return std::includes(first1, last1, first2, last2, comp); +} + +template +bool brick_includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, /* IsVector = */ std::true_type) noexcept { // no vectorized variant: same std::includes call as the serial overload + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::includes(first1, last1, first2, last2, comp); +} + +template +bool pattern_includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_includes(first1, last1, first2, last2, comp, is_vector); +} + +template +bool pattern_includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_includes(first1, last1, first2, last2, comp, is_vector); +} + +//------------------------------------------------------------------------ +// set_union +//------------------------------------------------------------------------ + +template +OutputIterator brick_set_union(InputIterator1 first1, 
InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::false_type) noexcept { + return std::set_union(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator brick_set_union(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, + InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_union(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator pattern_set_union(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_set_union(first1, last1, first2, last2, result, comp, is_vector); +} + +template +OutputIterator pattern_set_union(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_set_union(first1, last1, first2, last2, result, comp, is_vector); +} + +//------------------------------------------------------------------------ +// set_intersection +//------------------------------------------------------------------------ + +template +OutputIterator brick_set_intersection(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::false_type) noexcept { + return std::set_intersection(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator brick_set_intersection(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::true_type) 
noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_intersection(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator pattern_set_intersection(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_set_intersection(first1, last1, first2, last2, result, comp, is_vector); +} + +template +OutputIterator pattern_set_intersection(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_set_intersection(first1, last1, first2, last2, result, comp, is_vector); +} + +//------------------------------------------------------------------------ +// set_difference +//------------------------------------------------------------------------ + +template +OutputIterator brick_set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::false_type) noexcept { + return std::set_difference(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator brick_set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_difference(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator pattern_set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, 
/*is_parallel=*/std::false_type) noexcept { + return brick_set_difference(first1, last1, first2, last2, result, comp, is_vector); +} + +template +OutputIterator pattern_set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_set_difference(first1, last1, first2, last2, result, comp, is_vector); +} + +//------------------------------------------------------------------------ +// set_symmetric_difference +//------------------------------------------------------------------------ + +template +OutputIterator brick_set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp,/*is_vector=*/std::false_type) noexcept { + return std::set_symmetric_difference(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator brick_set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_symmetric_difference(first1, last1, first2, last2, result, comp); +} + +template +OutputIterator pattern_set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_set_symmetric_difference(first1, last1, first2, last2, result, comp, is_vector); +} + +template +OutputIterator pattern_set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, IsVector 
is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_set_symmetric_difference(first1, last1, first2, last2, result, comp, is_vector); +} + +//------------------------------------------------------------------------ +// is_heap_until +//------------------------------------------------------------------------ + +template +RandomAccessIterator brick_is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare comp, /* is_vector = */ std::false_type) noexcept { + return std::is_heap_until(first, last, comp); +} + + +template +RandomAccessIterator brick_is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare comp, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::is_heap_until(first, last, comp); +} + +template +RandomAccessIterator pattern_is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare comp, IsVector vec, /* is_parallel = */ std::false_type) noexcept { + return brick_is_heap_until(first, last, comp, vec); +} + +template +RandomAccessIterator pattern_is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare comp, IsVector vec, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_is_heap_until(first, last, comp, vec); +} + +//------------------------------------------------------------------------ +// min_element +//------------------------------------------------------------------------ + +template +ForwardIterator brick_min_element(ForwardIterator first, ForwardIterator last, Compare comp, /* is_vector = */ std::false_type) noexcept { + return std::min_element(first, last, comp); +} + +template +ForwardIterator brick_min_element(ForwardIterator first, ForwardIterator last, Compare comp, /* is_vector = */ 
std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::min_element(first, last, comp); +} + +template +ForwardIterator pattern_min_element(ForwardIterator first, ForwardIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_min_element(first, last, comp, is_vector); +} + +template +ForwardIterator pattern_min_element(ForwardIterator first, ForwardIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_min_element(first, last, comp, is_vector); +} + +//------------------------------------------------------------------------ +// max_element +//------------------------------------------------------------------------ + +template +ForwardIterator brick_max_element(ForwardIterator first, ForwardIterator last, Compare comp, /* is_vector = */ std::false_type) noexcept { + return std::max_element(first, last, comp); +} + +template +ForwardIterator brick_max_element(ForwardIterator first, ForwardIterator last, Compare comp, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::max_element(first, last, comp); +} + +template +ForwardIterator pattern_max_element(ForwardIterator first, ForwardIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_max_element(first, last, comp, is_vector); +} + +template +ForwardIterator pattern_max_element(ForwardIterator first, ForwardIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_max_element(first, last, comp, is_vector); +} + 
+//------------------------------------------------------------------------ +// minmax_element +//------------------------------------------------------------------------ + +template +std::pair brick_minmax_element(ForwardIterator first, ForwardIterator last, Compare comp, /* is_vector = */ std::false_type) noexcept { + return std::minmax_element(first, last, comp); +} + +template +std::pair brick_minmax_element(ForwardIterator first, ForwardIterator last, Compare comp, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::minmax_element(first, last, comp); +} + +template +std::pair pattern_minmax_element(ForwardIterator first, ForwardIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_minmax_element(first, last, comp, is_vector); +} + +template +std::pair pattern_minmax_element(ForwardIterator first, ForwardIterator last, Compare comp, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_minmax_element(first, last, comp, is_vector); +} + +//------------------------------------------------------------------------ +// mismatch +//------------------------------------------------------------------------ +template +std::pair mismatch_serial(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, BinaryPredicate pred) { + for (; first1 != last1 && first2 != last2 && pred(*first1, *first2); ++first1,++first2){ } + return std::pair(first1, first2); +} + +template +std::pair brick_mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Predicate pred, /* is_vector = */ std::false_type) noexcept { + return mismatch_serial(first1, last1, first2, last2, pred); +} + +template +std::pair brick_mismatch(InputIterator1 first1, InputIterator1 
last1, InputIterator2 first2, InputIterator2 last2, Predicate pred, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return mismatch_serial(first1, last1, first2, last2, pred); +} + +template +std::pair pattern_mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Predicate pred, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_mismatch(first1, last1, first2, last2, pred, is_vector); +} + +template +std::pair pattern_mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Predicate pred, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_mismatch(first1, last1, first2, last2, pred, is_vector); +} + +//------------------------------------------------------------------------ +// lexicographical_compare +//------------------------------------------------------------------------ + +template +bool brick_lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, /* is_vector = */ std::false_type) noexcept { + return std::lexicographical_compare(first1, last1, first2, last2, comp); +} + +template +bool brick_lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::lexicographical_compare(first1, last1, first2, last2, comp); +} + +template +bool pattern_lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return 
brick_lexicographical_compare(first1, last1, first2, last2, comp, is_vector); +} + +template +bool pattern_lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_lexicographical_compare(first1, last1, first2, last2, comp, is_vector); +} + +//------------------------------------------------------------------------ +// move +//------------------------------------------------------------------------ + +template +OutputIterator brick_move(InputIterator first, InputIterator last, OutputIterator d_first, /* is_vector = */ std::false_type) noexcept { + return std::move(first, last, d_first); +} + +template +OutputIterator brick_move(InputIterator first, InputIterator last, OutputIterator d_first, /* is_vector = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::move(first, last, d_first); +} + +template +OutputIterator pattern_move(InputIterator first, InputIterator last, OutputIterator d_first, IsVector is_vector, /* is_parallel = */ std::false_type) noexcept { + return brick_move(first, last, d_first, is_vector); +} + +template +OutputIterator pattern_move(InputIterator first, InputIterator last, OutputIterator d_first, IsVector is_vector, /* is_parallel = */ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_move(first, last, d_first, is_vector); +} + +} // namespace __icp_algorithm + +#endif /* __PSTL_algorithm_impl_H */ diff --git a/include/pstl/_internal/common.h b/include/pstl/_internal/common.h new file mode 100644 index 00000000000..9c9bb7a9143 --- /dev/null +++ b/include/pstl/_internal/common.h @@ -0,0 +1,153 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the 
Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_common_H +#define __PSTL_common_H + +// Header contains implementation of common utilities. + +#if __PSTL_USE_TBB +#include +#endif + +namespace __icp_algorithm { + +static int __PSTL_get_workers_num() { +#if __PSTL_USE_TBB + return tbb::tbb_thread::hardware_concurrency(); +#else + __PSTL_PRAGMA_MESSAGE("Backend was not specified"); + return 1; +#endif +} + +// FIXME - make grain_size use compiler information, or make parallel_for/parallel_transform_reduce use introspection for +// better estimate. + +//! Helper for parallel_for and parallel_reduce +template +DifferenceType __PSTL_grain_size( DifferenceType m ) { + const size_t oversub = 8; + int n = __PSTL_get_workers_num(); + m /= oversub*n; + const int min_grain = 1; + const int max_grain = 1<<16; + if( mmax_grain ) + m = max_grain; + return m; +} + +//! Raw memory buffer with automatic freeing and no exceptions. +/** Some of our algorithms need to start with raw memory buffer, +not an initialize array, because initialization/destruction +would make the span be at least O(N). */ +class raw_buffer { + void* ptr; + raw_buffer(const raw_buffer&) = delete; + void operator=(const raw_buffer&) = delete; +public: + //! Try to obtain buffer of given size. + raw_buffer(size_t bytes): ptr(operator new(bytes, std::nothrow)) {} + //! True if buffer was successfully obtained, zero otherwise. + operator bool() const { return ptr != NULL; } + //! 
Return pointer to buffer, or NULL if buffer could not be obtained. + void* get() const { return ptr; } + //! Destroy buffer + ~raw_buffer() { operator delete(ptr); } +}; + +template +typename std::result_of::type except_handler(F f) { + try { + return f(); + } + catch(const std::bad_alloc&) { + throw; // re-throw bad_alloc according to 25.2.4.1 [algorithms.parallel.exceptions] + } + catch(...) { + std::terminate(); // Good bye according to 25.2.4.2 [algorithms.parallel.exceptions] + } +} + +//! Unary operator that returns reference to its argument. +struct no_op { + template + T& operator()(T& a) const { return a; } +}; + +//! Logical negation of a predicate +template +class not_pred { + Pred pred; +public: + explicit not_pred( Pred pred_ ) : pred(pred_) {} + + template + bool operator()( Args&& ... args ) const { return !pred(std::forward(args)...); } +}; + +template +class reorder_pred { + Pred pred; +public: + explicit reorder_pred( Pred pred_ ) : pred(pred_) {} + + template + bool operator()(T&& a, T&& b) const { return pred(std::forward(b), std::forward(a)); } +}; + +//! "==" comparison. +/** Not called "equal" to avoid (possibly unfounded) concerns about accidental invocation via + argument-dependent name lookup by code expecting to find the usual std::equal. */ +class pstl_equal { +public: + explicit pstl_equal() {} + + template + bool operator()( X&& x, Y&& y ) const { return std::forward(x)==std::forward(y); } +}; + +//! Like a polymorphic lambda for ==value +template +class equal_value { + const T& value; +public: + explicit equal_value( const T& value_ ) : value(value_) {} + + template + bool operator()( Arg&& arg ) const { return std::forward(arg)==value; } +}; + +//! 
Logical negation of ==value +template +class not_equal_value { + const T& value; +public: + explicit not_equal_value( const T& value_ ) : value(value_) {} + + template + bool operator()( Arg&& arg ) const { return !(std::forward(arg)==value); } +}; + +} /* namespace __icp_algorithm */ + +#endif /* __PSTL_common_H */ diff --git a/include/pstl/_internal/execution_policy_impl.h b/include/pstl/_internal/execution_policy_impl.h new file mode 100644 index 00000000000..11ce2399f71 --- /dev/null +++ b/include/pstl/_internal/execution_policy_impl.h @@ -0,0 +1,138 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +#ifndef __PSTL_execution_policy_impl_H +#define __PSTL_execution_policy_impl_H + +#include +#include + +#include "../execution" + +namespace __icp_algorithm { +using namespace pstl::execution; + +/* predicate */ + +template + std::false_type lazy_and( T, std::false_type ) { return std::false_type{}; }; + +template + inline T lazy_and( T a, std::true_type ) { return a; } + +/* iterator */ +template +struct is_random_access_iterator { + static constexpr bool value = + is_random_access_iterator::value && + is_random_access_iterator::value; + typedef std::integral_constant type; +}; + +template +struct is_random_access_iterator + : std::is_same::iterator_category, + std::random_access_iterator_tag> { +}; + + +/* policy */ +template +struct policy_traits {}; + +template <> +struct policy_traits { + typedef std::false_type allow_parallel; + typedef std::false_type allow_unsequenced; + typedef std::false_type allow_vector; +}; + +template <> +struct policy_traits { + typedef std::false_type allow_parallel; + typedef std::true_type allow_unsequenced; + typedef std::true_type allow_vector; +}; + +template <> + + +#if __PSTL_USE_PAR_POLICIES +struct policy_traits { + typedef std::true_type allow_parallel; + typedef std::false_type allow_unsequenced; + typedef std::false_type allow_vector; +}; + +template <> +struct policy_traits { + typedef std::true_type allow_parallel; + typedef std::true_type allow_unsequenced; + typedef std::true_type allow_vector; +}; +#endif + +template using enable_if_execution_policy = typename std::enable_if< + is_execution_policy::type>::value, T>::type; + +template using collector_t = + typename policy_traits::type>::collector_type; + +template using allow_vector = + typename __icp_algorithm::policy_traits::type>::allow_vector; + +template using allow_unsequenced = + typename __icp_algorithm::policy_traits::type>::allow_unsequenced; + +template using allow_parallel = + typename __icp_algorithm::policy_traits::type>::allow_parallel; 
+ + +template +auto is_vectorization_preferred(ExecutionPolicy&& exec) -> +decltype(lazy_and( exec.__allow_vector(), typename is_random_access_iterator::type())) +{ + return lazy_and( exec.__allow_vector(), typename is_random_access_iterator::type() ); +} + +template +auto is_parallelization_preferred(ExecutionPolicy&& exec) -> +decltype(lazy_and( exec.__allow_parallel(), typename is_random_access_iterator::type())) +{ + return lazy_and( exec.__allow_parallel(), typename is_random_access_iterator::type() ); +} + +template +struct prefer_unsequenced_tag { + static constexpr bool value = + allow_unsequenced::value && is_random_access_iterator::value; + typedef std::integral_constant type; +}; + +template +struct prefer_parallel_tag { + static constexpr bool value = + allow_parallel::value && is_random_access_iterator::value; + typedef std::integral_constant type; +}; + +} // namespace __icp_algorithm + +#endif /* __PSTL_execution_policy_impl_H */ diff --git a/include/pstl/_internal/memory_impl.h b/include/pstl/_internal/memory_impl.h new file mode 100644 index 00000000000..0dbd74203dc --- /dev/null +++ b/include/pstl/_internal/memory_impl.h @@ -0,0 +1,415 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +#ifndef __PSTL_memory_impl_H +#define __PSTL_memory_impl_H + +#include +#include +#include "execution_policy_impl.h" + +namespace __icp_algorithm { + +//------------------------------------------------------------------------ +// uninitialized_copy +//------------------------------------------------------------------------ + +template +ForwardIterator brick_uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result, /*is_vector=*/std::false_type) noexcept { + return std::uninitialized_copy(first, last, result); +} + +template +ForwardIterator brick_uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return std::uninitialized_copy(first, last, result); +} + +template +ForwardIterator pattern_uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_copy(first, last, result, is_vector); +} + +template +ForwardIterator pattern_uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_copy(first, last, result, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_copy_n +//------------------------------------------------------------------------ + +template +ForwardIterator brick_uninitialized_copy_n(InputIterator first, Size n, ForwardIterator result, /*is_vector=*/std::false_type) noexcept { + return std::uninitialized_copy_n(first, n, result); +} + +template +ForwardIterator brick_uninitialized_copy_n(InputIterator first, Size n, ForwardIterator result, /*is_vector=*/std::true_type) noexcept { 
+__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return std::uninitialized_copy_n(first, n, result); +} + +template +ForwardIterator pattern_uninitialized_copy_n(InputIterator first, Size n, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_copy_n(first, n, result, is_vector); +} + +template +ForwardIterator pattern_uninitialized_copy_n(InputIterator first, Size n, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_copy_n(first, n, result, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_move +//------------------------------------------------------------------------ + +template +void destroy_serial(ForwardIterator first, ForwardIterator last) { + typedef typename std::iterator_traits::value_type T; + while (first != last) { + (*first).~T(); + ++first; + } +} + +template +ForwardIterator brick_uninitialized_move(InputIterator first, InputIterator last, ForwardIterator result, /*is_vector=*/std::false_type) noexcept { + typedef typename std::iterator_traits::value_type Value; + ForwardIterator current = result; + + try { + while (first != last) + new (static_cast(std::addressof(*(result++)))) Value(std::move(*(first++))); + + return result; + } catch (...) 
{ + destroy_serial(current, result); + std::terminate(); + } + + return result; +} + +template +ForwardIterator brick_uninitialized_move(InputIterator first, InputIterator last, ForwardIterator result, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return brick_uninitialized_move(first, last, result, std::false_type()); +} + +template +ForwardIterator pattern_uninitialized_move(InputIterator first, InputIterator last, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_move(first, last, result, is_vector); +} + +template +ForwardIterator pattern_uninitialized_move(InputIterator first, InputIterator last, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_move(first, last, result, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_move_n +//------------------------------------------------------------------------ + +template +ForwardIterator brick_uninitialized_move_n(InputIterator first, Size n, ForwardIterator result, /*is_vector=*/std::false_type) noexcept { + typedef typename std::iterator_traits::value_type Value; + ForwardIterator current = result; + + try { + while (n-- > 0) + new (static_cast(std::addressof(*(result++)))) Value(std::move(*(first++))); + + return result; + } catch (...) 
{ + destroy_serial(current, result); + std::terminate(); + } + + return result; +} + +template +ForwardIterator brick_uninitialized_move_n(InputIterator first, Size n, ForwardIterator result, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return brick_uninitialized_move_n(first, n, result, std::false_type()); +} + +template +ForwardIterator pattern_uninitialized_move_n(InputIterator first, Size n, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_move_n(first, n, result, is_vector); +} + +template +ForwardIterator pattern_uninitialized_move_n(InputIterator first, Size n, ForwardIterator result, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_move_n(first, n, result, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_fill +//------------------------------------------------------------------------ + +template +void brick_uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x, /*is_vector=*/std::false_type) noexcept { + std::uninitialized_fill(first, last, x); +} + +template +void brick_uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + std::uninitialized_fill(first, last, x); +} + +template +void pattern_uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + brick_uninitialized_fill(first, last, x, is_vector); +} + +template +void pattern_uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { 
+__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + brick_uninitialized_fill(first, last, x, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_fill_n +//------------------------------------------------------------------------ +// Some C++11 compilers don't have a version of the algorithm std::uninitialized_fill_n that returns an iterator to the element past the last element filled. +template< class ForwardIterator, class Size, class T > +ForwardIterator uninitialized_fill_n_serial(ForwardIterator first, Size n, const T& x) +{ + typedef typename std::iterator_traits::value_type Value; + auto cur = first; + try { + while (n--) { + ::new (static_cast(std::addressof(*cur))) Value(x); + ++cur; + } + return cur; + } + catch (...) { + destroy_serial(first, cur); + std::terminate(); + } +} + +template +ForwardIterator brick_uninitialized_fill_n(ForwardIterator first, Size n, const T& x, /*is_vector=*/std::false_type) noexcept { + return uninitialized_fill_n_serial(first, n, x); +} + +template +ForwardIterator brick_uninitialized_fill_n(ForwardIterator first, Size n, const T& x, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return brick_uninitialized_fill_n(first, n, x, std::false_type()); +} + +template +ForwardIterator pattern_uninitialized_fill_n(ForwardIterator first, Size n, const T& x, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_fill_n(first, n, x, is_vector); +} + +template +ForwardIterator pattern_uninitialized_fill_n(ForwardIterator first, Size n, const T& x, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_fill_n(first, n, x, is_vector); +} + +//------------------------------------------------------------------------ +// 
destroy +//------------------------------------------------------------------------ + +template +void brick_destroy(ForwardIterator first, ForwardIterator last, /*is_vector=*/std::false_type) noexcept { + destroy_serial(first, last); +} + +template +void brick_destroy(ForwardIterator first, ForwardIterator last, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + brick_destroy(first, last, std::false_type()); +} + +template +void pattern_destroy(ForwardIterator first, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + brick_destroy(first, last, is_vector); +} + +template +void pattern_destroy(ForwardIterator first, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + brick_destroy(first, last, is_vector); +} + +//------------------------------------------------------------------------ +// destroy_n +//------------------------------------------------------------------------ + +template +ForwardIterator destroy_n_serial(ForwardIterator first, Size n) { + typedef typename std::iterator_traits::value_type T; + while (n--) { + (*first).~T(); + ++first; + } + return first; +} + +template +ForwardIterator brick_destroy_n(ForwardIterator first, Size n, /*is_vector=*/std::false_type) noexcept { + return destroy_n_serial(first, n); +} + +template +ForwardIterator brick_destroy_n(ForwardIterator first, Size n, /*is_vector=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return brick_destroy_n(first, n, std::false_type()); +} + +template +ForwardIterator pattern_destroy_n(ForwardIterator first, Size n, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_destroy_n(first, n, is_vector); +} + +template +ForwardIterator pattern_destroy_n(ForwardIterator first, Size n, 
IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { +__PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_destroy_n(first, n, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_default_construct +//------------------------------------------------------------------------ +template +struct Construct { + void operator()(void* ptr) { + ::new (ptr) T; + } +}; + +template +struct Construct { + void operator()(void* ptr) { + ::new (ptr) T(); + } +}; + +template +void brick_uninitialized_construct(ForwardIterator first, ForwardIterator last, /*is_vector=*/std::false_type) noexcept { + typedef typename std::iterator_traits::value_type value_type; + auto cur = first; // Save the iterator for catching exceptions + try { + for (; cur != last; ++cur) + Construct()(static_cast(std::addressof(*cur))); + } + catch (...) { + destroy_serial(first, cur); + std::terminate(); + } +} + +template +void brick_uninitialized_construct(ForwardIterator first, ForwardIterator last, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + brick_uninitialized_construct(first, last, std::false_type()); +} + +template +void pattern_uninitialized_default_construct(ForwardIterator first, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + brick_uninitialized_construct(first, last, is_vector); +} + +template +void pattern_uninitialized_default_construct(ForwardIterator first, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + brick_uninitialized_construct(first, last, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_default_construct_n 
+//------------------------------------------------------------------------ + +template +ForwardIterator brick_uninitialized_construct_n(ForwardIterator first, Size n, /*is_vector=*/std::false_type) noexcept { + typedef typename std::iterator_traits::value_type value_type; + auto cur = first; + try { + for (; n > 0; ++cur, --n) + Construct()(static_cast(std::addressof(*cur))); + return cur; + } + catch (...) { + destroy_serial(first, cur); + std::terminate(); + } +} + +template +ForwardIterator brick_uninitialized_construct_n(ForwardIterator first, Size n, /*is_vector=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, redirected to serial"); + return brick_uninitialized_construct_n(first, n, std::false_type()); +} + +template +ForwardIterator pattern_uninitialized_default_construct_n(ForwardIterator first, Size n, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_construct_n(first, n, is_vector); +} + +template +ForwardIterator pattern_uninitialized_default_construct_n(ForwardIterator first, Size n, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_construct_n(first, n, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_value_construct +//------------------------------------------------------------------------ + +template +void pattern_uninitialized_value_construct(ForwardIterator first, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + brick_uninitialized_construct(first, last, is_vector); +} + +template +void pattern_uninitialized_value_construct(ForwardIterator first, ForwardIterator last, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + 
brick_uninitialized_construct(first, last, is_vector); +} + +//------------------------------------------------------------------------ +// uninitialized_value_construct_n +//------------------------------------------------------------------------ + +template +ForwardIterator pattern_uninitialized_value_construct_n(ForwardIterator first, Size n, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_uninitialized_construct_n(first, n, is_vector); +} + +template +ForwardIterator pattern_uninitialized_value_construct_n(ForwardIterator first, Size n, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, redirected to serial"); + return brick_uninitialized_construct_n(first, n, is_vector); +} +} // namespace __icp_algorithm +#endif //__PSTL_memory_impl_H diff --git a/include/pstl/_internal/numeric_impl.h b/include/pstl/_internal/numeric_impl.h new file mode 100644 index 00000000000..210b5f9cb22 --- /dev/null +++ b/include/pstl/_internal/numeric_impl.h @@ -0,0 +1,213 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +#ifndef __PSTL_numeric_impl_H +#define __PSTL_numeric_impl_H + +#include +#include +#include +#include "simd_impl.h" + +#include "execution_policy_impl.h" + +namespace __icp_algorithm { +//------------------------------------------------------------------------ +// forward +//------------------------------------------------------------------------ +template +T parallel_transform_reduce(Index first, Index last, U u, T init, C combine, R reduce); +template +T parallel_transform_scan(Index n, U u, T init, C combine, R reduce, S scan); + +//------------------------------------------------------------------------ +// transform_reduce (version with two binary functions, according to draft N4659) +//------------------------------------------------------------------------ + +template< class T, class BinaryOperation1, class IsArithmeticIsVector> +struct brick_transform_reduce_imp { + + template + T operator()(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2) noexcept { + return std::inner_product(first1, last1, first2, init, binary_op1, binary_op2); + } + + template< class InputIterator, class UnaryOperation> + T operator()(InputIterator first, InputIterator last, T init, BinaryOperation1 binary_op, UnaryOperation unary_op) noexcept { + for (; first != last; ++first) { + init = binary_op(init, unary_op(*first)); + } + return init; + } +}; + +template< class T> +struct brick_transform_reduce_imp, /*IsArithmeticIsVector*/ std::true_type> { + + template + T operator()(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, std::plus, BinaryOperation2 binary_op2) noexcept { + return simd_transform_reduce(first1, last1-first1, first2, init, binary_op2); + } + + template< class InputIterator, class UnaryOperation> + T operator()(InputIterator first, InputIterator last, T init, std::plus, UnaryOperation unary_op) noexcept { + return simd_transform_reduce(first, 
last-first, init, unary_op); + } +}; + +template +T brick_transform_reduce(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2, /*is_vector=*/std::true_type) noexcept { + + return brick_transform_reduce_imp< T, BinaryOperation1, std::integral_constant::value> >()(first1, last1, first2, init, binary_op1, binary_op2); +} + +template +T brick_transform_reduce(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2, /*is_vector=*/std::false_type) noexcept { + + return brick_transform_reduce_imp< T, BinaryOperation1, std::false_type >()(first1, last1, first2, init, binary_op1, binary_op2); +} + +template +T pattern_transform_reduce(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2, IsVector is_vector, /*is_parallel=*/std::false_type) noexcept { + return brick_transform_reduce(first1, last1, first2, init, binary_op1, binary_op2, is_vector); +} + +template +T pattern_transform_reduce(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2, IsVector is_vector, /*is_parallel=*/std::true_type) noexcept { + return parallel_transform_reduce(first1, last1, + [first1, first2, binary_op2](InputIterator1 i) { return binary_op2(*i, *(first2 + (i - first1))); }, + init, + binary_op1, // Combine + [first1, first2, binary_op1, binary_op2, is_vector](InputIterator1 i, InputIterator1 j, T init) -> T { + return brick_transform_reduce(i, j, first2 + (i - first1), + init, binary_op1, binary_op2, is_vector); + }); +} + +//------------------------------------------------------------------------ +// transform_reduce (version with unary and binary functions) +//------------------------------------------------------------------------ + +template< class InputIterator, class T, class 
UnaryOperation, class BinaryOperation > +T brick_transform_reduce(InputIterator first, InputIterator last, T init, BinaryOperation binary_op, UnaryOperation unary_op, /*is_vector=*/std::true_type) noexcept { + return brick_transform_reduce_imp< T, BinaryOperation, std::integral_constant::value> >()(first, last, init, binary_op, unary_op); +} + +template< class InputIterator, class T, class BinaryOperation, class UnaryOperation > +T brick_transform_reduce(InputIterator first, InputIterator last, T init, BinaryOperation binary_op, UnaryOperation unary_op, /*is_vector=*/std::false_type) noexcept { + + return brick_transform_reduce_imp< T, BinaryOperation, std::false_type >()(first, last, init, binary_op, unary_op); +} + +template +T pattern_transform_reduce(InputIterator first, InputIterator last, T init, BinaryOperation binary_op, UnaryOperation unary_op, IsVector is_vector, /*is_parallel=*/std::false_type ) noexcept { + return brick_transform_reduce(first, last, init, binary_op, unary_op, is_vector); +} + +template +T pattern_transform_reduce(InputIterator first, InputIterator last, T init, BinaryOperation binary_op, UnaryOperation unary_op, IsVector is_vector, /*is_parallel=*/std::true_type) { + return parallel_transform_reduce(first, last, + [unary_op](InputIterator i) {return unary_op(*i); }, + init, + binary_op, + [unary_op, binary_op, is_vector](InputIterator i, InputIterator j, T init) { + return brick_transform_reduce(i, j, init, binary_op, unary_op, is_vector); + }); +} + + +//------------------------------------------------------------------------ +// transform_exclusive_scan +// +// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) 
+//------------------------------------------------------------------------ + +// Exclusive form +template +std::pair brick_transform_scan(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unary_op, T init, BinaryOperation binary_op, /*Inclusive*/ std::false_type) noexcept { + for(; first!=last; ++first, ++result ) { + *result = init; + init = binary_op(init,unary_op(*first)); + } + return std::make_pair(result,init); +} + +// Inclusive form +template +std::pair brick_transform_scan(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unary_op, T init, BinaryOperation binary_op, /*Inclusive*/std::true_type) noexcept { + for(; first!=last; ++first, ++result ) { + init = binary_op(init,unary_op(*first)); + *result = init; + } + return std::make_pair(result,init); +} + +template +OutputIterator pattern_transform_scan(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unary_op, T init, BinaryOperation binary_op, Inclusive, IsVector is_vector, /*is_parallel=*/std::false_type ) noexcept { + return brick_transform_scan(first, last, result, unary_op, init, binary_op, Inclusive()).first; +} + +template +OutputIterator pattern_transform_scan(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unary_op, T init, BinaryOperation binary_op, Inclusive, IsVector is_vector, /*is_parallel=*/std::true_type ) { + typedef typename std::iterator_traits::difference_type difference_type; + parallel_transform_scan( + last-first, + [first, unary_op](size_t i) {return unary_op(first[i]); }, + init, + binary_op, + [first, unary_op, binary_op, is_vector](difference_type i, difference_type j, T init) { + return brick_transform_reduce(first+i, first+j, init, binary_op, unary_op, is_vector); + }, + [first, unary_op, binary_op, result](difference_type i, difference_type j, T init) { + return brick_transform_scan(first+i, first+j, result+i, unary_op, init, binary_op, Inclusive()).second; + 
}); + return result+(last-first); +} + + +//------------------------------------------------------------------------ +// adjacent_difference +//------------------------------------------------------------------------ + +template +OutputIterator brick_adjacent_difference(InputIterator first, InputIterator last, OutputIterator d_first, BinaryOperation op, /*is_vector*/ std::false_type) noexcept { + return std::adjacent_difference(first, last, d_first, op); +} + +template +OutputIterator brick_adjacent_difference(InputIterator first, InputIterator last, OutputIterator d_first, BinaryOperation op, /*is_vector*/ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Vectorial algorithm unimplemented, referenced to serial"); + return std::adjacent_difference(first, last, d_first, op); +} + +template +OutputIterator pattern_adjacent_difference(InputIterator first, InputIterator last, OutputIterator d_first, BinaryOperation op, IsVector is_vector, /*is_parallel*/ std::false_type) noexcept { + return brick_adjacent_difference(first, last, d_first, op, is_vector); +} + +template +OutputIterator pattern_adjacent_difference(InputIterator first, InputIterator last, OutputIterator d_first, BinaryOperation op, IsVector is_vector, /*is_parallel*/ std::true_type) noexcept { + __PSTL_PRAGMA_MESSAGE("Parallel algorithm unimplemented, referenced to serial"); + return brick_adjacent_difference(first, last, d_first, op, is_vector); +} + +} // namespace __icp_algorithm + +#endif /* __PSTL_numeric_impl_H */ diff --git a/include/pstl/_internal/parallel_impl_tbb.h b/include/pstl/_internal/parallel_impl_tbb.h new file mode 100644 index 00000000000..90636e5ed79 --- /dev/null +++ b/include/pstl/_internal/parallel_impl_tbb.h @@ -0,0 +1,667 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_parallel_impl_tbb_H +#define __PSTL_parallel_impl_tbb_H + +// This header defines the minimum set of parallel routines required to support Parallel STL, +// implemented on top of Intel(R) Threading Building Blocks (Intel(R) TBB) library + +#include + +#include "common.h" +#include "algorithm_impl.h" +#include "numeric_impl.h" /* count and count_if use pattern_transform_reduce */ + +// Bring in minimal required subset of Intel TBB +#include +#include +#include +#include +#include +#include + +#if TBB_INTERFACE_VERSION < 10000 +#error Intel(R) Threading Building Blocks 2018 is required; older versions are not supported. +#endif + +namespace __icp_algorithm { + +//------------------------------------------------------------------------ +// parallel_for +//------------------------------------------------------------------------ + +template +class parallel_for_body { +public: + parallel_for_body( const RealBody& body) : my_body( body ) { } + parallel_for_body(const parallel_for_body& body): my_body(body.my_body) { } + void operator()(const tbb::blocked_range& range) const { + my_body(range.begin(), range.end()); + } +private: + RealBody my_body; +}; + +//! 
Evaluate brick f[i,j) to each subrange [i,j) of [first,last) +// wrapper over tbb::parallel_for with exceptions handler +template +void parallel_for(Index first, Index last, F f) { + except_handler([=]() { + tbb::this_task_arena::isolate([=]() { + tbb::parallel_for(tbb::blocked_range(first, last), parallel_for_body(f)); + }); + }); +} + +// wrapper over tbb::parallel_reduce with exceptions handler +template +Value parallel_reduce(tbb::blocked_range range, Body &body) { + return except_handler([range, &body]()->Value { + tbb::this_task_arena::isolate([range, &body]() { + tbb::parallel_reduce(range, body); + }); + return body.sum(); + }); +} + +// wrapper over tbb::parallel_reduce with exceptions handler +template +Value parallel_reduce(tbb::blocked_range range, const Value& identity, const RealBody& real_body, const Reduction& reduction) { + return except_handler([range, &identity, &real_body, &reduction]()->Value { + return tbb::this_task_arena::isolate([range, &identity, &real_body, &reduction]()->Value { + return tbb::parallel_reduce(range, identity, real_body, reduction); + }); + }); +} + +// wrapper over tbb::parallel_scan with exceptions handler +template +Value parallel_scan(tbb::blocked_range range, Body &body) { + return except_handler([range, &body]()->Value { + tbb::this_task_arena::isolate([range, &body]() { + tbb::parallel_scan(range, body); + }); + return body.sum(); + }); +} +//------------------------------------------------------------------------ +// parallel_transform_reduce +// +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// u(i) returns f(i,i+1,identity) for a hypothetical left identity element of r +// c(x,y) combines values x and y that were the result of r or u +//------------------------------------------------------------------------ + +template +struct par_trans_red_body { + alignas(T) char sum_storage[sizeof(T)]; // Holds generalized non-commutative sum when has_sum==true + R brick_reduce; // Most 
likely to have non-empty layout + U u; + C combine; + bool has_sum; // Put last to minimize size of class + T& sum() { + __TBB_ASSERT(has_sum, "sum expected"); + return *(T*)sum_storage; + } + par_trans_red_body( U u_, T init, C c_, R r_) : + brick_reduce(r_), + u(u_), + combine(c_), + has_sum(true) + { + new(sum_storage) T(init); + } + par_trans_red_body( par_trans_red_body& left, tbb::split ) : + brick_reduce(left.brick_reduce), + u(left.u), + combine(left.combine), + has_sum(false) + {} + ~par_trans_red_body() { + // 17.6.5.12 tells us to not worry about catching exceptions from destructors. + if( has_sum ) { + sum().~T(); + } + } + void join(par_trans_red_body& rhs) { + sum() = combine(sum(), rhs.sum()); + } + void operator()(const tbb::blocked_range& range) { + Index i = range.begin(); + Index j = range.end(); + if(!has_sum) { + __TBB_ASSERT(range.size() > 1,"there should be at least 2 elements"); + new(&sum_storage) T(combine(u(i), u(i+1))); // The condition i+1 < j is provided by the grain size of 3 + has_sum = true; + std::advance(i,2); + if(i==j) + return; + } + sum() = brick_reduce(i, j, sum()); + } +}; + +template +T parallel_transform_reduce( Index first, Index last, U u, T init, C combine, R brick_reduce) { + par_trans_red_body body(u, init, combine, brick_reduce); + // The grain size of 3 is used in order to provide mininum 2 elements for each body + return __icp_algorithm::parallel_reduce(tbb::blocked_range(first, last, 3), body); +} + +//------------------------------------------------------------------------ +// parallel_scan +//------------------------------------------------------------------------ + +template +class trans_scan_body { + alignas(T) char sum_storage[sizeof(T)]; // Holds generalized non-commutative sum when has_sum==true + R brick_reduce; // Most likely to have non-empty layout + U u; + C combine; + S scan; + bool has_sum; // Put last to minimize size of class +public: + trans_scan_body(U u_, T init, C combine_, R reduce_, S scan_) 
: + brick_reduce(reduce_), + u(u_), + combine(combine_), + scan(scan_), + has_sum(true) + { + new(sum_storage) T(init); + } + + trans_scan_body( trans_scan_body& b, tbb::split ) : + brick_reduce(b.brick_reduce), + u(b.u), + combine(b.combine), + scan(b.scan), + has_sum(false) + {} + + ~trans_scan_body() { + // 17.6.5.12 tells us to not worry about catching exceptions from destructors. + if( has_sum ) { + sum().~T(); + } + } + + T& sum() const { + __TBB_ASSERT(has_sum,"sum expected"); + return *(T*)sum_storage; + } + + void operator()(const tbb::blocked_range& range, tbb::pre_scan_tag) { + Index i = range.begin(); + Index j = range.end(); + if(!has_sum) { + new(&sum_storage) T(u(i)); + has_sum = true; + ++i; + if(i==j) + return; + } + sum() = brick_reduce(i, j, sum()); + } + + void operator()(const tbb::blocked_range& range, tbb::final_scan_tag) { + sum() = scan(range.begin(), range.end(), sum()); + } + + void reverse_join(trans_scan_body& a) { + if(has_sum) { + sum() = combine(a.sum(), sum()); + } + else { + new(&sum_storage) T(a.sum()); + has_sum = true; + } + } + + void assign(trans_scan_body& b) { + sum() = b.sum(); + } +}; + +template +T parallel_transform_scan(Index n, U u, T init, C combine, R brick_reduce, S scan) { + if(n) { + trans_scan_body body(u, init, combine, brick_reduce, scan); + return __icp_algorithm::parallel_scan(tbb::blocked_range(0, n), body); + } + else + return init; +} + +template +Index split(Index m) { + Index k = 1; + while( 2*k +void upsweep(Index i, Index m, Index tilesize, T* r, Index lastsize, R reduce, C combine) { + if( m==1 ) + r[0] = reduce(i*tilesize, lastsize); + else { + Index k = split(m); + tbb::parallel_invoke( + [=]{upsweep( i, k, tilesize, r, tilesize, reduce, combine );}, + [=]{upsweep( i+k, m-k, tilesize, r+k, lastsize, reduce, combine );} + ); + if( m==2*k ) + r[m-1] = combine(r[k-1], r[m-1]); + } +} + +template +void downsweep(Index i, Index m, Index tilesize, T* r, Index lastsize, T initial, C combine, S scan) { + 
if( m==1 ) { + scan(i*tilesize, lastsize, initial ); + } else { + Index k = split(m); + tbb::parallel_invoke( + [=]{downsweep(i, k, tilesize, r, tilesize, initial, combine, scan);}, + // Assumes that combine never throws. + [=]{downsweep(i+k, m-k, tilesize, r+k, lastsize, combine(initial, r[k-1]), combine, scan);} + ); + } +} + +// Adapted from Intel(R) Cilk(TM) version from cilkpub. +// Let i:len denote a counted interval of length n starting at i. s denotes a generalized-sum value. +// Expected actions of the functors are: +// reduce(i,len) -> s -- return reduction value of i:len. +// combine(s1,s2) -> s -- return merged sum +// apex(s) -- do any processing necessary between reduce and scan. +// scan(i,len,initial) -- perform scan over i:len starting with initial. +// The initial range 0:n is partitioned into consecutive subranges. +// reduce and scan are each called exactly once per subrange. +// Thus callers can rely upon side effects in reduce. +// combine must not throw an exception. +// apex is called exactly once, after all calls to reduce and before all calls to scan. +// For example, it's useful for allocating a buffer used by scan but whose size is the sum of all reduction values. +// T must have a trivial constructor and destructor. +template +void parallel_strict_scan( Index n, T initial, R reduce, C combine, S scan, A apex ) { + except_handler([=]() { + tbb::this_task_arena::isolate([=](){ + if( n>1 ) { + Index p = __PSTL_get_workers_num(); + const Index slack = 4; + Index tilesize = (n-1)/(slack*p) + 1; + Index m = (n-1)/tilesize; + raw_buffer buf((m+1)*sizeof(T)); + if( buf ) { + T* r = static_cast(buf.get()); + upsweep(Index(0), Index(m+1), tilesize, r, n-m*tilesize, reduce, combine); + // When apex is a no-op and combine has no side effects, a good optimizer + // should be able to eliminate all code between here and apex. 
+ // Alternatively, provide a default value for apex that can be + // recognized by metaprogramming that conditionlly executes the following. + size_t k = m+1; + T t = r[k-1]; + while( (k&=k-1) ) + t = combine(r[k-1],t); + apex(combine(initial,t)); + downsweep(Index(0), Index(m+1), tilesize, r, n-m*tilesize, initial, combine, scan); + return; + } + } + // Fewer than 2 elements in sequence, or out of memory. Handle has single block. + T sum = initial; + if(n) + sum = combine(sum, reduce(Index(0), n)); + apex(sum); + if(n) + scan(Index(0), n, initial); + }); + }); +} + +//------------------------------------------------------------------------ +// parallel_or +//------------------------------------------------------------------------ + +//! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) +template +bool parallel_or( Index first, Index last, Brick f ) { + return except_handler([=]() -> bool { + std::atomic found(false); + __icp_algorithm::parallel_for( first, last, [f,&found]( Index i, Index j ) { + if( f(i, j) ) { + found = true; + tbb::task::self().group()->cancel_group_execution(); + }}); + return found; + }); +} + +//------------------------------------------------------------------------ +// parallel_first +//------------------------------------------------------------------------ + +/** Return minimum value returned by brick f[i,j) for subranges [i,j) of [first,last) + Each f[i,j) must return a value in [i,j). */ +template +Index parallel_first( Index first, Index last, Brick f ) { + return except_handler([=]() -> Index { + typedef typename std::iterator_traits::difference_type difference_type; + difference_type n = last-first; + std::atomic minimum( last-first ); + __icp_algorithm::parallel_for(first, last, [f, first, &minimum](Index i, Index j) { + // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of + // why using a shared variable scales fairly well in this situation. 
+ if (i - first < minimum) { + Index res = f(i, j); + // If not 'last' returned then we found what we want so put this to minimum + if (res != j) { + difference_type k = res - first; + for (difference_type old = minimum; k < old; old = minimum) { + minimum.compare_exchange_weak(old, k); + } + } + } + }); + return first + minimum; + }); +} + +//------------------------------------------------------------------------ +// parallel_stable_sort +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// stable_sort utilities +// +// These are used by parallel implementations but do not depend on them. +//------------------------------------------------------------------------ + +//! Destroy sequence [xs,xe) +template +void serial_destroy(RandomAccessIterator zs, RandomAccessIterator ze) { + typedef typename std::iterator_traits::value_type T; + while(zs!=ze) { + --ze; + (*ze).~T(); + } +} + +//! Merge sequences [xs,xe) and [ys,ye) to output sequence [zs,(xe-xs)+(ye-ys)), using std::move +template +void serial_move_merge(RandomAccessIterator1 xs, RandomAccessIterator1 xe, RandomAccessIterator2 ys, RandomAccessIterator2 ye, RandomAccessIterator3 zs, Compare comp) { + if(xs!=xe) { + if(ys!=ye) { + for(;;) + if(comp(*ys, *xs)) { + *zs = std::move(*ys); + ++zs; + if(++ys==ye) break; + } + else { + *zs = std::move(*xs); + ++zs; + if(++xs==xe) goto movey; + } + } + ys = xs; + ye = xe; + } +movey: + std::move(ys, ye, zs); +} + +template +void merge_sort_init_temp_buf(RandomAccessIterator1 xs, RandomAccessIterator1 xe, RandomAccessIterator2 zs, bool inplace) { + RandomAccessIterator2 ze = zs + (xe-xs); + typedef typename std::iterator_traits::value_type T; + if(inplace) + // Initialize the temporary buffer + for(; zs!=ze; ++zs) + new(&*zs) T; + else + // Initialize the temporary buffer and move keys to it. 
+ for(; zs!=ze; ++xs, ++zs) + new(&*zs) T(std::move(*xs)); +} + +template +class merge_task: public tbb::task { + /*override*/tbb::task* execute(); + RandomAccessIterator1 xs, xe; + RandomAccessIterator2 ys, ye; + RandomAccessIterator3 zs; + Compare comp; + bool destroy; +public: + merge_task( RandomAccessIterator1 xs_, RandomAccessIterator1 xe_, + RandomAccessIterator2 ys_, RandomAccessIterator2 ye_, + RandomAccessIterator3 zs_, + bool destroy_, Compare comp_) : + xs(xs_), xe(xe_), ys(ys_), ye(ye_), zs(zs_), destroy(destroy_), comp(comp_) + {} +}; + +template +tbb::task* merge_task::execute() { + const size_t MERGE_CUT_OFF = 2000; + auto n = (xe-xs) + (ye-ys); + if(n <= MERGE_CUT_OFF) { + serial_move_merge(xs, xe, ys, ye, zs, comp); + if(destroy) { + serial_destroy(xs, xe); + serial_destroy(ys, ye); + } + return NULL; + } + else { + RandomAccessIterator1 xm; + RandomAccessIterator2 ym; + if(xe-xs < ye-ys) { + ym = ys+(ye-ys)/2; + xm = std::upper_bound(xs, xe, *ym, comp); + } + else { + xm = xs+(xe-xs)/2; + ym = std::lower_bound(ys, ye, *xm, comp); + } + RandomAccessIterator3 zm = zs + ((xm-xs) + (ym-ys)); + tbb::task* right = new(allocate_additional_child_of(*parent())) + merge_task(xm, xe, ym, ye, zm, destroy, comp); + spawn(*right); + recycle_as_continuation(); + xe = xm; + ye = ym; + } + return this; +} + +template +class stable_sort_task : public tbb::task { + /*override*/tbb::task* execute(); + RandomAccessIterator1 xs, xe; + RandomAccessIterator2 zs; + Compare comp; + LeafSort leaf_sort; + int inplace; +public: + stable_sort_task(RandomAccessIterator1 xs_, RandomAccessIterator1 xe_, RandomAccessIterator2 zs_, int inplace_, Compare comp_, LeafSort leaf_sort_ ) : + xs(xs_), xe(xe_), zs(zs_), inplace(inplace_), comp(comp_), leaf_sort(leaf_sort_) + {} +}; + +const size_t STABLE_SORT_CUT_OFF = 500; + +template +tbb::task* stable_sort_task::execute() { + if( xe - xs <= STABLE_SORT_CUT_OFF ) { + leaf_sort(xs, xe, comp); + if( inplace!=2 ) + 
merge_sort_init_temp_buf(xs, xe, zs, inplace!=0); + return NULL; + } else { + RandomAccessIterator1 xm = xs + (xe - xs) / 2; + RandomAccessIterator2 zm = zs + (xm - xs); + RandomAccessIterator2 ze = zs + (xe - xs); + task* m; + if (inplace) + m = new (allocate_continuation()) merge_task(zs, zm, zm, ze, xs, inplace==2, comp); + else + m = new (allocate_continuation()) merge_task(xs, xm, xm, xe, zs, false, comp); + m->set_ref_count(2); + task* right = new(m->allocate_child()) stable_sort_task(xm,xe,zm,!inplace, comp, leaf_sort); + spawn(*right); + recycle_as_child_of(*m); + xe=xm; + inplace=!inplace; + } + return this; +} + +template +void parallel_stable_sort( RandomAccessIterator xs, RandomAccessIterator xe, Compare comp, LeafSort leaf_sort ) { + except_handler([=]() { + tbb::this_task_arena::isolate([=](){ + typedef typename std::iterator_traits::value_type T; + if( xe-xs > STABLE_SORT_CUT_OFF ) { + __icp_algorithm::raw_buffer buf( sizeof(T)*(xe-xs) ); + if( buf ) { + using tbb::task; + typedef typename std::iterator_traits::value_type T; + task::spawn_root_and_wait(*new( task::allocate_root() ) __icp_algorithm::stable_sort_task( xs, xe, (T*)buf.get(), 2, comp, leaf_sort )); + return; + } + } + // Not enough memory available or sort too small - fall back on serial sort + leaf_sort( xs, xe, comp ); + }); + }); +} + +//------------------------------------------------------------------------ +// parallel_equal +//------------------------------------------------------------------------ +template +bool parallel_equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, Pred p, IsVector vec) { + return __icp_algorithm::parallel_reduce( + tbb::blocked_range(first1, last1, 500), true, + [=](const tbb::blocked_range& r, bool is_equal_local) -> bool { + return is_equal_local && brick_equal(r.begin(), r.end(), first2+(r.begin()-first1), p, vec); + }, + [=](const bool is_equal_local1, const bool is_equal_local2) -> bool { + return is_equal_local1 && 
is_equal_local2; + } + ); +} + +template +bool pattern_equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate p, IsVector vec, /*is_parallel=*/std::true_type) { + return parallel_equal(first1, last1, first2, p, vec); +} + +//------------------------------------------------------------------------ +// count +//------------------------------------------------------------------------ +template +typename std::iterator_traits::difference_type +pattern_count(InputIterator first, InputIterator last, Predicate pred, /* is_parallel */ std::true_type, IsVector vec) { + typedef typename std::iterator_traits::difference_type size_type; + return __icp_algorithm::parallel_reduce(tbb::blocked_range(first, last), size_type(0), + [pred, vec](const tbb::blocked_range& r, size_type value)->size_type { + return value+brick_count(r.begin(), r.end(), pred, vec); + }, + std::plus() + ); +} + +//------------------------------------------------------------------------ +// adjacent_find +//------------------------------------------------------------------------ +template +Index pattern_adjacent_find(Index first, Index last, BinaryPredicate pred, /* is_parallel */ std::true_type, IsVector vec, bool or_semantic) { + if(last-first<2) + return last; + + return __icp_algorithm::parallel_reduce(tbb::blocked_range(first, last), last, + [last, pred, vec, or_semantic](const tbb::blocked_range& r, Index value)->Index { + + if(or_semantic && valuecancel_group_execution(); + return value; + } + + if(value>r.begin()) { + //modify local_last to check the predicate on the boundary values; //TODO: to use a custom tbb::blocked_range with boundaries overlapping + Index local_last = r.end(); + if(local_last!=last) + ++local_last; + + //correct the global result iterator if the "brick" returns a local "last" + const Index res = brick_adjacent_find(r.begin(), local_last, pred, vec, or_semantic); + if(resIndex { return x +Index pattern_fill(Index first, Index last, const T& 
value, /*is_parallel=*/std::true_type, IsVector vec) { + __icp_algorithm::parallel_for(first, last, [&value, vec](Index begin, Index end) { + brick_fill(begin, end, value, vec); }); + return last; +} + +template +Index pattern_fill_n(Index first, Size count, const T& value, /*is_parallel=*/std::true_type, IsVector vec) { + return pattern_fill(first, first + count, value, std::true_type(), vec); +} + +//------------------------------------------------------------------------ +// generate, generate_n +//------------------------------------------------------------------------ +template +Index pattern_generate(Index first, Index last, Generator g, /*is_parallel=*/std::true_type, IsVector vec) { + __icp_algorithm::parallel_for(first, last, [g, vec](Index begin, Index end) { + brick_generate(begin, end, g, vec); }); + return last; +} + +template +Index pattern_generate_n(Index first, Size count, Generator g, /*is_parallel=*/std::true_type, IsVector vec) { + return pattern_generate(first, first + count, g, std::true_type(), vec); +} + +} // namespace __icp_algorithm + +#endif /* __PSTL_parallel_impl_tbb_H */ diff --git a/include/pstl/_internal/pstl_config.h b/include/pstl/_internal/pstl_config.h new file mode 100644 index 00000000000..c7d2daf011b --- /dev/null +++ b/include/pstl/_internal/pstl_config.h @@ -0,0 +1,122 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + + +*/ + +#ifndef __PSTL_config_H +#define __PSTL_config_H + +#if _WIN32 && __PSTL_SHARED_LINKAGE +#if __PSTL_EXPORTS +#define __PSTL_API __declspec(dllexport) +#else +#define __PSTL_API __declspec(dllimport) +#endif +#else +#define __PSTL_API +#endif + +#ifndef __PSTL_HEADER_ONLY +#define __PSTL_HEADER_ONLY 1 +#endif + +// Check the user-defined macro for parallel policies +#if defined(PSTL_USE_PARALLEL_POLICIES) +#undef __PSTL_USE_PAR_POLICIES +#define __PSTL_USE_PAR_POLICIES PSTL_USE_PARALLEL_POLICIES +// Check the internal macro for parallel policies +#elif !defined(__PSTL_USE_PAR_POLICIES) +#define __PSTL_USE_PAR_POLICIES 1 +#endif + +#if __PSTL_USE_PAR_POLICIES +#if !defined(__PSTL_USE_TBB) +#define __PSTL_USE_TBB 1 +#endif +#else +#undef __PSTL_USE_TBB +#endif + +// Portability "#pragma" definition +#ifdef _MSC_VER +#define __PSTL_PRAGMA(x) __pragma(x) +#else +#define __PSTL_PRAGMA(x) _Pragma (#x) +#endif + +#define __PSTL_STRING_AUX(x) #x +#define __PSTL_STRING(x) __PSTL_STRING_AUX(x) +#define __PSTL_STRING_CONCAT(x, y) x#y + +// Enable SIMD for compilers that support OpenMP 4.0 +#if (_OPENMP >= 201307) || (__INTEL_COMPILER >= 1600) || (__PSTL_GCC_VERSION >= 40900) +#define __PSTL_PRAGMA_SIMD __PSTL_PRAGMA(omp simd) +#define __PSTL_PRAGMA_SIMD_REDUCTION(PRM) __PSTL_PRAGMA(omp simd reduction(PRM)) +#elif !defined(_MSC_VER) //#pragma simd +#define __PSTL_PRAGMA_SIMD __PSTL_PRAGMA(simd) +#define __PSTL_PRAGMA_SIMD_REDUCTION(PRM) __PSTL_PRAGMA(simd reduction(PRM)) +#else //no simd +#define __PSTL_PRAGMA_SIMD +#define __PSTL_PRAGMA_SIMD_REDUCTION(PRM) +#endif //Enable SIMD + +// note that when ICC or Clang is in use, __PSTL_GCC_VERSION might not fully match +// the actual GCC version on the system. 
+#define __PSTL_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +// Should be defined to 1 for environments with a vendor implementation of C++17 execution policies +#define __PSTL_CPP17_EXECUTION_POLICIES_PRESENT 0 + +#define __PSTL_CPP14_INTEGER_SEQUENCE_PRESENT (_MSC_VER >= 1900 || __cplusplus >= 201402L) +#define __PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT \ + (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700) && (_MSC_FULL_VER >= 190023918 || __cplusplus >= 201402L) + +#define __PSTL_EARLYEXIT_PRESENT (__INTEL_COMPILER >= 1800) +#define __PSTL_MONOTONIC_PRESENT (__INTEL_COMPILER >= 1800) + +#if __PSTL_EARLYEXIT_PRESENT +#define __PSTL_PRAGMA_SIMD_EARLYEXIT __PSTL_PRAGMA(omp simd early_exit) +#else +#define __PSTL_PRAGMA_SIMD_EARLYEXIT +#endif + +#if __PSTL_MONOTONIC_PRESENT +#define __PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) __PSTL_PRAGMA(omp ordered simd monotonic(PRM)) +#else +#define __PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) +#endif + +#if (__INTEL_COMPILER >= 1600) +#define __PSTL_PRAGMA_VECTOR_UNALIGNED __PSTL_PRAGMA(vector unaligned) +#else +#define __PSTL_PRAGMA_VECTOR_UNALIGNED +#endif + +#if _MSC_VER || __INTEL_COMPILER //the preprocessors don't type a message location +#define __PSTL_PRAGMA_LOCATION __FILE__ ":" __PSTL_STRING(__LINE__) ": warning: " +#else +#define __PSTL_PRAGMA_LOCATION +#endif + +#define __PSTL_PRAGMA_MESSAGE(x) + +#if defined(__GLIBCXX__) +#define __PSTL_CPP11_STD_ROTATE_BROKEN (__PSTL_GCC_VERSION < 50100) //GCC 5.1 release +#endif + +#endif /* __PSTL_config_H */ diff --git a/include/pstl/_internal/simd_impl.h b/include/pstl/_internal/simd_impl.h new file mode 100644 index 00000000000..d5a0965fc32 --- /dev/null +++ b/include/pstl/_internal/simd_impl.h @@ -0,0 +1,391 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_vector_impl_H +#define __PSTL_vector_impl_H + +#include //for std::min +#include + +#include "pstl_config.h" +#include "common.h" + +// This header defines the minimum set of vector routines required +// to support parallel STL. + +namespace __icp_algorithm { + +template +void simd_walk_1(Iterator first, DifferenceType n, Function f) noexcept { +__PSTL_PRAGMA_SIMD + for(DifferenceType i = 0; i < n; ++i) + f(first[i]); +} + +template +Iterator2 simd_walk_2(Iterator1 first1, DifferenceType n, Iterator2 first2, Function f) noexcept { +__PSTL_PRAGMA_SIMD + for(DifferenceType i = 0; i < n; ++i) + f(first1[i], first2[i]); + return first2 + n; +} + +template +Iterator3 simd_walk_3(Iterator1 first1, DifferenceType n, Iterator2 first2, Iterator3 first3, Function f) noexcept { +__PSTL_PRAGMA_SIMD + for(DifferenceType i = 0; i < n; ++i) + f(first1[i], first2[i], first3[i]); + return first3 + n; +} + +// TODO: check whether simd_first() can be used here +template +bool simd_or(Index first, DifferenceType n, Pred pred) noexcept { +#if __PSTL_EARLYEXIT_PRESENT +__PSTL_PRAGMA_SIMD_EARLYEXIT + for(DifferenceType i = 0; i < n; ++i) + if(pred(first[i])) + return true; + return false; +#else + DifferenceType block_size = std::min(4, n); + const Index last = first + n; + while ( last != first ) { + int flag = 1; +__PSTL_PRAGMA_SIMD_REDUCTION(&:flag) + for ( int i = 0; i < block_size; ++i ) + if ( pred(*(first + i)) ) + flag = 0; + if ( !flag ) + return true; + + first += block_size; + if ( last - first >= block_size << 1 ) { + // Double the block size. 
Any unnecessary iterations can be amortized against work done so far. + block_size <<= 1; + } + else { + block_size = last - first; + } + } + return false; +#endif +} + +template +Index simd_first(Index first, DifferenceType n, Pred pred) noexcept { +#if __PSTL_EARLYEXIT_PRESENT + DifferenceType i = 0; +__PSTL_PRAGMA_SIMD_EARLYEXIT + for(;i < n; ++i) + if(pred(*(first+i))) + break; + + return first + i; +#else + const Index last = first + n; + // Experiments show good block sizes like this + const int block_size = 8; + alignas(64) int lane[block_size] = {0}; + while ( last - first >= block_size ) { + int found = 0; +__PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part +__PSTL_PRAGMA_SIMD_REDUCTION(|:found) + for ( int i = 0; i < block_size; ++i ) { + // To improve SIMD vectorization + int t = (pred(*(first + i))); + lane[i] = t; + found |= t; + } + if ( found ) { + int i; + // This will vectorize + for ( i = 0; i < block_size; ++i ) { + if ( lane[i] ) break; + } + return first + i; + } + first += block_size; + } + //Keep remainder scalar + while ( last != first ) { + if ( pred(*(first)) ) { + return first; + } + ++first; + } + return last; +#endif //__PSTL_EARLYEXIT_PRESENT +} + +template +Index1 simd_first(Index1 first1, DifferenceType n, Index2 first2, Pred pred) noexcept { +#if __PSTL_EARLYEXIT_PRESENT + DifferenceType i = 0; +__PSTL_PRAGMA_SIMD_EARLYEXIT + for(;i < n; ++i) + if(pred(first1[i], first2[i])) + break; + return first1+i; +#else + Index1 last1 = first1 + n; + // Experiments show good block sizes like this + const int block_size = 8; + alignas(64) int lane[block_size] = {0}; + while ( last1 - first1 >= block_size ) { + int found = 0; + int i; +__PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part +__PSTL_PRAGMA_SIMD_REDUCTION(|:found) + for ( i = 0; i < block_size; ++i ) { + int t = pred(first1[i], first2[i]); + lane[i] = t; + found |= t; + } + if ( found ) { + int i; + // This will vectorize + for ( i = 0; i < block_size; ++i ) 
{ + if ( lane[i] ) break; + } + return first1 + i; + } + first1 += block_size; + first2 += block_size; + } + + //Keep remainder scalar + for(; last1 != first1; ++first1, ++first2) + if ( pred(*(first1), *(first2)) ) + return first1; + + return last1; +#endif //__PSTL_EARLYEXIT_PRESENT +} + +template +DifferenceType simd_count(Index first, DifferenceType n, Pred pred) noexcept { + DifferenceType count = 0; +__PSTL_PRAGMA_SIMD_REDUCTION(+:count) + for (DifferenceType i = 0; i < n; ++i) + if (pred(*(first + i))) + ++count; + + return count; +} + +template +OutputIterator simd_unique_copy(InputIterator first, DifferenceType n, OutputIterator result, BinaryPredicate pred) noexcept { + if (n == 0) + return result; + + DifferenceType cnt = 1; + result[0] = first[0]; + +__PSTL_PRAGMA_SIMD + for (DifferenceType i = 1; i < n; ++i) { +__PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(cnt:1) + if (!pred(first[i], first[i - 1])) { + result[cnt] = first[i]; + ++cnt; + } + } + return result + cnt; +} + +template +OutputIterator simd_copy_n(InputIterator first, DifferenceType n, OutputIterator result) noexcept { +__PSTL_PRAGMA_SIMD + for(DifferenceType i = 0; i < n; ++i) + result[i] = first[i]; + return result+n; +} + +template +OutputIterator simd_copy_if(InputIterator first, DifferenceType n, OutputIterator result, UnaryPredicate pred) noexcept { + DifferenceType cnt = 0; + +__PSTL_PRAGMA_SIMD + for(DifferenceType i = 0; i < n; ++i) { + __PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(cnt:1) + if(pred(first[i])) { + result[cnt] = first[i]; + ++cnt; + } + } + return result + cnt; +} + +template +DifferenceType simd_calc_mask_2(InputIterator first, DifferenceType n, bool* __restrict mask, BinaryPredicate pred) noexcept { + DifferenceType count = 0; + +__PSTL_PRAGMA_SIMD_REDUCTION(+:count) + for (DifferenceType i = 0; i < n; ++i) { + mask[i] = !pred(first[i], first[i - 1]); + count += mask[i]; + } + return count; +} + +template +DifferenceType simd_calc_mask_1(InputIterator first, DifferenceType n, bool* 
__restrict mask, UnaryPredicate pred) noexcept { + DifferenceType count = 0; + +__PSTL_PRAGMA_SIMD_REDUCTION(+:count) + for (DifferenceType i = 0; i < n; ++i) { + mask[i] = pred(first[i]); + count += mask[i]; + } + return count; +} + +template +void simd_copy_by_mask(InputIterator first, DifferenceType n, OutputIterator result, bool* __restrict mask) noexcept { + DifferenceType cnt = 0; +__PSTL_PRAGMA_SIMD + for (DifferenceType i = 0; i < n; ++i) { +__PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(cnt:1) + if (mask[i]) { + result[cnt] = first[i]; + ++cnt; + } + } +} + +template +Index simd_fill_n(Index first, DifferenceType n, const T& value) noexcept { +__PSTL_PRAGMA_SIMD + for (DifferenceType i = 0; i < n; ++i) + first[i] = value; + return first + n; +} + +template +void simd_fill(Index first, Index last, const T& value) noexcept { + simd_fill_n(first, last - first, value); +} + +template +Index simd_generate_n(Index first, DifferenceType size, Generator g) noexcept { +__PSTL_PRAGMA_SIMD + for (DifferenceType i = 0; i < size; ++i) + first[i] = g(); + return first + size; +} + +template +void simd_generate(Index first, Index last, Generator g) noexcept { + simd_generate_n(first, last - first, g); +} + +template +Index simd_adjacent_find(Index first, Index last, BinaryPredicate pred, bool or_semantic) noexcept { + if(last - first < 2) + return last; + +#if __PSTL_EARLYEXIT_PRESENT + //Some compiler versions fail to compile the following loop when iterators are used. Indices are used instead + size_t i = 0, n = last-first-1; +__PSTL_PRAGMA_SIMD_EARLYEXIT + for(; i < n; ++i) + if(pred(first[i], first[i+1])) + break; + + return i < n ? 
first + i : last; +#else + // Experiments show good block sizes like this + //TODO: to consider tuning block_size for various data types + const int block_size = 8; + alignas(64) int lane[block_size] = {0}; + while ( last - first >= block_size ) { + int found = 0, i; +__PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part +__PSTL_PRAGMA_SIMD_REDUCTION(|:found) + for ( i = 0; i < block_size-1; ++i ) { + //TODO: to improve SIMD vectorization + const int t = pred(*(first + i), *(first + i + 1)); + lane[i] = t; + found |= t; + } + + //Process a pair of elements on a boundary of a data block + if(first + block_size < last && pred(*(first + i), *(first + i + 1))) + lane[i] = found = 1; + + if ( found ) { + if(or_semantic) + return first; + int i; + // This will vectorize + for ( i = 0; i < block_size; ++i ) + if ( lane[i] ) break; + return first + i; //As far as found is true a result (lane[i] is true) is guaranteed + } + first += block_size; + } + //Process the rest elements + for (; last - first > 1; ++first) + if(pred(*first, *(first+1))) + return first; + + return last; +#endif +} + +template +Index1 simd_search(Index1 first, Index1 last, Index2 s_first, Index2 s_last, BinaryPredicate p, bool b_first) noexcept { + auto n2 = s_last - s_first; + if(n2 < 1) + return last; + + auto n1 = last - first; + if(n1 < n2) + return last; + + Index1 result = last; + for(auto i = n1-n2; i >= 0; --i, ++first) { + if(simd_first(s_first, s_last-s_first, first, not_pred(p)) == s_last) {//subsequence was found + result = first; + if(b_first) //first occurrence semantic + break; + } + } + return result; +} + +template +T simd_transform_reduce(InputIterator1 first1, DifferenceType n, InputIterator2 first2, T init, BinaryOperation binary_op) noexcept { +__PSTL_PRAGMA_SIMD_REDUCTION(+:init) + for(DifferenceType i = 0; i < n; ++i) + init += binary_op(first1[i], first2[i]); + return init; +}; + +template +T simd_transform_reduce(InputIterator first, DifferenceType n, T init, 
UnaryOperation unary_op) noexcept { +__PSTL_PRAGMA_SIMD_REDUCTION(+:init) + for(DifferenceType i = 0; i < n; ++i) + init += unary_op(first[i]); + return init; +}; + +} // namespace __icp_algorithm +#endif /* __PSTL_vector_impl_H */ diff --git a/include/pstl/algorithm b/include/pstl/algorithm new file mode 100644 index 00000000000..6cc7952b513 --- /dev/null +++ b/include/pstl/algorithm @@ -0,0 +1,888 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_algorithm_H +#define __PSTL_algorithm_H + +#include + +#include "_internal/pstl_config.h" +#include "_internal/common.h" +#include "_internal/simd_impl.h" +#include "_internal/algorithm_impl.h" +#include "_internal/numeric_impl.h" /* count and count_if use pattern_transform_reduce */ +#if __PSTL_USE_TBB + #include "_internal/parallel_impl_tbb.h" +#else + __PSTL_PRAGMA_MESSAGE("Backend was not specified"); +#endif + +namespace std { + +// [alg.any_of] + +template +__icp_algorithm::enable_if_execution_policy +any_of( ExecutionPolicy&& exec, InputIterator first, InputIterator last, Predicate pred ) { + return __icp_algorithm::pattern_any_of( first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.all_of] + +template +__icp_algorithm::enable_if_execution_policy +all_of(ExecutionPolicy&& exec, InputIterator first, InputIterator last, Pred pred) { + return !any_of(std::forward(exec), 
first, last, __icp_algorithm::not_pred(pred)); +} + +// [alg.none_of] + +template +__icp_algorithm::enable_if_execution_policy +none_of( ExecutionPolicy&& exec, InputIterator first, InputIterator last, Predicate pred ) { + return !any_of( std::forward(exec), first, last, pred ); +} + +// [alg.foreach] + +template +__icp_algorithm::enable_if_execution_policy +for_each(ExecutionPolicy&& exec, InputIterator first, InputIterator last, Function f) { + __icp_algorithm::pattern_walk1( + first, last, f, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +for_each_n(ExecutionPolicy&& exec, InputIterator first, Size n, Function f) { + return __icp_algorithm::pattern_walk1_n( first, n, f, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.find] + +template +__icp_algorithm::enable_if_execution_policy +find_if(ExecutionPolicy&& exec, InputIterator first, InputIterator last, +Predicate pred) { + return __icp_algorithm::pattern_find_if( first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +find_if_not(ExecutionPolicy&& exec, InputIterator first, InputIterator last, +Predicate pred) { + return find_if(exec,first,last,__icp_algorithm::not_pred(pred)); +} + +template +__icp_algorithm::enable_if_execution_policy +find(ExecutionPolicy&& exec, InputIterator first, InputIterator last, +const T& value) { + return find_if(exec, first, last, __icp_algorithm::equal_value(value)); +} + +// [alg.find.end] +template +__icp_algorithm::enable_if_execution_policy +find_end(ExecutionPolicy &&exec, ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred) { + return __icp_algorithm::pattern_find_end(first, last, 
s_first, s_last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +find_end(ExecutionPolicy&& exec, ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last) { + return find_end(exec, first, last, s_first, s_last, std::equal_to::value_type>()); +} + +// [alg.find_first_of] +template +__icp_algorithm::enable_if_execution_policy +find_first_of(ExecutionPolicy&& exec, InputIterator first, InputIterator last, ForwardIterator s_first, ForwardIterator s_last, BinaryPredicate pred) { + return __icp_algorithm::pattern_find_first_of(first, last, s_first, s_last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +find_first_of(ExecutionPolicy&& exec, InputIterator first, InputIterator last, ForwardIterator s_first, ForwardIterator s_last) { + return find_first_of(exec, first, last, s_first, s_last, std::equal_to::value_type>()); +} + +// [alg.adjacent_find] +template< class ExecutionPolicy, class ForwardIt > +__icp_algorithm::enable_if_execution_policy +adjacent_find(ExecutionPolicy&& exec, ForwardIt first, ForwardIt last) { + typedef typename iterator_traits::value_type value_type; + + return __icp_algorithm::pattern_adjacent_find(first, last, std::equal_to(), + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec), /*first_semantic*/ false); +} + +template< class ExecutionPolicy, class ForwardIt, class BinaryPredicate> +__icp_algorithm::enable_if_execution_policy +adjacent_find(ExecutionPolicy&& exec, ForwardIt first, ForwardIt last, BinaryPredicate pred) { +return __icp_algorithm::pattern_adjacent_find(first, last, pred, + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec), 
/*first_semantic*/ false); +} + +// [alg.count] + +// Implementation note: count and count_if call the pattern directly instead of calling std::transform_reduce +// so that we do not have to include . + +template +__icp_algorithm::enable_if_execution_policy::difference_type> +count(ExecutionPolicy&& exec, InputIterator first, InputIterator last, const T& value) { + typedef typename iterator_traits::value_type value_type; + + return __icp_algorithm::pattern_count(first, last, [&value](value_type x) {return value==x;}, + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy::difference_type> +count_if(ExecutionPolicy&& exec, InputIterator first, InputIterator last, Predicate pred) { + return __icp_algorithm::pattern_count(first, last, pred, + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec)); +} + +// [alg.search] + +template +__icp_algorithm::enable_if_execution_policy +search(ExecutionPolicy&& exec, ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last, BinaryPredicate pred) { + return __icp_algorithm::pattern_search(first, last, s_first, s_last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +search(ExecutionPolicy&& exec, ForwardIterator1 first, ForwardIterator1 last, ForwardIterator2 s_first, ForwardIterator2 s_last) { + typedef typename iterator_traits::value_type value_type; + + return search(exec, first, last, s_first, s_last, std::equal_to()); +} + +template +__icp_algorithm::enable_if_execution_policy +search_n(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Size count, const T& value, BinaryPredicate pred) { + return __icp_algorithm::pattern_search_n(first, last, count, value, pred, + 
__icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +search_n(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Size count, const T& value) { + return search_n(exec, first, last, count, value, std::equal_to::value_type>()); +} + +// [alg.copy] + +template +__icp_algorithm::enable_if_execution_policy +copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result) { + return __icp_algorithm::pattern_copy( + first, last, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +copy_n(ExecutionPolicy&& exec, InputIterator first, Size n, OutputIterator result) { + return __icp_algorithm::pattern_copy_n( + first, n, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +copy_if(ExecutionPolicy&& exec, + InputIterator first, InputIterator last, + OutputIterator result, Predicate pred) { + return __icp_algorithm::pattern_copy_if( + first, last, result, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.swap] + +template +__icp_algorithm::enable_if_execution_policy +swap_ranges(ExecutionPolicy&& exec, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2) { + return __icp_algorithm::pattern_swap_ranges(first1, last1, first2, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.transform] + +template +__icp_algorithm::enable_if_execution_policy +transform( ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, UnaryOperation op ) { + typedef typename iterator_traits::value_type 
input_type; + typedef typename iterator_traits::value_type output_type; + return __icp_algorithm::pattern_walk2(first, last, result, + [op]( input_type x, output_type& y ) mutable {y = op(x);}, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +transform( ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryOperation op ) { + typedef typename iterator_traits::value_type input1_type; + typedef typename iterator_traits::value_type input2_type; + typedef typename iterator_traits::value_type output_type; + return __icp_algorithm::pattern_walk3(first1, last1, first2, result, [op]( input1_type x, input2_type y, output_type& z ) mutable {z = op(x,y);}, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.replace] + +template +__icp_algorithm::enable_if_execution_policy +replace_if(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, UnaryPredicate pred, const T& new_value) { + __icp_algorithm::pattern_replace_if(first, last, pred, new_value, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +replace(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, const T& old_value, const T& new_value) { + replace_if(exec, first, last, __icp_algorithm::equal_value(old_value), new_value); +} + +template +__icp_algorithm::enable_if_execution_policy +replace_copy_if(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, UnaryPredicate pred, const T& new_value) { + typedef typename iterator_traits::value_type input_type; + typedef typename iterator_traits::value_type output_type; + return __icp_algorithm::pattern_walk2( + first, last, result, + [pred, 
&new_value](input_type x, output_type& y) mutable { y = pred(x) ? new_value : x; }, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +replace_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, const T& old_value, const T& new_value) { + return replace_copy_if(exec, first, last, result, __icp_algorithm::equal_value(old_value), new_value); +} + +// [alg.fill] + +template +__icp_algorithm::enable_if_execution_policy +fill( ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, const T& value ) { + __icp_algorithm::pattern_fill(first, last, value, + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec)); +} + +template< class ExecutionPolicy, class OutputIterator, class Size, class T> +__icp_algorithm::enable_if_execution_policy +fill_n( ExecutionPolicy&& exec, OutputIterator first, Size count, const T& value ) { + if(count <= 0) + return first; + + return __icp_algorithm::pattern_fill_n(first, count, value, + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec)); +} + +// [alg.generate] +template< class ExecutionPolicy, class ForwardIterator, class Generator> +__icp_algorithm::enable_if_execution_policy +generate( ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Generator g ) { + __icp_algorithm::pattern_generate(first, last, g, + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec)); +} + +template< class ExecutionPolicy, class OutputIterator, class Size, class Generator> +__icp_algorithm::enable_if_execution_policy +generate_n( ExecutionPolicy&& exec, OutputIterator first, Size count, Generator g ) { + if(count <= 0) + return first; + + return __icp_algorithm::pattern_generate_n(first, count, g, + 
__icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec)); +} + +// [alg.remove] + +template +__icp_algorithm::enable_if_execution_policy +remove_copy_if(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, Predicate pred) { + return copy_if( exec, first, last, result, __icp_algorithm::not_pred(pred)); +} + +template +__icp_algorithm::enable_if_execution_policy +remove_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, const T& value) { + return copy_if( exec, first, last, result, __icp_algorithm::not_equal_value(value)); +} + +template +__icp_algorithm::enable_if_execution_policy +remove_if(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, UnaryPredicate pred) { + return __icp_algorithm::pattern_remove_if(first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +remove(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, const T& value) { + return remove_if(exec, first, last, __icp_algorithm::equal_value(value)); +} + +// [alg.unique] + +template +__icp_algorithm::enable_if_execution_policy +unique(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, BinaryPredicate pred) { + return __icp_algorithm::pattern_unique(first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +unique(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + return unique(exec, first, last, __icp_algorithm::pstl_equal()); +} + +template +__icp_algorithm::enable_if_execution_policy +unique_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate pred) { + return 
__icp_algorithm::pattern_unique_copy(first, last, result, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +unique_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result) { + return unique_copy( exec, first, last, result, __icp_algorithm::pstl_equal() ); +} + +// [alg.reverse] + +template +__icp_algorithm::enable_if_execution_policy +reverse(ExecutionPolicy&& exec, BidirectionalIterator first, BidirectionalIterator last) { + __icp_algorithm::pattern_reverse(first, last, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +reverse_copy(ExecutionPolicy&& exec, BidirectionalIterator first, BidirectionalIterator last, OutputIterator d_first) { + return __icp_algorithm::pattern_reverse_copy(first, last, d_first, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.rotate] + +template +__icp_algorithm::enable_if_execution_policy +rotate(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator middle, ForwardIterator last) { + return __icp_algorithm::pattern_rotate(first, middle, last, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +rotate_copy(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result) { + return __icp_algorithm::pattern_rotate_copy(first, middle, last, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.partitions] + +template +__icp_algorithm::enable_if_execution_policy +is_partitioned(ExecutionPolicy&& exec, InputIterator first, InputIterator last, 
UnaryPredicate pred) { + return __icp_algorithm::pattern_is_partitioned(first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +partition(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, UnaryPredicate pred) { + return __icp_algorithm::pattern_partition(first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +stable_partition(ExecutionPolicy&& exec, BidirectionalIterator first, BidirectionalIterator last, UnaryPredicate pred) { + return __icp_algorithm::pattern_stable_partition(first, last, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy> +partition_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator1 out_true, OutputIterator2 out_false, UnaryPredicate pred) { + return __icp_algorithm::pattern_partition_copy(first, last, out_true, out_false, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [alg.sort] + +template +__icp_algorithm::enable_if_execution_policy +sort(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) { + typedef typename iterator_traits::value_type input_type; + + return __icp_algorithm::pattern_sort(first, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec), + typename std::is_move_constructible::type()); +} + +template +__icp_algorithm::enable_if_execution_policy +sort(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) { + typedef typename iterator_traits::value_type input_type; + sort(exec, first, last, 
std::less()); +} + +// [stable.sort] + +template +__icp_algorithm::enable_if_execution_policy +stable_sort(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) { + typedef typename iterator_traits::value_type input_type; + + return __icp_algorithm::pattern_stable_sort(first, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +stable_sort(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) { + typedef typename iterator_traits::value_type input_type; + stable_sort(exec, first, last, std::less()); +} + +// [mismatch] + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2, class BinaryPredicate > +__icp_algorithm::enable_if_execution_policy> +mismatch(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, BinaryPredicate pred) { + return __icp_algorithm::pattern_mismatch(first1, last1, first2, last2, pred, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2, class BinaryPredicate > +__icp_algorithm::enable_if_execution_policy> +mismatch(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate pred) { + return mismatch(exec, first1, last1, first2, std::next(first2, std::distance(first1, last1)), pred); +} + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2 > +__icp_algorithm::enable_if_execution_policy> +mismatch(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2) { + typedef typename iterator_traits::value_type value_type; + return mismatch(exec, first1, last1, first2, last2, std::equal_to()); +} + +template< class 
ExecutionPolicy, class InputIterator1, class InputIterator2 > +__icp_algorithm::enable_if_execution_policy> +mismatch(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) { + return mismatch(exec, first1, last1, first2, std::next(first2, std::distance(first1, last1))); +} + +// [alg.equal] + +template +__icp_algorithm::enable_if_execution_policy +equal(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate p) { + return __icp_algorithm::pattern_equal(first1, last1, first2, p, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec) + ); +} + +template +__icp_algorithm::enable_if_execution_policy +equal(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) { + return equal(exec, first1, last1, first2, __icp_algorithm::pstl_equal()); +} + +template +__icp_algorithm::enable_if_execution_policy +equal(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, BinaryPredicate p) { + if ( std::distance(first1, last1) == std::distance(first2, last2) ) + return std::equal(first1, last1, first2, p); + else + return false; +} + +template +__icp_algorithm::enable_if_execution_policy +equal(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2) { + if ( std::distance(first1, last1) == std::distance(first2, last2) ) + return equal(first1, last1, first2, __icp_algorithm::pstl_equal()); + else + return false; +} + +// [alg.move] +template< class ExecutionPolicy, class InputIterator, class OutputIterator > +__icp_algorithm::enable_if_execution_policy +move(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator d_first) { + return __icp_algorithm::pattern_move(first, last, d_first, + __icp_algorithm::is_vectorization_preferred(exec), + 
__icp_algorithm::is_parallelization_preferred(exec)); +} + +// [partial.sort] + +template +__icp_algorithm::enable_if_execution_policy +partial_sort(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last, Compare comp) { + __icp_algorithm::pattern_partial_sort(first, middle, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +partial_sort(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) { + typedef typename iterator_traits::value_type input_type; + partial_sort(exec, first, middle, last, std::less()); +} + +// [partial.sort.copy] + +template +__icp_algorithm::enable_if_execution_policy +partial_sort_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, RandomAccessIterator d_first, RandomAccessIterator d_last, Compare comp) { + return __icp_algorithm::pattern_partial_sort_copy(first, last, d_first, d_last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +partial_sort_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, RandomAccessIterator d_first, RandomAccessIterator d_last) { + typedef typename iterator_traits::value_type input_type; + return partial_sort_copy(exec, first, last, d_first, d_last, std::less()); +} + +// [is.sorted] +template +__icp_algorithm::enable_if_execution_policy +is_sorted_until(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Compare comp) { + const ForwardIterator res = __icp_algorithm::pattern_adjacent_find(first, last, __icp_algorithm::reorder_pred(comp), + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec), /*first_semantic*/ false); + return res==last ? 
last : std::next(res); +} + +template +__icp_algorithm::enable_if_execution_policy +is_sorted_until(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + typedef typename iterator_traits::value_type input_type; + return is_sorted_until(exec, first, last, std::less()); +} + +template +__icp_algorithm::enable_if_execution_policy +is_sorted(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Compare comp) { + return __icp_algorithm::pattern_adjacent_find(first, last, __icp_algorithm::reorder_pred(comp), + __icp_algorithm::is_parallelization_preferred(exec), + __icp_algorithm::is_vectorization_preferred(exec), /*or_semantic*/ true)==last; +} + +template +__icp_algorithm::enable_if_execution_policy +is_sorted(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + typedef typename iterator_traits::value_type input_type; + return is_sorted(exec, first, last, std::less()); +} + +// [alg.nth.element] + +template +__icp_algorithm::enable_if_execution_policy +nth_element(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last, Compare comp) { + __icp_algorithm::pattern_nth_element(first, nth, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +nth_element(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last) { + typedef typename iterator_traits::value_type input_type; + nth_element(exec, first, nth, last, std::less()); +} + +// [alg.merge] +template< class ExecutionPolicy, class InputIterator1, class InputIterator2, class OutputIterator, class Compare> +__icp_algorithm::enable_if_execution_policy +merge(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator d_first, Compare comp) { + return 
__icp_algorithm::pattern_merge(first1, last1, first2, last2, d_first, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2, class OutputIterator> +__icp_algorithm::enable_if_execution_policy +merge(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator d_first) { + typedef typename iterator_traits::value_type value_type; + return merge(exec, first1, last1, first2, last2, d_first, std::less()); +} + +template< class ExecutionPolicy, class BidirectionalIterator, class Compare> +__icp_algorithm::enable_if_execution_policy +inplace_merge(ExecutionPolicy&& exec, BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last, Compare comp) { + __icp_algorithm::pattern_inplace_merge(first, middle, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class BidirectionalIterator> +__icp_algorithm::enable_if_execution_policy +inplace_merge(ExecutionPolicy&& exec, BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) { + typedef typename std::iterator_traits::value_type input_type; + inplace_merge(exec, first, middle, last, std::less()); +} + +// [includes] + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2, class Compare> +__icp_algorithm::enable_if_execution_policy +includes(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp) { + return __icp_algorithm::pattern_includes(first1, last1, first2, last2, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2> 
+__icp_algorithm::enable_if_execution_policy +includes(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2) { + typedef typename iterator_traits::value_type value_type; + return includes(exec, first1, last1, first2, last2, std::less()); +} + +// [set.union] + +template +__icp_algorithm::enable_if_execution_policy +set_union(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp) { + return __icp_algorithm::pattern_set_union(first1, last1, first2, last2, result, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +set_union(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, + InputIterator2 last2, OutputIterator result) { + typedef typename iterator_traits::value_type value_type; + return set_union(exec, first1, last1, first2, last2, result, std::less()); +} + +// [set.intersection] + +template +__icp_algorithm::enable_if_execution_policy +set_intersection(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp) { + return __icp_algorithm::pattern_set_intersection(first1, last1, first2, last2, result, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +set_intersection(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result) { + typedef typename iterator_traits::value_type value_type; + return set_intersection(exec, first1, last1, first2, last2, result, std::less()); +} + +// [set.difference] + +template +__icp_algorithm::enable_if_execution_policy 
+set_difference(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp) { + return __icp_algorithm::pattern_set_difference(first1, last1, first2, last2, result, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +set_difference(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result) { + typedef typename iterator_traits::value_type value_type; + return set_difference(exec, first1, last1, first2, last2, result, std::less()); +} + +// [set.symmetric.difference] + +template +__icp_algorithm::enable_if_execution_policy +set_symmetric_difference(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp) { + return __icp_algorithm::pattern_set_symmetric_difference(first1, last1, first2, last2, result, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +set_symmetric_difference(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result) { + typedef typename iterator_traits::value_type value_type; + return set_symmetric_difference(exec, first1, last1, first2, last2, result, std::less()); +} + +// [is.heap] +template< class ExecutionPolicy, class RandomAccessIterator, class Compare > +__icp_algorithm::enable_if_execution_policy +is_heap_until(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) { + return __icp_algorithm::pattern_is_heap_until(first, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + 
__icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class RandomAccessIterator > +__icp_algorithm::enable_if_execution_policy +is_heap_until(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) { + typedef typename std::iterator_traits::value_type input_type; + return is_heap_until(exec, first, last, std::less()); +} + +template< class ExecutionPolicy, class RandomAccessIterator, class Compare > +__icp_algorithm::enable_if_execution_policy +is_heap(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) { + return is_heap_until(exec, first, last, comp) == last; +} + +template< class ExecutionPolicy, class RandomAccessIterator > +__icp_algorithm::enable_if_execution_policy +is_heap(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) { + typedef typename std::iterator_traits::value_type input_type; + return is_heap(exec, first, last, std::less()); +} + +// [alg.min.max] + +template< class ExecutionPolicy, class ForwardIterator, class Compare > +__icp_algorithm::enable_if_execution_policy +min_element(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Compare comp) { + return __icp_algorithm::pattern_min_element(first, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class ForwardIterator > +__icp_algorithm::enable_if_execution_policy +min_element(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + typedef typename std::iterator_traits::value_type input_type; + return min_element(exec, first, last, std::less()); +} + +template< class ExecutionPolicy, class ForwardIterator, class Compare > +__icp_algorithm::enable_if_execution_policy +max_element(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Compare comp) { + return __icp_algorithm::pattern_max_element(first, last, comp, 
+ __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class ForwardIterator > +__icp_algorithm::enable_if_execution_policy +max_element(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + typedef typename std::iterator_traits::value_type input_type; + return max_element(exec, first, last, std::less()); +} + +template< class ExecutionPolicy, class ForwardIterator, class Compare > +__icp_algorithm::enable_if_execution_policy> +minmax_element(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, Compare comp) { + return __icp_algorithm::pattern_minmax_element(first, last, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class ForwardIterator > +__icp_algorithm::enable_if_execution_policy> +minmax_element(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + typedef typename iterator_traits::value_type value_type; + return minmax_element(exec, first, last, std::less()); +} + +// [alg.lex.comparison] + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2, class Compare > +__icp_algorithm::enable_if_execution_policy +lexicographical_compare(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp) { + return __icp_algorithm::pattern_lexicographical_compare(first1, last1, first2, last2, comp, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template< class ExecutionPolicy, class InputIterator1, class InputIterator2 > +__icp_algorithm::enable_if_execution_policy +lexicographical_compare(ExecutionPolicy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2) { + typedef typename iterator_traits::value_type value_type; + 
return lexicographical_compare(policy, first1, last1, first2, last2, std::less()); +} + +} // namespace std + +#endif /* __PSTL_algorithm_H */ diff --git a/include/pstl/execution b/include/pstl/execution new file mode 100644 index 00000000000..300ff2d25c5 --- /dev/null +++ b/include/pstl/execution @@ -0,0 +1,132 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_execution_policy_H +#define __PSTL_execution_policy_H + +#include +#include "_internal/pstl_config.h" + +namespace pstl { +namespace execution { +inline namespace v1 { + +// 2.4, Sequential execution policy +class sequenced_policy { +public: + // For internal use only + static constexpr std::false_type __allow_unsequenced() {return std::false_type{};} + static constexpr std::false_type __allow_vector() {return std::false_type{};} + static constexpr std::false_type __allow_parallel() {return std::false_type{};} +}; + +#if __PSTL_USE_PAR_POLICIES +// 2.5, Parallel execution policy +class parallel_policy { +public: + // For internal use only + static constexpr std::false_type __allow_unsequenced() {return std::false_type{};} + static constexpr std::false_type __allow_vector() {return std::false_type{};} + static constexpr std::true_type __allow_parallel() {return std::true_type{};} +}; + +// 2.6, Parallel+Vector execution policy +class parallel_unsequenced_policy { +public: + // For internal use only + static constexpr std::true_type 
__allow_unsequenced() {return std::true_type{};} + static constexpr std::true_type __allow_vector() {return std::true_type{};} + static constexpr std::true_type __allow_parallel() {return std::true_type{};} +}; +#endif + +class unsequenced_policy { +public: + // For internal use only + static constexpr std::true_type __allow_unsequenced() {return std::true_type{};} + static constexpr std::true_type __allow_vector() {return std::true_type{};} + static constexpr std::false_type __allow_parallel() {return std::false_type{};} +}; + + +// 2.8, Execution policy objects +constexpr sequenced_policy seq{}; +#if __PSTL_USE_PAR_POLICIES +constexpr parallel_policy par{}; +constexpr parallel_unsequenced_policy par_unseq{}; +#endif +constexpr unsequenced_policy unseq{}; + +// 2.3, Execution policy type trait +template struct is_execution_policy: std::false_type {}; + +template<> struct is_execution_policy: std::true_type {}; +#if __PSTL_USE_PAR_POLICIES +template<> struct is_execution_policy: std::true_type {}; +template<> struct is_execution_policy: std::true_type {}; +#endif +template<> struct is_execution_policy: std::true_type {}; + +#if __PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT +template constexpr bool is_execution_policy_v = is_execution_policy::value; +#endif + +} //namespace v1 +} //namespace execution +} //namespace pstl + +#if __PSTL_CPP17_EXECUTION_POLICIES_PRESENT +__PSTL_PRAGMA_MESSAGE("The execution policies are defined in the namespace pstl::execution") +#else +namespace std { + // Type trait + using pstl::execution::is_execution_policy; +#if __PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT +#if __INTEL_COMPILER + template constexpr bool is_execution_policy_v = is_execution_policy::value; +#else + using pstl::execution::is_execution_policy_v; +#endif +#endif + + namespace execution { + // Standard C++ policy classes + using pstl::execution::sequenced_policy; +#if __PSTL_USE_PAR_POLICIES + using pstl::execution::parallel_policy; + using 
pstl::execution::parallel_unsequenced_policy; +#endif + // Standard predefined policy instances + using pstl::execution::seq; +#if __PSTL_USE_PAR_POLICIES + using pstl::execution::par; + using pstl::execution::par_unseq; +#endif + // Implementation-defined names + // Unsequenced policy is not yet standard, but for consistency + // we include it into namespace std::execution as well + using pstl::execution::unseq; + using pstl::execution::unsequenced_policy; + } +} +__PSTL_PRAGMA_MESSAGE("The execution policies are injected into the standard namespace std::execution") +#endif + +#endif /* __PSTL_execution_policy_H */ diff --git a/include/pstl/iterators.h b/include/pstl/iterators.h new file mode 100644 index 00000000000..85ff97c0763 --- /dev/null +++ b/include/pstl/iterators.h @@ -0,0 +1,210 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_iterators_H +#define __PSTL_iterators_H + +#include +#include +#include + +#if __PSTL_CPP14_INTEGER_SEQUENCE_PRESENT + +#include +namespace pstl { + namespace internal { + using std::index_sequence; + using std::make_index_sequence; + } //internal +}//namespace pstl + +#else //std::integer_sequence is not present +namespace pstl { + namespace internal { +template class index_sequence {}; +template +struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... 
> {}; +template +struct make_index_sequence_impl <0, S...> { + typedef index_sequence type; +}; +template struct make_index_sequence: internal::make_index_sequence_impl::type {}; +} //internal +}//namespace pstl +#endif + +namespace pstl { +namespace internal { + +template +struct tuple_util { + template + static void increment(TupleType& it, DifferenceType forward) { + std::get(it) += forward; + tuple_util::increment(it, forward); + } + template + static bool check_sync(const TupleType& it1, const TupleType& it2, DifferenceType val) { + if(std::get(it1) - std::get(it2) != val) + return false; + return tuple_util::check_sync(it1, it2, val); + } +}; + +template<> +struct tuple_util<0> { + template + static void increment(TupleType&, DifferenceType) {} + template + static bool check_sync(const TupleType&, const TupleType&, DifferenceType) { return true;} +}; + +template +struct make_references { + template + TupleReturnType operator()(const TupleType& t, pstl::internal::index_sequence) { + return std::tie((*std::get(t))...); + } +}; + +} //namespace internal + +template +class counting_iterator { +public: + typedef typename std::make_signed::type difference_type; + typedef IntType value_type; + typedef const IntType* pointer; + typedef const IntType& reference; + typedef std::random_access_iterator_tag iterator_category; + + explicit counting_iterator(IntType init): my_counter(init) { static_assert(std::is_integral::value, "Integer required."); } + + reference operator*() const { return my_counter; } + value_type operator[](difference_type i) const { return *(*this + i); } + + difference_type operator-(const counting_iterator& it) const { return my_counter - it.my_counter; } + + counting_iterator& operator+=(difference_type forward) { my_counter += forward; return *this; } + counting_iterator& operator-=(difference_type backward) { return *this += -backward; } + counting_iterator& operator++() { return *this += 1; } + counting_iterator& operator--() { return *this -= 
1; } + + counting_iterator operator++(int) { + counting_iterator it(*this); + ++(*this); + return it; + } + counting_iterator operator--(int) { + counting_iterator it(*this); + --(*this); + return it; + } + + counting_iterator operator-(difference_type backward) const { return counting_iterator(my_counter - backward); } + counting_iterator operator+(difference_type forward) const { return counting_iterator(my_counter + forward); } + friend counting_iterator operator+(difference_type forward, const counting_iterator it) { return it + forward; } + + bool operator==(const counting_iterator& it) const { return *this - it == 0; } + bool operator!=(const counting_iterator& it) const { return !(*this == it); } + bool operator<(const counting_iterator& it) const {return *this - it < 0; } + bool operator>(const counting_iterator& it) const { return it < *this; } + bool operator<=(const counting_iterator& it) const { return !(*this > it); } + bool operator>=(const counting_iterator& it) const { return !(*this < it); } + +private: + IntType my_counter; +}; + +template +class zip_iterator { + static const std::size_t num_types = sizeof...(Types); + typedef typename std::tuple it_types; +public: + typedef typename std::make_signed::type difference_type; + typedef std::tuple::value_type...> value_type; + typedef std::tuple::reference...> reference; + typedef std::tuple::pointer...> pointer; + typedef std::random_access_iterator_tag iterator_category; + + explicit zip_iterator(Types... 
args): my_it(std::make_tuple(args...)) {} + + reference operator*() const { /* const-qualified: only reads my_it (make_references takes the tuple by const reference), so const zip_iterators can be dereferenced too */ + return internal::make_references()(my_it, pstl::internal::make_index_sequence()); + } + reference operator[](difference_type i) const { return *(*this + i); } + + difference_type operator-(const zip_iterator& it) const { + assert((internal::tuple_util::check_sync(my_it, it.my_it, std::get<0>(my_it) - std::get<0>(it.my_it)))); + return std::get<0>(my_it) - std::get<0>(it.my_it); + } + + zip_iterator& operator+=(difference_type forward) { + internal::tuple_util::increment(my_it, forward); + return *this; + } + zip_iterator& operator-=(difference_type backward) { return *this += -backward; } + zip_iterator& operator++() { return *this += 1; } + zip_iterator& operator--() { return *this -= 1; } + + zip_iterator operator++(int) { + zip_iterator it(*this); + ++(*this); + return it; + } + zip_iterator operator--(int) { + zip_iterator it(*this); + --(*this); + return it; + } + + zip_iterator operator-(difference_type backward) const { + zip_iterator it(*this); + return it -= backward; + } + zip_iterator operator+(difference_type forward) const { + zip_iterator it(*this); + return it += forward; + } + friend zip_iterator operator+(difference_type forward, const zip_iterator& it) { return it + forward; } + + bool operator==(const zip_iterator& it) const { + assert((internal::tuple_util::check_sync(my_it, it.my_it, *this - it))); + return *this - it == 0; + } + bool operator!=(const zip_iterator& it) const { return !(*this == it); } + bool operator<(const zip_iterator& it) const { return *this - it < 0; } + bool operator>(const zip_iterator& it) const { return it < *this; } + bool operator<=(const zip_iterator& it) const { return !(*this > it); } + bool operator>=(const zip_iterator& it) const { return !(*this < it); } + +private: + it_types my_it; +}; + +template +zip_iterator make_zip_iterator(T... 
args) { return zip_iterator(args...); } + +} //namespace pstl + +namespace __icp_algorithm { +} + +#endif /* __PSTL_iterators_H */ diff --git a/include/pstl/memory b/include/pstl/memory new file mode 100644 index 00000000000..19d92c808a3 --- /dev/null +++ b/include/pstl/memory @@ -0,0 +1,138 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_memory_H +#define __PSTL_memory_H + +#include "_internal/pstl_config.h" +#include "_internal/memory_impl.h" + +namespace std { + +// [uninitialized.copy] + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_copy(ExecutionPolicy&& exec, InputIterator first, InputIterator last, ForwardIterator result) { + return __icp_algorithm::pattern_uninitialized_copy(first, last, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_copy_n(ExecutionPolicy&& exec, InputIterator first, Size n, ForwardIterator result) { + return __icp_algorithm::pattern_uninitialized_copy_n(first, n, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [uninitialized.move] + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_move(ExecutionPolicy&& exec, InputIterator first, InputIterator last, ForwardIterator result) { + return 
__icp_algorithm::pattern_uninitialized_move(first, last, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_move_n(ExecutionPolicy&& exec, InputIterator first, Size n, ForwardIterator result) { + return __icp_algorithm::pattern_uninitialized_move_n(first, n, result, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [uninitialized.fill] + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_fill(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, const T& x) { + return __icp_algorithm::pattern_uninitialized_fill(first, last, x, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_fill_n(ExecutionPolicy&& exec, ForwardIterator first, Size n, const T& x) { + return __icp_algorithm::pattern_uninitialized_fill_n(first, n, x, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [specialized.destroy] + +template +__icp_algorithm::enable_if_execution_policy +destroy(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + __icp_algorithm::pattern_destroy(first, last, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +destroy_n(ExecutionPolicy&& exec, ForwardIterator first, Size n) { + return __icp_algorithm::pattern_destroy_n(first, n, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [uninitialized.construct.default] + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_default_construct(ExecutionPolicy&& exec, 
ForwardIterator first, ForwardIterator last) { + __icp_algorithm::pattern_uninitialized_default_construct(first, last, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_default_construct_n(ExecutionPolicy&& exec, ForwardIterator first, Size n) { + return __icp_algorithm::pattern_uninitialized_default_construct_n(first, n, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [uninitialized.construct.value] + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_value_construct(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) { + __icp_algorithm::pattern_uninitialized_value_construct(first, last, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +uninitialized_value_construct_n(ExecutionPolicy&& exec, ForwardIterator first, Size n) { + return __icp_algorithm::pattern_uninitialized_value_construct_n(first, n, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +} // namespace std +#endif /*__PSTL_memory_H */ diff --git a/include/pstl/numeric b/include/pstl/numeric new file mode 100644 index 00000000000..c1b190e1bff --- /dev/null +++ b/include/pstl/numeric @@ -0,0 +1,187 @@ +/* + Copyright (c) 2017 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + + + + +*/ + +#ifndef __PSTL_numeric_H +#define __PSTL_numeric_H + +#include + +#include "_internal/pstl_config.h" +#include "_internal/common.h" +#include "_internal/numeric_impl.h" +#if __PSTL_USE_TBB +#include "_internal/parallel_impl_tbb.h" +#else + __PSTL_PRAGMA_MESSAGE("Backend was not specified"); +#endif + +namespace std { + +// [reduce] + +template +__icp_algorithm::enable_if_execution_policy +reduce(ExecutionPolicy&& exec, InputIterator first, InputIterator last, T init, BinaryOperation binary_op) { + return transform_reduce(exec, first, last, init, binary_op, __icp_algorithm::no_op()); +} + +template +__icp_algorithm::enable_if_execution_policy +reduce(ExecutionPolicy&& exec, InputIterator first, InputIterator last, T init) { + return transform_reduce(exec, first, last, init, std::plus(), __icp_algorithm::no_op()); +} + +template +__icp_algorithm::enable_if_execution_policy::value_type> +reduce(ExecutionPolicy&& exec, InputIterator first, InputIterator last) { + typedef typename decay::value_type>::type T; + return transform_reduce(exec, first, last, T{}, std::plus(), __icp_algorithm::no_op()); +} + +// [transform.reduce] + +template +__icp_algorithm::enable_if_execution_policy +transform_reduce(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init) { + typedef typename iterator_traits::value_type input_type; + return __icp_algorithm::pattern_transform_reduce(first1, last1, first2, init, std::plus(), std::multiplies(), + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +transform_reduce(ExecutionPolicy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryOperation1 binary_op1, BinaryOperation2 binary_op2) { + return 
__icp_algorithm::pattern_transform_reduce(first1, last1, first2, init, binary_op1, binary_op2, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +transform_reduce(ExecutionPolicy&& exec, InputIterator first, InputIterator last, T init, BinaryOperation binary_op, UnaryOperation unary_op) { + return __icp_algorithm::pattern_transform_reduce(first, last, init, binary_op, unary_op, + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [exclusive.scan] + +template +__icp_algorithm::enable_if_execution_policy +exclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, T init) { + return transform_exclusive_scan(exec, first, last, result, init, std::plus(), __icp_algorithm::no_op()); +} + +template +OutputIterator exclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, T init, BinaryOperation binary_op) { + return transform_exclusive_scan(exec, first, last, result, init, binary_op, __icp_algorithm::no_op()); +} + +// [inclusive.scan] + +template +__icp_algorithm::enable_if_execution_policy +inclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result) { + typedef typename iterator_traits::value_type input_type; + + if( first!=last ) { + auto tmp = *first; + *result = tmp; + return transform_inclusive_scan(exec, ++first, last, ++result, std::plus(), __icp_algorithm::no_op(), tmp); + } else { + return result; + } +} + +template +__icp_algorithm::enable_if_execution_policy +inclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op) { + if( first!=last ) { + auto tmp = *first; + *result = tmp; + return transform_inclusive_scan(exec, ++first, last, ++result, binary_op, __icp_algorithm::no_op(), 
tmp); + } else { + return result; + } +} + +template +__icp_algorithm::enable_if_execution_policy +inclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op, T init) { + return transform_inclusive_scan(exec, first, last, result, binary_op, __icp_algorithm::no_op(), init); +} + +// [transform.exclusive.scan] + +template +__icp_algorithm::enable_if_execution_policy +transform_exclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, T init, BinaryOperation binary_op, UnaryOperation unary_op) { + return __icp_algorithm::pattern_transform_scan( + first, last, result, unary_op, init, binary_op, + /*inclusive=*/std::false_type(), + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +// [transform.inclusive.scan] + +template +__icp_algorithm::enable_if_execution_policy +transform_inclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op, UnaryOperation unary_op, T init) { + return __icp_algorithm::pattern_transform_scan( + first, last, result, unary_op, init, binary_op, + /*inclusive=*/std::true_type(), + __icp_algorithm::is_vectorization_preferred(exec), + __icp_algorithm::is_parallelization_preferred(exec)); +} + +template +__icp_algorithm::enable_if_execution_policy +transform_inclusive_scan(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op, UnaryOperation unary_op) { /* init-less overload: the first element seeds the scan and the remainder is delegated to the overload taking an initial value */ + if( first!=last ) { + auto tmp = unary_op(*first); /* the seed must be transformed as well; using the raw *first left the first output (and every later sum) untransformed */ + *result = tmp; + return transform_inclusive_scan(exec, ++first, last, ++result, binary_op, unary_op, tmp); + } else { + return result; + } +} + +// [adjacent.difference] + +template +__icp_algorithm::enable_if_execution_policy +adjacent_difference(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator d_first, BinaryOperation op) 
{
+    return __icp_algorithm::pattern_adjacent_difference(first, last, d_first, op,
+        __icp_algorithm::is_vectorization_preferred(exec),
+        __icp_algorithm::is_parallelization_preferred(exec));
+}
+
+template<class ExecutionPolicy, class InputIterator, class OutputIterator>
+__icp_algorithm::enable_if_execution_policy<ExecutionPolicy, OutputIterator>
+adjacent_difference(ExecutionPolicy&& exec, InputIterator first, InputIterator last, OutputIterator d_first) {
+    typedef typename iterator_traits<InputIterator>::value_type value_type;
+    return adjacent_difference(exec, first, last, d_first, std::minus<value_type>());
+}
+
+}
+
+#endif /* __PSTL_numeric_H */
diff --git a/linux/pstlvars.csh b/linux/pstlvars.csh
new file mode 100644
index 00000000000..68e4c3c797a
--- /dev/null
+++ b/linux/pstlvars.csh
@@ -0,0 +1,70 @@
+#!/bin/csh
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+#
+#
+
+# Parsing script arguments
+# Arg1 is the target architecture. Accepted values are 'ia32' and 'intel64';
+# when omitted, it defaults to the value of the $COMPILERVARS_ARCHITECTURE environment variable.
+
+set PSTL_TARGET_ARCH=""
+
+if ($?COMPILERVARS_ARCHITECTURE) then
+    set PSTL_TARGET_ARCH="$COMPILERVARS_ARCHITECTURE"
+endif
+
+if ("$1" != "") then
+    set PSTL_TARGET_ARCH="$1"
+endif
+
+if ("$PSTL_TARGET_ARCH" != "") then
+    if ("$PSTL_TARGET_ARCH" != "ia32" && "$PSTL_TARGET_ARCH" != "intel64") then
+        echo "ERROR: Unknown switch '$PSTL_TARGET_ARCH'. Accepted values: ia32, intel64"
+        set PSTL_TARGET_ARCH=""
+        exit 1
+    endif
+else
+    echo "ERROR: Architecture is not defined. Accepted values: ia32, intel64"
+    exit 1
+endif
+
+
+# Arg2 selects the PSTLROOT detection method. The only supported value is 'auto_pstlroot', in which case
+# the environment variable PSTLROOT is derived from the directory that contains this script.
+if ("$2" == "auto_pstlroot") then
+    set sourced=($_)
+    if ("$sourced" != '') then # if the script was sourced
+        set script_name=`readlink -f $sourced[2]`
+    else # if the script was run => "$_" is empty
+        set script_name=`readlink -f $0`
+    endif
+    set script_dir=`dirname $script_name`
+    setenv PSTLROOT "$script_dir/.."
+else
+    setenv PSTLROOT "SUBSTITUTE_INSTALL_DIR_HERE"
+endif
+
+if ( -e "$PSTLROOT/../tbb/bin/tbbvars.csh" ) then
+    source "$PSTLROOT/../tbb/bin/tbbvars.csh" $PSTL_TARGET_ARCH;
+endif
+
+if (! $?CPATH) then
+    setenv CPATH "${PSTLROOT}/include"
+else
+    setenv CPATH "${PSTLROOT}/include:$CPATH"
+endif
diff --git a/linux/pstlvars.sh b/linux/pstlvars.sh
new file mode 100644
index 00000000000..e1547877d5c
--- /dev/null
+++ b/linux/pstlvars.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+#
+#
+
+# Parsing script arguments
+# Arg1 is the target architecture. Accepted values are 'ia32' and 'intel64';
+# when omitted, it defaults to the value of the $COMPILERVARS_ARCHITECTURE environment variable.
+
+PSTL_TARGET_ARCH=
+
+if [ -n "${COMPILERVARS_ARCHITECTURE}" ]; then
+    PSTL_TARGET_ARCH=$COMPILERVARS_ARCHITECTURE
+fi
+
+if [ -n "$1" ]; then
+    PSTL_TARGET_ARCH=$1
+fi
+
+if [ -n "${PSTL_TARGET_ARCH}" ]; then
+    if [ "$PSTL_TARGET_ARCH" != "ia32" ] && [ "$PSTL_TARGET_ARCH" != "intel64" ]; then
+        echo "ERROR: Unknown switch '$PSTL_TARGET_ARCH'. Accepted values: ia32, intel64"
+        PSTL_TARGET_ARCH=
+        return 1;
+    fi
+else
+    echo "ERROR: Architecture is not defined. Accepted values: ia32, intel64"
+    return 1;
+fi
+
+# Arg2 selects the PSTLROOT detection method. The only supported value is 'auto_pstlroot', in which case
+# PSTLROOT is derived from this script's directory (only when $BASH_SOURCE is set, i.e. under bash).
+PSTLROOT=SUBSTITUTE_INSTALL_DIR_HERE
+if [ -n "${BASH_SOURCE}" ]; then
+    if [ "$2" = "auto_pstlroot" ]; then
+        PSTLROOT=$(cd "$(dirname "${BASH_SOURCE}")" && pwd -P)/..
+    fi
+fi
+export PSTLROOT
+
+if [ -e "$PSTLROOT/../tbb/bin/tbbvars.sh" ]; then
+    . "$PSTLROOT/../tbb/bin/tbbvars.sh" "$PSTL_TARGET_ARCH"
+fi
+
+if [ -z "${CPATH}" ]; then
+    CPATH="${PSTLROOT}/include"; export CPATH
+else
+    CPATH="${PSTLROOT}/include:$CPATH"; export CPATH
+fi
diff --git a/mac/pstlvars.csh b/mac/pstlvars.csh
new file mode 100644
index 00000000000..3ce0c67f47c
--- /dev/null
+++ b/mac/pstlvars.csh
@@ -0,0 +1,31 @@
+#!/bin/csh
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+#
+#
+
+setenv PSTLROOT "SUBSTITUTE_INSTALL_DIR_HERE"
+
+if ( -e "$PSTLROOT/../tbb/bin/tbbvars.csh" ) then
+    source "$PSTLROOT/../tbb/bin/tbbvars.csh";
+endif
+
+if (! $?CPATH) then
+    setenv CPATH "${PSTLROOT}/include"
+else
+    setenv CPATH "${PSTLROOT}/include:$CPATH"
+endif
diff --git a/mac/pstlvars.sh b/mac/pstlvars.sh
new file mode 100644
index 00000000000..e3dc4517dde
--- /dev/null
+++ b/mac/pstlvars.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+#
+#
+
+export PSTLROOT="SUBSTITUTE_INSTALL_DIR_HERE"
+
+if [ -e "$PSTLROOT/../tbb/bin/tbbvars.sh" ]; then
+    . "$PSTLROOT/../tbb/bin/tbbvars.sh"
+fi
+
+if [ -z "${CPATH}" ]; then
+    CPATH="${PSTLROOT}/include"; export CPATH
+else
+    CPATH="${PSTLROOT}/include:$CPATH"; export CPATH
+fi
diff --git a/windows/pstlvars.bat b/windows/pstlvars.bat
new file mode 100644
index 00000000000..0c2395d199c
--- /dev/null
+++ b/windows/pstlvars.bat
@@ -0,0 +1,70 @@
+@echo off
+REM
+REM Copyright (c) 2017 Intel Corporation
+REM
+REM Licensed under the Apache License, Version 2.0 (the "License");
+REM you may not use this file except in compliance with the License.
+REM You may obtain a copy of the License at
+REM
+REM     http://www.apache.org/licenses/LICENSE-2.0
+REM
+REM Unless required by applicable law or agreed to in writing, software
+REM distributed under the License is distributed on an "AS IS" BASIS,
+REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+REM See the License for the specific language governing permissions and
+REM limitations under the License.
+REM
+REM
+REM
+REM
+REM
+
+set "SCRIPT_NAME=%~nx0"
+set "PSTL_BIN_DIR=%~d0%~p0"
+set "PSTLROOT=%PSTL_BIN_DIR%.."
+
+:: Set the default arguments
+set PSTL_TARGET_ARCH=
+set PSTL_TARGET_VS=
+
+:ParseArgs
+:: Parse the incoming arguments (%~1 strips surrounding quotes so quoted arguments compare correctly)
+if /i "%~1"=="" goto SetEnv
+if /i "%~1"=="ia32" (set PSTL_TARGET_ARCH=ia32) & shift & goto ParseArgs
+if /i "%~1"=="intel64" (set PSTL_TARGET_ARCH=intel64) & shift & goto ParseArgs
+if /i "%~1"=="vs2013" (set PSTL_TARGET_VS=vs2013) & shift & goto ParseArgs
+if /i "%~1"=="vs2015" (set PSTL_TARGET_VS=vs2015) & shift & goto ParseArgs
+if /i "%~1"=="vs2017" (set PSTL_TARGET_VS=vs2017) & shift & goto ParseArgs
+if /i "%~1"=="all" (set PSTL_TARGET_VS=all) & shift & goto ParseArgs
+:: any other argument value is an error: print the usage text
+goto Syntax
+
+:SetEnv
+:: target architecture is a required argument
+if "%PSTL_TARGET_ARCH%"=="" goto Syntax
+:: PSTL_TARGET_VS defaults to 'all' (TBB statically linked with the Visual C++ runtime, 'vc_mt')
+if "%PSTL_TARGET_VS%"=="" set PSTL_TARGET_VS=all
+
+if exist "%PSTLROOT%\..\tbb\bin\tbbvars.bat" @call "%PSTLROOT%\..\tbb\bin\tbbvars.bat" %PSTL_TARGET_ARCH% %PSTL_TARGET_VS%
+
+set "INCLUDE=%PSTLROOT%\include;%INCLUDE%"
+set "CPATH=%PSTLROOT%\include;%CPATH%"
+
+goto End
+
+:Syntax
+echo Syntax:
+echo %SCRIPT_NAME% ^<arch^> ^<vs^>
+echo ^<arch^> must be one of the following
+echo ia32 : Set up for IA-32 architecture
+echo intel64 : Set up for Intel(R) 64 architecture
+echo ^<vs^> should be one of the following
+echo vs2013 : Set to use with Microsoft Visual Studio 2013 runtime DLLs
+echo vs2015 : Set to use with Microsoft Visual Studio 2015 runtime DLLs
+echo vs2017 : Set to use with Microsoft Visual Studio 2017 runtime DLLs
+echo all : Set PSTL to use TBB statically linked with Microsoft Visual C++ runtime
+echo if ^<vs^> is not set PSTL will use TBB statically linked with Microsoft Visual C++ runtime.
+exit /B 1
+
+:End
+exit /B 0
\ No newline at end of file