diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..412eeda7
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,22 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Custom for Visual Studio
+*.cs     diff=csharp
+*.sln    merge=union
+*.csproj merge=union
+*.vbproj merge=union
+*.fsproj merge=union
+*.dbproj merge=union
+
+# Standard to msysgit
+*.doc	 diff=astextplain
+*.DOC	 diff=astextplain
+*.docx diff=astextplain
+*.DOCX diff=astextplain
+*.dot  diff=astextplain
+*.DOT  diff=astextplain
+*.pdf  diff=astextplain
+*.PDF	 diff=astextplain
+*.rtf	 diff=astextplain
+*.RTF	 diff=astextplain
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..620d3dc8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+# Compiled Object files
+*.slo
+*.lo
+*.o
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 00000000..a88cad8e
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,204 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+clFFT Readme
+
+Version:       1.10
+Release Date:  April 2013
+
+ChangeLog:
+
+____________
+Current Version:
+  * This release tested using the 9.012 runtime driver and the 2.8 APP SDK
+  
+____________
+Version 1.8.291:
+Fixed:
+  * Memory leaks affecting use cases where 'clfftEnqueueTransform' is used in a loop
+	  
+____________
+Version 1.8.269 (beta):
+New:
+  * clFFT now supports real-to-complex and complex-to-real transforms;
+      refer to documentation for details
+  * This release tested using the 12.4 Catalyst software suite
+	  
+Known Issues:
+  * Some degradation in performance of real transforms due to known
+      runtime/driver issues
+  * Failures in real transforms have been seen on 7xxx series GPUs with certain
+      problem sizes involving powers of 3 and 5  
+  
+____________
+Version 1.6.244:
+Fixed:
+  * Failures observed in v1.6.236 in backward transforms of certain power of 2
+      (involving radix 4 and radix 8) problem sizes.
+	  
+____________
+Version 1.6.236:
+New:
+  * Performance of the FFT library has been improved for Radix-2 1D and 2D transforms
+  * Support for R4XXX GPUs is deprecated and no longer tested
+  * Preview: Support for AMD Radeon™ HD7000 series GPUs
+  * This release tested using the 8.92 runtime driver and the 2.6 APP SDK
+____________
+Version 1.4:
+New:
+  * clFFT now supports transform lengths whose factors consist exclusively 
+      of powers of 2, 3, and 5
+  * clFFT supports double precision data types
+  * clFFT executes on OpenCL 1.0 compliant devices
+  * This release tested using the 8.872 runtime driver and the 2.5 APP SDK
+  * A helper bash script appmlEnv.sh has been added to the root installation
+      directory to assist in properly setting up a terminal environment to 
+      execute clFFT samples
+
+Fixed:
+  * If the library is required to allocate a temporary buffer, and the user does
+      not specify a temporary buffer on the Enqueue call, the library will 
+      allocate a temporary buffer internally and the lifetime of that temporary 
+      buffer is managed by the lifetime of the FFT plan; deleting the plan will 
+      release the buffer.
+  * Test failures on CPU device for 32-bit systems  (Windows/Linux) 
+
+Known Issues:
+  * Failures have been seen on graphics cards using R4550 (RV710) GPUs.
+  
+____________
+Version 1.2:
+New:
+  * Reduced the number of internal LDS bank conflicts for our 1D FFT transforms,
+      increasing performance.
+  * Padded reads/writes to global memory, decreasing bank conflicts and 
+      increasing performance on 2D transforms.
+  * This release tested using the 8.841 runtime driver and the 2.4 APP SDK
+
+Fixed:
+  * Failures have been seen attempting to queue work on the second GPU device on
+      a multi GPU 5970 card on Linux.
+
+Known Issues:
+  * It is recommended that users query for and explicitly create an 
+      intermediate buffer if clFFT requires one.  If the library creates the 
+      intermediate buffer internally, a race condition may occur on freeing the 
+      buffer on lower end hardware.
+  * Failures have been seen on graphics cards using R4550 (RV710) GPUs.
+  * Test failures on CPU device for 32-bit systems  (Windows/Linux) 
+  * It is recommended that Windows users uninstall previous versions of clFFT 
+      before installing newer versions.  Otherwise, Add/Remove programs only 
+      removes the latest version.  Linux users can delete the install directory.
+
+____________
+Version 1.0:
+  * Initial release, available on all platforms
+
+Known Issues:
+  * Failures have been seen attempting to queue work on the second GPU device on
+      a multi GPU 5970 card on Linux.
+_____________________
+Building the Samples:
+
+To install the Linux versions of clFFT, uncompress the initial download and 
+  then execute the install script.
+
+For example:
+  tar -xf clFFT-${version}.tar.gz
+      - This installs three files into the local directory, one being an 
+        executable bash script.
+
+  sudo mkdir /opt/clFFT-${version}
+      - This pre-creates the install directory with proper permissions in /opt 
+        if it is to be installed there (This is the default).
+
+  ./install-clFFT-${version}.sh
+      - This prints an EULA and uncompresses files into the chosen install 
+        directory.
+
+  cd ${installDir}/bin64
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OpenCLLibDir}:${clfftLibDir}
+      - Export library dependencies to resolve all external linkages to the 
+        client program. The user can create a bash script to help automate this 
+        procedure.
+
+  ./Client -h
+      - Understand the command line options that are available to the user 
+        through the sample client.
+
+  ./Client -iv
+      - Watch for the version strings to print out; watch for 
+        'Client Test *****PASS*****' to print out.
+
+The sample program does not ship with native build files. Instead, a CMake
+file is shipped, and users generate a native build file for their system.
+
+For example:
+  cd ${installDir}
+  mkdir samplesBin/
+      - This creates a sister directory to the samples directory that will house
+        the native makefiles and the generated files from the build.
+
+  cd samplesBin/
+  ccmake ../samples/
+      - ccmake is a curses-based cmake program. It takes a parameter that 
+        specifies the location of the source code to compile.
+      - Hit 'c' to configure for the platform; ensure that the dependencies to 
+        external libraries are satisfied, including paths to 'ATI Stream SDK' 
+        and 'Boost'.
+      - After dependencies are satisfied, hit 'c' again to finalize configure 
+        step, then hit 'g' to generate makefile and exit ccmake.
+
+  make help
+      - Look at the available options for make.
+
+  make
+      - Build the sample client program.
+
+  ./clfft.Sample -iv
+      - Watch for the version strings to print out; watch for 
+        'Client Test *****PASS*****' to print out.
+_______________________________________________________________________________
+(C) 2010-2013 Advanced Micro Devices, Inc. All rights reserved. AMD, the AMD 
+Arrow logo, ATI, the ATI logo, Radeon, FireStream, FireGL, Catalyst, and 
+combinations thereof are trademarks of Advanced Micro Devices, Inc. Microsoft 
+(R), Windows, and Windows Vista (R) are registered trademarks of Microsoft 
+Corporation in the U.S. and/or other jurisdictions. OpenCL and the OpenCL logo 
+are trademarks of Apple Inc. used by permission by Khronos. Other names are for 
+informational purposes only and may be trademarks of their respective owners.
+
+The contents of this document are provided in connection with Advanced Micro 
+Devices, Inc. ("AMD") products. AMD makes no representations or warranties with 
+respect to the accuracy or completeness of the contents of this publication and 
+reserves the right to make changes to specifications and product descriptions 
+at any time without notice. The information contained herein may be of a 
+preliminary or advance nature and is subject to change without notice. No 
+license, whether express, implied, arising by estoppel or otherwise, to any 
+intellectual property rights is granted by this publication. Except as set forth
+in AMD's Standard Terms and Conditions of Sale, AMD assumes no liability 
+whatsoever, and disclaims any express or implied warranty, relating to its 
+products including, but not limited to, the implied warranty of 
+merchantability, fitness for a particular purpose, or infringement of any 
+intellectual property right.
+
+AMD's products are not designed, intended, authorized or warranted for use as 
+components in systems intended for surgical implant into the body, or in other 
+applications intended to support or sustain life, or in any other application 
+in which the failure of AMD's product could create a situation where personal 
+injury, death, or severe property or environmental damage may occur. AMD 
+reserves the right to discontinue or make changes to its products at any time 
+without notice.
+_______________________________________________________________________________
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..f635c554
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,36 @@
+## Contributor guidelines
+
+Contributing code to this project is intended to be lightweight and intuitive to users familiar with GitHub, in order to actively encourage contributions, but a process is documented and should be followed to prevent chaos, confusion and despair.  
+
+## The mechanics of contributing code
+Firstly, in order to contribute code to this project, a contributor must have a valid and current [GitHub account](https://help.github.com/articles/set-up-git) available to use.  Given an account,
+* The potential contributor forks this project into his/her account following the traditional [forking](https://help.github.com/articles/fork-a-repo) model native to GitHub
+* After forking, the contributor [clones their repository](https://help.github.com/articles/create-a-repo) locally on their machine
+* Code is developed and checked into the contributor's repository.  These commits are eventually pushed upstream to their GitHub repository
+* The contributor then issues a [pull-request](https://help.github.com/articles/using-pull-requests) against the **develop** branch of this repository, following the [git flow](http://nvie.com/posts/a-successful-git-branching-model/) workflow, which is well suited for working with GitHub
+    * A [git extension](https://github.com/nvie/gitflow) has been developed to ease the use of the 'git flow' methodology, but requires manual installation by the user.  Refer to the project's wiki
+
+At this point, the repository maintainers will be notified by GitHub that a 'pull request' exists pending against their repository.  A code review should be completed within a few days, depending on the scope of submitted code, and the code will either be accepted, rejected or commented on for extra feedback.
+
+## Code submission guidelines
+We want to ensure that the project code base maintains a level of quality over time, such that future contributors find it as easy to jump into the code as hopefully it is today.  As such, pull requests should 
+* remember that clMath is a project licensed under the [Apache License, Version 2.0]( http://www.apache.org/licenses/LICENSE-2.0 ).  If you are not already familiar, please review the license before issuing a pull request.  We intend this project to be open to external contributors, and encourage developers to contribute code back that they believe will provide value to the overall community.  We will interpret an explicit 'pull request' back to this repository as an implicit acknowledgement from the contributor that they wish to share the code with the community under the terms of the Apache license v2.0.
+* follow the [code style guidelines]( ) of the project as posted to the project wiki.  Unfortunately, there were no unifying code guidelines defined between the BLAS & FFT projects, but code submissions should not mix styles within an individual file.  We have since defined and posted a code style guideline for the projects and we expect the code to slowly transition to the new
+guidelines over time
+    *  separate check-ins that modify a file's style from the ones that add/change/delete code.
+* target the **develop** branch in the repository
+* ensure that the [code properly builds]( https://github.com/kknox/clFFT/wiki/Build )
+* cannot break existing test cases
+    * we encourage contributors to [run all tests]( https://github.com/kknox/clFFT/wiki/Testing ) on their end before the pull-request
+        * if possible, upload the test results associated with the pull request to a personal [gist repository]( https://gist.github.com/ ) and insert a link to the test results in the pull request so that collaborators can browse the results
+        * if no test results are provided with the pull request, official collaborators will run the test suite on their test machines against the patch before we will accept the pull-request
+            * if we detect failing test cases, we will request that the code associated with the pull request be fixed before the pull request will be merged
+    * if new functionality is introduced with the pull request, sufficient test cases should be added to verify the new functionality is correct
+        * new tests should integrate with the existing [googletest framework]( https://code.google.com/p/googletest/wiki/Primer ) located in the src/tests directory of the repo
+        * if the collaborators feel the new tests do not provide sufficient coverage, feedback on the pull request will be left with suggestions on how to improve the tests before the pull request will be merged
+
+Pull requests will be reviewed by the set of collaborators that are assigned for the repository.  Pull requests may be accepted, declined or a conversation may start on the pull request thread with feedback.  If the pull request is trivial and all the submission guidelines defined above are honored, the pull request may be accepted without delay.  If the pull request is good, but the guidelines defined above are not followed, the collaborators may leave feedback on the pull request and engage in a conversation with the contributor about what they can do to improve the pull request.  At any time, collaborators may decline a pull request if they decide the contribution is not appropriate for the project, or the feedback from reviewers on a pull request is not being addressed in an appropriate amount of time.
+
+## Is it possible to become an official collaborator of the repository?
+Yes, we hope to promote trusted members of the community who have proven themselves to be competent and who request to take on the extra responsibility of being official collaborators of the project.  When an individual requests to be an official collaborator, current project collaborators will browse through the history of the requester's prior pull requests and take a vote amongst themselves on whether the requester should be promoted to collaborator.  Promoted individuals will then have the right to approve/decline pull requests and help shape the direction the project takes.  It is worth noting that on GitHub everybody has read-only access to the source and everybody has the ability to issue a pull request to contribute to the project.  The benefit of being a repository collaborator is that you are able to manage other people's pull requests.
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..d6456956
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 00000000..edd33262
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,5 @@
+AMD clFFT
+    Copyright 2013 Advanced Micro Devices, Inc.
+
+    This product includes software developed at
+    Advanced Micro Devices, Inc. (http://www.amd.com).
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..328a3aa5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,139 @@
+clFFT
+=====
+
+clMath is a software library containing FFT and BLAS functions written in OpenCL. In addition to GPU devices, the libraries also support running on CPU devices to facilitate debugging and multicore programming.
+
+<a href="http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-math-libraries/">APPML 1.10</a> is the most current generally available version of the library, and pre-built binaries are available for download on both Linux and Windows platforms.
+
+## Introduction to clFFT
+
+The FFT is an implementation of the Discrete Fourier Transform (DFT) that makes use of symmetries in the DFT definition to reduce the computational complexity from O(N<sup>2</sup>) to O(N log<sub>2</sub>( N )) when the sequence length N is the product of small prime factors. Currently, there is no standard API for FFT routines. Hardware vendors usually provide a set of high-performance FFTs optimized for their systems: no two vendors employ the same interfaces for their FFT routines. clFFT provides a set of FFT routines that are optimized for AMD graphics processors, but also are functional across CPU and other compute devices.
+
+The clFFT library is an open source OpenCL library implementation of discrete Fast Fourier Transforms. It:
+
+* Provides a fast and accurate platform for calculating discrete FFTs. 
+* Works on CPU or GPU backends. 
+* Supports in-place or out-of-place transforms. 
+* Supports 1D, 2D, and 3D transforms with a batch size that can be greater than 1. 
+* Supports planar (real and complex components in separate arrays) and interleaved (real and complex components as a pair contiguous in memory) formats. 
+* Supports dimension lengths that can be any mix of powers of 2, 3, and 5. 
+* Supports single and double precision floating point formats.
+
+## clFFT Wiki
+The [project wiki](https://github.com/kknox/clFFT/wiki) contains helpful documentation, including a [build primer](https://github.com/kknox/clFFT/wiki/Build)
+
+## Contributing code
+Please refer to and read the [Contributing](CONTRIBUTING.md) document for guidelines on how to contribute code to this open source project
+
+## License
+The source for clFFT is licensed under the [Apache License, Version 2.0]( http://www.apache.org/licenses/LICENSE-2.0 )
+
+## Example
+The simple example below shows how to use clFFT to compute a simple 1D forward transform.
+
+```c
+#include <stdlib.h>
+
+/* No need to explicitly include the OpenCL headers */
+#include <clFFT.h>
+
+int main( void )
+{
+    cl_int err;
+    cl_platform_id platform = 0;
+    cl_device_id device = 0;
+    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    cl_context ctx = 0;
+    cl_command_queue queue = 0;
+    cl_mem bufX;
+	float *X;
+    cl_event event = NULL;
+    int ret = 0;
+	size_t N = 16;
+	
+	/* FFT library related declarations */
+	clfftPlanHandle planHandle;
+	clfftDim dim = CLFFT_1D;
+	size_t clLengths[1] = {N};
+                
+    /* Setup OpenCL environment. */
+    err = clGetPlatformIDs( 1, &platform, NULL );
+    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );
+
+    props[1] = (cl_context_properties)platform;
+    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
+    queue = clCreateCommandQueue( ctx, device, 0, &err );
+
+    /* Setup clFFT. */
+	clfftSetupData fftSetup;
+	err = clfftInitSetupData(&fftSetup);
+	err = clfftSetup(&fftSetup);
+
+	/* Allocate host & initialize data. */
+	/* Only allocation shown for simplicity. */
+	X = (float *)malloc(N * 2 * sizeof(*X));
+                
+    /* Prepare OpenCL memory objects and place data inside them. */
+    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err );
+
+    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0,
+	N * 2 * sizeof( *X ), X, 0, NULL, NULL );
+
+	/* Create a default plan for a complex FFT. */
+	err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
+	
+	/* Set plan parameters. */
+	err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
+	err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
+	err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);
+                                
+    /* Bake the plan. */
+	err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
+	
+	/* Execute the plan. */
+	err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
+
+	/* Wait for calculations to be finished. */
+	err = clFinish(queue);
+
+	/* Fetch results of calculations. */
+	err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL );
+
+    /* Release OpenCL memory objects. */
+    clReleaseMemObject( bufX );
+
+	free(X);
+	
+	/* Release the plan. */
+	err = clfftDestroyPlan( &planHandle );
+
+    /* Release clFFT library. */
+    clfftTeardown( );
+
+    /* Release OpenCL working objects. */
+    clReleaseCommandQueue( queue );
+    clReleaseContext( ctx );
+
+    return ret;
+}
+```
+
+## Build dependencies
+### Library for Windows
+*  Windows® 7/8
+*  Visual Studio 2010 SP1, 2012
+*  Latest CMake
+*  An OpenCL SDK, such as APP SDK 2.8
+
+### Library for Linux
+*  GCC 4.6 and onwards
+*  Latest CMake
+*  An OpenCL SDK, such as APP SDK 2.8
+
+### Test infrastructure
+* Latest Googletest
+* Latest FFTW 
+* Latest Boost
+
+### Performance infrastructure
+* Python
\ No newline at end of file
diff --git a/doc/clFFT.doxy b/doc/clFFT.doxy
new file mode 100644
index 00000000..366f9c75
--- /dev/null
+++ b/doc/clFFT.doxy
@@ -0,0 +1,1894 @@
+# Doxyfile 1.8.4
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists, items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file 
+# that follow. The default is UTF-8 which is also the encoding used for all 
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the 
+# iconv built into libc) for the transcoding. See 
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should 
+# identify the project. Note that if you do not use Doxywizard you need 
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME           = clMathFft
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
+# This could be handy for archiving the generated documentation or 
+# if some version control system is used.
+
+PROJECT_NUMBER         = 2.0
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description 
+# for a project that appears at the top of each page and should give the viewer 
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          = 
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is 
+# included in the documentation. The maximum height of the logo should not 
+# exceed 55 pixels and the maximum width should not exceed 200 pixels. 
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO           = 
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
+# base path where the generated documentation will be put. 
+# If a relative path is entered, it will be relative to the location 
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = ../../bin/clFFT.doxy
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
+# 4096 sub-directories (in 2 levels) under the output directory of each output 
+# format and will distribute the generated files over these directories. 
+# Enabling this option can be useful when feeding doxygen a huge amount of 
+# source files, where putting all generated files in the same directory would 
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
+# documentation generated by doxygen is written. Doxygen will use this 
+# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: 
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, 
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English 
+# messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian, 
+# Persian, Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, 
+# Slovak, Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
+# include brief member descriptions after the members that are listed in 
+# the file and class documentation (similar to JavaDoc). 
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
+# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator 
+# that is used to form the text in various listings. Each string 
+# in this list, if found as the leading text of the brief description, will be 
+# stripped from the text and the result after processing the whole list, is 
+# used as the annotated text. Otherwise, the brief description is used as-is. 
+# If left blank, the following values are used ("$name" is automatically 
+# replaced with the name of the entity): "The $name class" "The $name widget" 
+# "The $name file" "is" "provides" "specifies" "contains" 
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       = 
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
+# Doxygen will generate a detailed section even if there is only a brief 
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 
+# inherited members of a class in the documentation of that class as if those 
+# members were ordinary class members. Constructors, destructors and assignment 
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
+# path before file names in the file list and in the header files. If set 
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
+# can be used to strip a user-defined part of the path. Stripping is 
+# only done if one of the specified strings matches the left-hand part of 
+# the path. The tag can be used to show relative paths in the file list. 
+# If left blank the directory from which doxygen is run is used as the 
+# path to strip. Note that you specify absolute paths here, but also 
+# relative paths, which will be relative from the directory where doxygen is 
+# started.
+
+STRIP_FROM_PATH        = 
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 
+# the path mentioned in the documentation of a class, which tells 
+# the reader which header file to include in order to use a class. 
+# If left blank only the name of the header file containing the class 
+# definition is used. Otherwise one should specify the include paths that 
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    = 
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
+# (but less readable) file names. This can be useful if your file system 
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
+# will interpret the first line (until the first dot) of a JavaDoc-style 
+# comment as the brief description. If set to NO, the JavaDoc 
+# comments will behave just like regular Qt-style comments 
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will 
+# interpret the first line (until the first dot) of a Qt-style 
+# comment as the brief description. If set to NO, the comments 
+# will behave just like regular Qt-style comments (thus requiring 
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 
+# treat a multi-line C++ special comment block (i.e. a block of //! or /// 
+# comments) as a brief description. This used to be the default behaviour. 
+# The new default is to treat a multi-line C++ comment block as a detailed 
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
+# member inherits the documentation from any documented member that it 
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 
+# a new page for each member. If set to NO, the documentation of a member will 
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts 
+# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
+# put the command \sideeffect (or @sideeffect) in the documentation, which 
+# will result in a user-defined paragraph with heading "Side Effects:". 
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                = 
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only). 
+# A mapping has the form "name=value". For example adding 
+# "class=itcl::class" will allow you to use the command class in the 
+# itcl::class meaning.
+
+TCL_SUBST              = 
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 
+# sources only. Doxygen will then generate output that is more tailored for C. 
+# For instance, some of the names that are used will be different. The list 
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 
+# sources only. Doxygen will then generate output that is more tailored for 
+# Java. For instance, namespaces will be presented as packages, qualified 
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran 
+# sources only. Doxygen will then generate output that is more tailored for 
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL 
+# sources. Doxygen will then generate output that is tailored for 
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it 
+# parses. With this tag you can assign which parser to use for a given 
+# extension. Doxygen has a built-in mapping, but you can override or extend it 
+# using this tag. The format is ext=language, where ext is a file extension, 
+# and language is one of the parsers supported by doxygen: IDL, Java, 
+# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, 
+# C++. For instance to make doxygen treat .inc files as Fortran files (default 
+# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note 
+# that for custom extensions you also need to set FILE_PATTERNS otherwise the 
+# files are not read by doxygen.
+
+EXTENSION_MAPPING      = 
+
+# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all 
+# comments according to the Markdown format, which allows for more readable 
+# documentation. See http://daringfireball.net/projects/markdown/ for details. 
+# The output of markdown processing is further processed by doxygen, so you 
+# can mix doxygen, HTML, and XML commands with Markdown formatting. 
+# Disable only in case of backward compatibilities issues.
+
+MARKDOWN_SUPPORT       = YES
+
+# When enabled doxygen tries to link words that correspond to documented 
+# classes, or namespaces to their corresponding documentation. Such a link can 
+# be prevented in individual cases by putting a % sign in front of the word 
+# or globally by setting AUTOLINK_SUPPORT to NO.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want 
+# to include (a tag file for) the STL sources as input, then you should 
+# set this tag to YES in order to let doxygen match functions declarations and 
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. 
+# func(std::string) {}). This also makes the inheritance and collaboration 
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to 
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. 
+# Doxygen will parse them like normal C++ but will assume all classes use public 
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate 
+# getter and setter methods for a property. Setting this option to YES (the 
+# default) will make doxygen replace the get and set methods by a property in 
+# the documentation. This will only work if the methods are indeed getting or 
+# setting a simple type. If this is not the case, or you want to show the 
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
+# tag is set to YES, then doxygen will reuse the documentation of the first 
+# member in the group (if any) for the other members of the group. By default 
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
+# the same type (for instance a group of public functions) to be put as a 
+# subgroup of that type (e.g. under the Public Functions section). Set it to 
+# NO to prevent subgrouping. Alternatively, this can be done per class using 
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and 
+# unions are shown inside the group in which they are included (e.g. using 
+# @ingroup) instead of on a separate page (for HTML and Man pages) or 
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and 
+# unions with only public data fields or simple typedef fields will be shown 
+# inline in the documentation of the scope in which they are defined (i.e. file, 
+# namespace, or group documentation), provided this scope is documented. If set 
+# to NO (the default), structs, classes, and unions are shown on a separate 
+# page (for HTML and Man pages) or section (for LaTeX and RTF).
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum 
+# is documented as struct, union, or enum with the name of the typedef. So 
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct 
+# with name TypeT. When disabled the typedef will appear as a member of a file, 
+# namespace, or class. And the struct will be named TypeS. This can typically 
+# be useful for C code in case the coding convention dictates that all compound 
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = YES
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This 
+# cache is used to resolve symbols given their name and scope. Since this can 
+# be an expensive process and often the same symbol appears multiple times in 
+# the code, doxygen keeps a cache of pre-resolved symbols. If the cache is too 
+# small doxygen will become slower. If the cache is too large, memory is wasted. 
+# The cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid 
+# range is 0..9, the default is 0, corresponding to a cache size of 2^16 = 65536 
+# symbols.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
+# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless 
+# the EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal 
+# scope will be included in the documentation.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file 
+# will be included in the documentation.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
+# defined locally in source files will be included in the documentation. 
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local 
+# methods, which are defined in the implementation section but not in 
+# the interface are included in the documentation. 
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be 
+# extracted and appear in the documentation as a namespace called 
+# 'anonymous_namespace{file}', where file will be replaced with the base 
+# name of the file that contains the anonymous namespace. By default 
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
+# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the 
+# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = YES
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
+# undocumented classes that are normally visible in the class hierarchy. 
+# If set to NO (the default) these classes will be included in the various 
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = YES
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 
+# friend (class|struct|union) declarations. 
+# If set to NO (the default) these declarations will be included in the 
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 
+# documentation blocks found inside the body of a function. 
+# If set to NO (the default) these blocks will be appended to the 
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation 
+# that is typed after a \internal command is included. If the tag is set 
+# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
+# file names in lower-case letters. If set to YES upper-case letters are also 
+# allowed. This is useful if you have classes or files whose names only differ 
+# in case and if your file system supports case sensitive file names. Windows 
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
+# will show members with their full class and namespace scopes in the 
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
+# will put a list of the files that are included by a file in the documentation 
+# of that file.
+
+SHOW_INCLUDE_FILES     = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen 
+# will list include files with double quotes in the documentation 
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
+# will sort the (detailed) documentation of file and class members 
+# alphabetically by member name. If set to NO the members will appear in 
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 
+# brief documentation of file, namespace and class members alphabetically 
+# by member name. If set to NO (the default) the members will appear in 
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen 
+# will sort the (brief and detailed) documentation of class members so that 
+# constructors and destructors are listed first. If set to NO (the default) 
+# the constructors will appear in the respective orders defined by 
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. 
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO 
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the 
+# hierarchy of group names into alphabetical order. If set to NO (the default) 
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 
+# sorted by fully-qualified names, including namespaces. If set to 
+# NO (the default), the class list will be sorted only by class name, 
+# not including the namespace part. 
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
+# Note: This option applies only to the class list, not to the 
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to 
+# do proper type resolution of all parameters of a function it will reject a 
+# match between the prototype and the implementation of a member function even 
+# if there is only one candidate or it is obvious which candidate to choose 
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen 
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or 
+# disable (NO) the todo list. This list is created by putting \todo 
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or 
+# disable (NO) the test list. This list is created by putting \test 
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or 
+# disable (NO) the bug list. This list is created by putting \bug 
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 
+# disable (NO) the deprecated list. This list is created by putting 
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional 
+# documentation sections, marked by \if section-label ... \endif 
+# and \cond section-label ... \endcond blocks.
+
+ENABLED_SECTIONS       = 
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines 
+# the initial value of a variable or macro consists of for it to appear in 
+# the documentation. If the initializer consists of more lines than specified 
+# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and macros in the 
+# documentation can be controlled using \showinitializer or \hideinitializer 
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated 
+# at the bottom of the documentation of classes and structs. If set to YES the 
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
+# This will remove the Files entry from the Quick Index and from the 
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the 
+# Namespaces page.  This will remove the Namespaces entry from the Quick Index 
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that 
+# doxygen should invoke to get the current version for each file (typically from 
+# the version control system). Doxygen will invoke the program by executing (via 
+# popen()) the command <command> <input-file>, where <command> is the value of 
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 
+# provided by doxygen. Whatever the program writes to standard output 
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    = 
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed 
+# by doxygen. The layout file controls the global structure of the generated 
+# output files in an output format independent way. To create the layout file 
+# that represents doxygen's defaults, run doxygen with the -l option. 
+# You can optionally specify a file name after the option, if omitted 
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE            = 
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files 
+# containing the references data. This must be a list of .bib files. The 
+# .bib extension is automatically appended if omitted. Using this command 
+# requires the bibtex tool to be installed. See also 
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style 
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this 
+# feature you need bibtex and perl available in the search path. Do not use 
+# file names with spaces, bibtex cannot handle them.
+
+CITE_BIB_FILES         = 
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated 
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are 
+# generated by doxygen. Possible values are YES and NO. If left blank 
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 
+# potential errors in the documentation, such as not documenting some 
+# parameters in a documented function, or documenting parameters that 
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for 
+# functions that are documented, but have no documentation for their parameters 
+# or return value. If set to NO (the default) doxygen will only warn about 
+# wrong or incomplete parameter documentation, but not about the absence of 
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that 
+# doxygen can produce. The string should contain the $file, $line, and $text 
+# tags, which will be replaced by the file and line number from which the 
+# warning originated and the warning text. Optionally the format may contain 
+# $version, which will be replaced by the version of the file (if it could 
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning 
+# and error messages should be written. If left blank the output is written 
+# to stderr.
+
+WARN_LOGFILE           = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain 
+# documented source files. You may enter file names like "myfile.cpp" or 
+# directories like "/usr/src/myproject". Separate the files or directories 
+# with spaces.
+
+INPUT                  = ../src/library/mainpage.h \
+                         ../src/include/clFFT.h
+
+# This tag can be used to specify the character encoding of the source files 
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 
+# also the default input encoding. Doxygen uses libiconv (or the iconv built 
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for 
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the 
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank the following patterns are tested: 
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh 
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py 
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS          = 
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories 
+# should be searched for input files as well. Possible values are YES and NO. 
+# If left blank NO is used.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be 
+# excluded from the INPUT source files. This way you can easily exclude a 
+# subdirectory from a directory tree whose root is specified with the INPUT tag. 
+# Note that relative paths are relative to the directory from which doxygen is 
+# run.
+
+EXCLUDE                = 
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 
+# directories that are symbolic links (a Unix file system feature) are excluded 
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the 
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
+# certain files from those directories. Note that the wildcards are matched 
+# against the file with absolute path, so to exclude all test directories 
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       = */.hg* \
+                         */.svn*
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names 
+# (namespaces, classes, functions, etc.) that should be excluded from the 
+# output. The symbol name can be a fully qualified name, a word, or if the 
+# wildcard * is used, a substring. Examples: ANamespace, AClass, 
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or 
+# directories that contain example code fragments that are included (see 
+# the \include command).
+
+EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank all files are included.
+
+EXAMPLE_PATTERNS       = 
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 
+# searched for input files to be used with the \include or \dontinclude 
+# commands irrespective of the value of the RECURSIVE tag. 
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or 
+# directories that contain images that are included in the documentation (see 
+# the \image command).
+
+IMAGE_PATH             = .
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should 
+# invoke to filter for each input file. Doxygen will invoke the filter program 
+# by executing (via popen()) the command <filter> <input-file>, where <filter> 
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
+# input file. Doxygen will then use the output that the filter program writes 
+# to standard output.  If FILTER_PATTERNS is specified, this tag will be ignored. 
+# Note that the filter must not add or remove lines; it is applied before the 
+# code is scanned, but not when the output code is generated. If lines are added 
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER           = 
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 
+# basis.  Doxygen will compare the file name with each pattern and apply the 
+# filter if there is a match.  The filters are a list of the form: 
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 
+# info on how filters are used. If FILTER_PATTERNS is empty or if 
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
+# INPUT_FILTER) will be used to filter the input files when producing source 
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file 
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) 
+# and it is also possible to disable source filtering for a specific pattern 
+# using *.ext= (so without naming a filter). This option only has effect when 
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS = 
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that 
+# is part of the input, its contents will be placed on the main page 
+# (index.html). This can be useful if you have a project on for instance GitHub 
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE = 
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
+# be generated. Documented entities will be cross-referenced with these sources. 
+# Note: To get rid of all source code in the generated output, make sure also 
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body 
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
+# doxygen to hide any special comment blocks from generated source code 
+# fragments. Normal C, C++ and Fortran comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES 
+# then for each documented function all documented 
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES 
+# then for each documented function all documented entities 
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) 
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from 
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will 
+# link to the source code.  Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code 
+# will point to the HTML generated by the htags(1) tool instead of doxygen 
+# built-in source browser. The htags tool is part of GNU's global source 
+# tagging system (see http://www.gnu.org/software/global/global.html). You 
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
+# will generate a verbatim copy of the header file for each class for 
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+# If CLANG_ASSISTED_PARSING is set to YES, then doxygen will use the clang parser 
+# for more accurate parsing at the cost of reduced performance. This can be 
+# particularly helpful with template rich C++ code for which doxygen's built-in 
+# parser lacks the necessary type information.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command 
+# line options that you would normally use when invoking the compiler. Note that 
+# the include paths will already be set by doxygen for the files and directories 
+# specified at INPUT and INCLUDE_PATH.
+
+CLANG_OPTIONS          = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
+# of all compounds will be generated. Enable this if the project 
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all 
+# classes will be put under the same header in the alphabetical index. 
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for 
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank 
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard header. Note that when using a custom header you are responsible  
+# for the proper inclusion of any scripts and style sheets that doxygen 
+# needs, which is dependent on the configuration options used. 
+# It is advised to generate a default header using "doxygen -w html 
+# header.html footer.html stylesheet.css YourConfigFile" and then modify 
+# that header. Note that the header is subject to change so you typically 
+# have to redo this when upgrading to a newer version of doxygen or when 
+# changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard footer.
+
+HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading 
+# style sheet that is used by each HTML page. It can be used to 
+# fine-tune the look of the HTML output. If left blank doxygen will 
+# generate a default style sheet. Note that it is recommended to use 
+# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this 
+# tag will in the future become obsolete.
+
+HTML_STYLESHEET        = 
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional 
+# user-defined cascading style sheet that is included after the standard 
+# style sheets created by doxygen. Using this option one can overrule 
+# certain style aspects. This is preferred over using HTML_STYLESHEET 
+# since it does not replace the standard style sheet and is therefore more 
+# robust against future updates. Doxygen will copy the style sheet file to 
+# the output directory.
+
+HTML_EXTRA_STYLESHEET  = 
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or 
+# other source files which should be copied to the HTML output directory. Note 
+# that these files will be copied to the base HTML output directory. Use the 
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these 
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that 
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES       = 
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. 
+# Doxygen will adjust the colors in the style sheet and background images 
+# according to this color. Hue is specified as an angle on a colorwheel, 
+# see http://en.wikipedia.org/wiki/Hue for more information. 
+# For instance the value 0 represents red, 60 is yellow, 120 is green, 
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. 
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of 
+# the colors in the HTML output. For a value of 0 the output will use 
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to 
+# the luminance component of the colors in the HTML output. Values below 
+# 100 gradually make the output lighter, whereas values above 100 make 
+# the output darker. The value divided by 100 is the actual gamma applied, 
+# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, 
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML 
+# page will contain the date and time when the page was generated. Setting 
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML 
+# documentation will contain sections that can be hidden and shown after the 
+# page has loaded.
+
+HTML_DYNAMIC_SECTIONS  = YES
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of 
+# entries shown in the various tree structured indices initially; the user 
+# can expand and collapse entries dynamically later on. Doxygen will expand 
+# the tree to such a level that at most the specified number of entries are 
+# visible (unless a fully collapsed tree already exceeds this amount). 
+# So setting the number of entries to 1 will produce a fully collapsed tree by 
+# default. 0 is a special value representing an infinite number of entries 
+# and will result in a fully expanded tree by default.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files 
+# will be generated that can be used as input for Apple's Xcode 3 
+# integrated development environment, introduced with OSX 10.5 (Leopard). 
+# To create a documentation set, doxygen will generate a Makefile in the 
+# HTML output directory. Running make will produce the docset in that 
+# directory and running "make install" will install the docset in 
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find 
+# it at startup. 
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html 
+# for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the 
+# feed. A documentation feed provides an umbrella under which multiple 
+# documentation sets from a single provider (such as a company or product suite) 
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that 
+# should uniquely identify the documentation set bundle. This should be a 
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen 
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely 
+# identify the documentation publisher. This should be a reverse domain-name 
+# style string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
+# will be generated that can be used as input for tools like the 
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) 
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 
+# be used to specify the file name of the resulting .chm file. You 
+# can add a path in front of the file if the result should not be 
+# written to the html output directory.
+
+CHM_FILE               = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 
+# be used to specify the location (absolute path including file name) of 
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 
+# controls if a separate .chi index file is generated (YES) or that 
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING 
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file 
+# content.
+
+CHM_INDEX_ENCODING     = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 
+# controls whether a binary table of contents is generated (YES) or a 
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members 
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and 
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated 
+# that can be used as input for Qt's qhelpgenerator to generate a 
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can 
+# be used to specify the file name of the resulting .qch file. 
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               = 
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating 
+# Qt Help Project output. For more information please see 
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating 
+# Qt Help Project output. For more information please see 
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to 
+# add. For more information please see 
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   = 
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the 
+# custom filter to add. For more information please see 
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> 
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  = 
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this 
+# project's 
+# filter section matches. 
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> 
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  = 
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can 
+# be used to specify the location of Qt's qhelpgenerator. 
+# If non-empty doxygen will try to run qhelpgenerator on the generated 
+# .qhp file.
+
+QHG_LOCATION           = 
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files  
+# will be generated, which together with the HTML files, form an Eclipse help 
+# plugin. To install this plugin and make it available under the help contents 
+# menu in Eclipse, the contents of the directory containing the HTML and XML 
+# files needs to be copied into the plugins directory of eclipse. The name of 
+# the directory within the plugins directory should be the same as 
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before 
+# the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin 
+# the directory name containing the HTML and XML files should also have 
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) 
+# at top of each HTML page. The value NO (the default) enables the index and 
+# the value YES disables it. Since the tabs have the same information as the 
+# navigation tree you can set this option to YES if you already set 
+# GENERATE_TREEVIEW to YES.
+
+DISABLE_INDEX          = YES
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index 
+# structure should be generated to display hierarchical information. 
+# If the tag value is set to YES, a side panel will be generated 
+# containing a tree-like index structure (just like the one that 
+# is generated for HTML Help). For this to work a browser that supports 
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). 
+# Windows users are probably better off using the HTML help feature. 
+# Since the tree basically has the same information as the tab index you 
+# could consider setting DISABLE_INDEX to YES when enabling this option.
+
+GENERATE_TREEVIEW      = YES
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values 
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML 
+# documentation. Note that a value of 0 will completely suppress the enum 
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE   = 1
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 
+# used to set the initial width (in pixels) of the frame in which the tree 
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open 
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of Latex formulas included 
+# as images in the HTML documentation. The default is 10. Note that 
+# when you change the font size after a successful doxygen run you need 
+# to manually remove any form_*.png images from the HTML output directory 
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images 
+# generated for formulas are transparent PNGs. Transparent PNGs are 
+# not supported properly for IE 6.0, but are supported on all modern browsers. 
+# Note that when changing this option you need to delete any form_*.png files 
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax 
+# (see http://www.mathjax.org) which uses client side Javascript for the 
+# rendering instead of using prerendered bitmaps. Use this if you do not 
+# have LaTeX installed or if you want formulas to look prettier in the HTML 
+# output. When enabled you may also need to install MathJax separately and 
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX            = YES
+
+# When MathJax is enabled you can set the default output format to be used for 
+# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and 
+# SVG. The default value is HTML-CSS, which is slower, but has the best 
+# compatibility.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the 
+# HTML output directory using the MATHJAX_RELPATH option. The destination 
+# directory should contain the MathJax.js script. For instance, if the mathjax 
+# directory is located at the same level as the HTML output directory, then 
+# MATHJAX_RELPATH should be ../mathjax. The default value points to 
+# the MathJax Content Delivery Network so you can quickly see the result without 
+# installing MathJax.  However, it is strongly recommended to install a local 
+# copy of MathJax from http://www.mathjax.org before deployment.
+
+MATHJAX_RELPATH        = http://www.mathjax.org/mathjax
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension 
+# names that should be enabled during MathJax rendering.
+
+MATHJAX_EXTENSIONS     = 
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript 
+# pieces of code that will be used on startup of the MathJax code.
+
+MATHJAX_CODEFILE       = 
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box 
+# for the HTML output. The underlying search engine uses javascript 
+# and DHTML and should work on any modern browser. Note that when using 
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets 
+# (GENERATE_DOCSET) there is already a search function so this one should 
+# typically be disabled. For large projects the javascript based search engine 
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be 
+# implemented using a web server instead of a web client using Javascript. 
+# There are two flavours of web server based search depending on the 
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for 
+# searching and an index file used by the script. When EXTERNAL_SEARCH is 
+# enabled the indexing and searching needs to be provided by external tools. 
+# See the manual for details.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP 
+# script for searching. Instead the search results are written to an XML file 
+# which needs to be processed by an external indexer. Doxygen will invoke an 
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain 
+# the search results. Doxygen ships with an example indexer (doxyindexer) and 
+# search engine (doxysearch.cgi) which are based on the open source search 
+# engine library Xapian. See the manual for configuration details.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server 
+# which will return the search results when EXTERNAL_SEARCH is enabled. 
+# Doxygen ships with an example search engine (doxysearch) which is based on 
+# the open source search engine library Xapian. See the manual for configuration 
+# details.
+
+SEARCHENGINE_URL       = 
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed 
+# search data is written to a file for indexing by an external tool. With the 
+# SEARCHDATA_FILE tag the name of this file can be specified.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the 
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is 
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple 
+# projects and redirect the results back to the right project.
+
+EXTERNAL_SEARCH_ID     = 
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen 
+# projects other than the one defined by this configuration file, but that are 
+# all added to the same external search index. Each project needs to have a 
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id 
+# to a relative location where the documentation can be found. 
+# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ...
+
+EXTRA_SEARCH_MAPPINGS  = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 
+# invoked. If left blank `latex' will be used as the default command name. 
+# Note that when enabling USE_PDFLATEX this option is only used for 
+# generating bitmaps for formulas in the HTML output, but not in the 
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 
+# generate index for LaTeX. If left blank `makeindex' will be used as the 
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
+# LaTeX documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used 
+# by the printer. Possible values are: a4, letter, legal and 
+# executive. If left blank a4 will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
+# the generated latex document. The header should contain everything until 
+# the first chapter. If it is left blank doxygen will generate a 
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           = 
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for 
+# the generated latex document. The footer should contain everything after 
+# the last chapter. If it is left blank doxygen will generate a 
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER           = 
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images 
+# or other source files which should be copied to the LaTeX output directory. 
+# Note that the files will be copied as-is; there are no commands or markers 
+# available.
+
+LATEX_EXTRA_FILES      = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
+# plain latex in the generated Makefile. Set this option to YES to get a 
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode 
+# command to the generated LaTeX files. This will instruct LaTeX to keep 
+# running if errors occur, instead of asking the user for help. 
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not 
+# include the index chapters (such as File Index, Compound Index, etc.) 
+# in the output.
+
+LATEX_HIDE_INDICES     = YES
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include 
+# source code with syntax highlighting in the LaTeX output. 
+# Note that which sources are shown also depends on other settings 
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the 
+# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See 
+# http://en.wikipedia.org/wiki/BibTeX for more info.
+
+LATEX_BIB_STYLE        = plain
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. 
+# The RTF output is optimized for Word 97 and may not look very pretty with 
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
+# RTF documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
+# will contain hyperlink fields. The RTF file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using WORD or other 
+# programs which support those fields. 
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = YES
+
+# Load style sheet definitions from file. Syntax is similar to doxygen's 
+# config file, i.e. a series of assignments. You only have to provide 
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    = 
+
+# Set optional variables used in the generation of an rtf document. 
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to 
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output, 
+# then it will generate one additional man file for each entity 
+# documented in the real man page(s). These additional files 
+# only source the real man page, but without them the man command 
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will 
+# generate an XML file that captures the structure of 
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
+XML_SCHEMA             = 
+
+# The XML_DTD tag can be used to specify an XML DTD, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
+XML_DTD                = 
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will 
+# dump the program listings (including syntax highlighting 
+# and cross-referencing information) to the XML output. Note that 
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files 
+# that can be used to generate PDF.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in 
+# front of it. If left blank docbook will be used as the default path.
+
+DOCBOOK_OUTPUT         = docbook
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 
+# generate an AutoGen Definitions (see autogen.sf.net) file 
+# that captures the structure of the code including all 
+# documentation. Note that this feature is still experimental 
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will 
+# generate a Perl module file that captures the structure of 
+# the code including all documentation. Note that this 
+# feature is still experimental and incomplete at the 
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate 
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able 
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 
+# nicely formatted so it can be parsed by a human reader.  This is useful 
+# if you want to understand what is going on.  On the other hand, if this 
+# tag is set to NO the size of the Perl module output will be much smaller 
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file 
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
+# This is useful so different doxyrules.make files included by the same 
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
+# evaluate all C-preprocessor directives found in the sources and include 
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
+# names in the source code. If set to NO (the default) only conditional 
+# compilation will be performed. Macro expansion can be done in a controlled 
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
+# then the macro expansion is limited to the macros specified with the 
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files 
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that 
+# contain include files that are not input files but should be processed by 
+# the preprocessor.
+
+INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
+# patterns (like *.h and *.hpp) to filter out the header-files in the 
+# directories. If left blank, the patterns specified with FILE_PATTERNS will 
+# be used.
+
+INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that 
+# are defined before the preprocessor is started (similar to the -D option of 
+# gcc). The argument of the tag is a list of macros of the form: name 
+# or name=definition (no spaces). If the definition and the = are 
+# omitted =1 is assumed. To prevent a macro definition from being 
+# undefined via #undef or recursively expanded use the := operator 
+# instead of the = operator.
+
+PREDEFINED             = 
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
+# this tag can be used to specify a list of macro names that should be expanded. 
+# The macro definition that is found in the sources will be used. 
+# Use the PREDEFINED tag if you want to use a different macro definition that 
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED      = 
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 
+# doxygen's preprocessor will remove all references to function-like macros 
+# that are alone on a line, have an all uppercase name, and do not end with a 
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. For each 
+# tag file the location of the external documentation should be added. The 
+# format of a tag file without this location is as follows: 
+#   TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: 
+#   TAGFILES = file1=loc1 "file2 = loc2" ... 
+# where "loc1" and "loc2" can be relative or absolute paths 
+# or URLs. Note that each tag file must have a unique name (where the name does 
+# NOT include the path). If a tag file is not located in the directory in which 
+# doxygen is run, you must also specify the path to the tagfile here.
+
+TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
+# in the class index. If set to NO only the inherited external classes 
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 
+# in the modules index. If set to NO, only the current project's groups will 
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed 
+# in the related pages index. If set to NO, only the current project's 
+# pages will be listed.
+
+EXTERNAL_PAGES         = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script 
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base 
+# or super classes. Setting the tag to NO turns the diagrams off. Note that 
+# this option also works with HAVE_DOT disabled, but it is recommended to 
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc 
+# command. Doxygen will then run the mscgen tool (see 
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the 
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where 
+# the mscgen tool resides. If left empty the tool is assumed to be found in the 
+# default search path.
+
+MSCGEN_PATH            = 
+
+# If set to YES, the inheritance and collaboration graphs will hide 
+# inheritance and usage relations if the target is undocumented 
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
+# available from the path. This tool is part of Graphviz, a graph visualization 
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is 
+# allowed to run in parallel. When set to 0 (the default) doxygen will 
+# base this on the number of processors available in the system. You can set it 
+# explicitly to a value larger than 0 to get control over the balance 
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS        = 0
+
+# By default doxygen will use the Helvetica font for all dot files that 
+# doxygen generates. When you want a differently looking font you can specify 
+# the font name using DOT_FONTNAME. You need to make sure dot is able to find 
+# the font, which can be done by putting it in a standard location or by setting 
+# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the 
+# directory containing the font.
+
+DOT_FONTNAME           = FreeSans.ttf
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. 
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the Helvetica font. 
+# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to 
+# set the path where dot can find it.
+
+DOT_FONTPATH           = 
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect inheritance relations. Setting this tag to YES will force the 
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect implementation dependencies (inheritance, containment, and 
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and 
+# collaboration diagrams in a style similar to the OMG's Unified Modeling 
+# Language.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside 
+# the class node. If there are many fields or methods and many nodes the 
+# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS 
+# threshold limits the number of items for each type to make the size more 
+# manageable. Set this to 0 for no limit. Note that the threshold may be 
+# exceeded by 50% before the limit is enforced.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If set to YES, the inheritance and collaboration graphs will show the 
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 
+# tags are set to YES then doxygen will generate a graph for each documented 
+# file showing the direct and indirect include dependencies of the file with 
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each 
+# documented header file showing the documented files that directly or 
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then 
+# doxygen will generate a call dependency graph for every global function 
+# or class method. Note that enabling this option will significantly increase 
+# the time of a run. So in most cases it will be better to enable call graphs 
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then 
+# doxygen will generate a caller dependency graph for every global function 
+# or class method. Note that enabling this option will significantly increase 
+# the time of a run. So in most cases it will be better to enable caller 
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES 
+# then doxygen will show the dependencies a directory has on other directories 
+# in a graphical way. The dependency relations are determined by the #include 
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 
+# generated by dot. Possible values are svg, png, jpg, or gif. 
+# If left blank png will be used. If you choose svg you need to set 
+# HTML_FILE_EXTENSION to xhtml in order to make the SVG files 
+# visible in IE 9+ (other browsers do not have this requirement).
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to 
+# enable generation of interactive SVG images that allow zooming and panning. 
+# Note that this requires a modern browser other than Internet Explorer. 
+# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you 
+# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files 
+# visible. Older versions of IE do not have SVG support.
+
+INTERACTIVE_SVG        = NO
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be 
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               = 
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that 
+# contain dot files that are included in the documentation (see the 
+# \dotfile command).
+
+DOTFILE_DIRS           = 
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that 
+# contain msc files that are included in the documentation (see the 
+# \mscfile command).
+
+MSCFILE_DIRS           = 
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of 
+# nodes that will be shown in the graph. If the number of nodes in a graph 
+# becomes larger than this value, doxygen will truncate the graph, which is 
+# visualized by representing a node as a red box. Note that if the 
+# number of direct children of the root node in a graph is already larger than 
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note 
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 
+# graphs generated by dot. A depth value of 3 means that only nodes reachable 
+# from the root by following a path via at most 3 edges will be shown. Nodes 
+# that lie further from the root node will be omitted. Note that setting this 
+# option to 1 or 2 may greatly reduce the computation time needed for large 
+# code bases. Also note that the size of a graph can be further restricted by 
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 
+# background. This is disabled by default, because dot on Windows does not 
+# seem to support this out of the box. Warning: Depending on the platform used, 
+# enabling this option may lead to badly anti-aliased labels on the edges of 
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output 
+# files in one run (i.e. multiple -o and -T options on the command line). This 
+# makes dot run faster, but since only newer versions of dot (>1.8.10) 
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
+# generate a legend page explaining the meaning of the various boxes and 
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 
+# remove the intermediate dot files that are used to generate 
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/doc/realfft_1dlen.jpg b/doc/realfft_1dlen.jpg
new file mode 100644
index 00000000..0b90f0f5
Binary files /dev/null and b/doc/realfft_1dlen.jpg differ
diff --git a/doc/realfft_ex_n7.jpg b/doc/realfft_ex_n7.jpg
new file mode 100644
index 00000000..76277c27
Binary files /dev/null and b/doc/realfft_ex_n7.jpg differ
diff --git a/doc/realfft_ex_n8.jpg b/doc/realfft_ex_n8.jpg
new file mode 100644
index 00000000..248c716c
Binary files /dev/null and b/doc/realfft_ex_n8.jpg differ
diff --git a/doc/realfft_expl_01.jpg b/doc/realfft_expl_01.jpg
new file mode 100644
index 00000000..a241bc2c
Binary files /dev/null and b/doc/realfft_expl_01.jpg differ
diff --git a/doc/realfft_expl_02.jpg b/doc/realfft_expl_02.jpg
new file mode 100644
index 00000000..2f6e247d
Binary files /dev/null and b/doc/realfft_expl_02.jpg differ
diff --git a/doc/realfft_expl_03.jpg b/doc/realfft_expl_03.jpg
new file mode 100644
index 00000000..e96f947b
Binary files /dev/null and b/doc/realfft_expl_03.jpg differ
diff --git a/doc/realfft_expl_04.jpg b/doc/realfft_expl_04.jpg
new file mode 100644
index 00000000..c9b3fc6a
Binary files /dev/null and b/doc/realfft_expl_04.jpg differ
diff --git a/doc/realfft_expl_05.jpg b/doc/realfft_expl_05.jpg
new file mode 100644
index 00000000..babde6ed
Binary files /dev/null and b/doc/realfft_expl_05.jpg differ
diff --git a/doc/realfft_expl_06.jpg b/doc/realfft_expl_06.jpg
new file mode 100644
index 00000000..5e811790
Binary files /dev/null and b/doc/realfft_expl_06.jpg differ
diff --git a/doc/realfft_expl_07.jpg b/doc/realfft_expl_07.jpg
new file mode 100644
index 00000000..8b87ad54
Binary files /dev/null and b/doc/realfft_expl_07.jpg differ
diff --git a/doc/realfft_expl_08.jpg b/doc/realfft_expl_08.jpg
new file mode 100644
index 00000000..f3a9ed75
Binary files /dev/null and b/doc/realfft_expl_08.jpg differ
diff --git a/doc/realfft_fwdinv.jpg b/doc/realfft_fwdinv.jpg
new file mode 100644
index 00000000..5dcc95d3
Binary files /dev/null and b/doc/realfft_fwdinv.jpg differ
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 00000000..549590e0
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,284 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+if( WIN32 )
+	# We require 2.8.3 for windows because of a bug in cmake that prevented vs2010 from generating
+	# executables properly with multiple periods
+	cmake_minimum_required( VERSION 2.8.3 )
+else( )
+	cmake_minimum_required( VERSION 2.6 )
+endif( )
+
+# This becomes the name of the solution file
+project( clFFT )
+
+# Define a version for the code
+set( CLFFT_VERSION_MAJOR 2 )
+set( CLFFT_VERSION_MINOR 1 )
+set( CLFFT_VERSION_PATCH 0 )
+set( CLFFT_VERSION "${CLFFT_VERSION_MAJOR}.${CLFFT_VERSION_MINOR}.${CLFFT_VERSION_PATCH}")
+	
+# uncomment these to debug nmake and borland makefiles
+#SET(CMAKE_START_TEMP_FILE "")
+#SET(CMAKE_END_TEMP_FILE "")
+#SET(CMAKE_VERBOSE_MAKEFILE 1)
+
+set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR} )
+
+# On windows, it's convenient to change the default install prefix such that it does NOT point to 'program files'
+# Need to check out CMAKE_RUNTIME_OUTPUT_DIRECTORY variable, and see if that eliminates the need to modify install path
+if( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
+	set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" FORCE )
+endif( )
+
+# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D.  MSVC_IDE does not use CMAKE_BUILD_TYPE
+if( NOT MSVC_IDE AND NOT CMAKE_BUILD_TYPE )
+  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE )
+endif()
+
+# Options that the user or driver program can set to control various components of the build
+option( BUILD_RUNTIME "Build the FFT runtime library" ON )
+option( BUILD_CLIENT "Build a command line clFFT client program with a variety of configurable parameters (dependency on Boost)" ON )
+option( BUILD_TEST "Build the library testing suite (dependency on google test, Boost, and FFTW)" ON )
+option( BUILD_LOADLIBRARIES "Build the optional dynamic load libraries that the FFT runtime will search for" ON )
+
+# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui.  
+# Otherwise, create a sensible default that the user can change
+if( DEFINED ENV{BOOST_ROOT} )
+	set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" )
+else( )
+	if( UNIX )
+		set( BOOST_ROOT "/usr" CACHE PATH "Modify this variable to point to the root of the Boost installation" )
+	else( )
+		set( BOOST_ROOT "/Path/To/boost_x_xx_x" CACHE PATH "Modify this variable to point to the root of the Boost installation" )
+	endif()
+endif( )
+
+# Currently, linux has a problem outputting both narrow and wide characters,
+# which happens in our client because openCL only supports narrow characters
+if( WIN32 )
+	option( UNICODE "Build with Unicode Support" ON )
+	if( UNICODE )
+		message( STATUS "UNICODE build" )
+	endif( )
+else()
+	set( UNICODE OFF )
+	message( STATUS "UNICODE feature disabled on linux" )
+endif()
+
+if( MSVC_IDE )
+	set( BUILD64 ${CMAKE_CL_64} )
+    set_property( GLOBAL PROPERTY USE_FOLDERS TRUE )
+else()
+	option( BUILD64 "Build a 64-bit product" ON )
+
+	if( IS_DIRECTORY ${PROJECT_SOURCE_DIR}/tests )
+		option( CODE_COVERAGE "Build makefiles with code coverage instrumentation" OFF )
+		if( CODE_COVERAGE )
+			message( STATUS "Code coverage instrumentation on" )
+		endif()
+	endif()
+endif()
+
+# Modify the global find property to help us find libraries like Boost in the correct paths for 64-bit
+# Essentially, find_library calls will look for /lib64 instead of /lib; works for windows and linux
+if( BUILD64 )
+	set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE )
+	message( STATUS "64bit build - FIND_LIBRARY_USE_LIB64_PATHS TRUE" )
+else()
+	set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE )
+	message( STATUS "32bit build - FIND_LIBRARY_USE_LIB64_PATHS FALSE" )
+endif()
+
+# Client is built only if boost is found; on windows, we need vs10 or higher
+# Find Boost on the system, and configure the type of boost build we want
+set( Boost_USE_MULTITHREADED ON )
+set( Boost_USE_STATIC_LIBS   ON )
+set( Boost_DETAILED_FAILURE_MSG   ON )
+set( Boost_DEBUG ON )
+set( Boost_ADDITIONAL_VERSIONS "1.46.1" "1.46" "1.44.0" "1.44" )
+
+# On linux, the boost installed in the system always appears to override any user boost installs
+if( UNIX )
+	set( Boost_NO_SYSTEM_PATHS TRUE )
+endif( )
+
+# This will define Boost_FOUND
+find_package( Boost 1.33.0 COMPONENTS program_options )
+message( STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}" )
+
+# This will define OPENCL_FOUND
+find_package( OpenCL )
+
+# This will define FFTW_FOUND
+find_package( FFTW )
+
+if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) ) 
+	message( STATUS "Cmake version 2.8 or greater needed to use GTest" )
+else()
+	# This will define GTEST_FOUND
+	find_package( GTest )
+
+	# Hack to get googletest v1.6 to work with vs2012
+	if( MSVC11 )
+		add_definitions( "/D_VARIADIC_MAX=10" )
+	endif( )
+endif()
+
+# Enable building of the clFFT client if both requested and all dependencies are found
+if( BUILD_CLIENT AND Boost_FOUND )
+	set( FFT_CLIENT ON )
+else( )
+	set( FFT_CLIENT OFF )
+endif( )
+
+# Enable building of the googletest unit test framework if requested and all dependencies are found
+if( BUILD_TEST AND GTEST_FOUND AND Boost_FOUND AND FFTW_FOUND )
+	set( UNIT_TEST ON )
+else( )
+	message( "GoogleTest unit testing will NOT be built" )
+	set( UNIT_TEST OFF )
+endif( )
+
+# FFLAGS depend on the compiler, grab the compiler name from the path
+get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE )
+# message( "C_COMPILER_NAME: " ${C_COMPILER_NAME} )
+# message( "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} )
+
+# Set common compile and link options
+if( C_COMPILER_NAME STREQUAL "cl" )
+	# Following options for nMake
+	message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} )
+	if( NOT MSVC_IDE )
+		message( STATUS "Using an nMake environment to build" )
+
+		# I can't get nmake to work because of faulty /machine:, not sure that this isn't a cmake bug
+		# if( BUILD64 )
+			# set( CMAKE_EXE_LINKER_FLAGS "/machine:amd64 ${CMAKE_EXE_LINKER_FLAGS}" )
+			# set( CMAKE_SHARED_LINKER_FLAGS "/machine:amd64 ${CMAKE_SHARED_LINKER_FLAGS}" )
+			# set( CMAKE_MODULE_LINKER_FLAGS  "/machine:amd64 ${CMAKE_MODULE_LINKER_FLAGS}" )
+		# else( )
+			# set( CMAKE_EXE_LINKER_FLAGS "/machine:i386 ${CMAKE_EXE_LINKER_FLAGS}" )
+		# endif( )
+
+	endif( )
+
+elseif( C_COMPILER_NAME STREQUAL "gcc" )
+	message( STATUS "Detected GNU C compiler." )
+	EXEC_PROGRAM( ${CMAKE_CXX_COMPILER} ARGS --version OUTPUT_VARIABLE vnum )
+	STRING(REGEX REPLACE ".*([0-9])\\.([0-9])\\.([0-9]).*" "\\1\\2\\3" vnum "${vnum}")
+	if( "${vnum}" STREQUAL "452" )
+		# we only want c++0x if we're using gcc 4.5.2; vnum is quoted so an empty or unparsed version banner cannot break if()
+		set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" )
+	endif()
+	# Select the target word size requested through BUILD64
+	if( BUILD64 )
+		set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" )
+		set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" )
+	else( )
+		set( CMAKE_CXX_FLAGS "-m32 ${CMAKE_CXX_FLAGS}" )
+		set( CMAKE_C_FLAGS "-m32 ${CMAKE_C_FLAGS}" )
+	endif( )
+
+    if( CODE_COVERAGE )
+        set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
+        set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
+    endif()
+else( )
+	message( FATAL_ERROR "Compiler name not detected" )
+endif( )
+
+# If UNICODE is defined, pass the extra UNICODE/_UNICODE definitions to the compiler
+if( UNICODE )
+	add_definitions( "/DUNICODE /D_UNICODE" )
+endif( )
+
+# Print out compiler flags for viewing/debug
+message( STATUS "CMAKE_CXX_COMPILER flags: " ${CMAKE_CXX_FLAGS} )
+message( STATUS "CMAKE_CXX_COMPILER debug flags: " ${CMAKE_CXX_FLAGS_DEBUG} )
+message( STATUS "CMAKE_CXX_COMPILER release flags: " ${CMAKE_CXX_FLAGS_RELEASE} )
+message( STATUS "CMAKE_CXX_COMPILER relwithdebinfo flags: " ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} )
+message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} )
+
+# configure a header file to pass the CMake version settings to the source, and package the header files in the output archive
+configure_file( "${PROJECT_SOURCE_DIR}/include/version.h.in" "${PROJECT_BINARY_DIR}/include/version.h" )
+install( FILES 
+			"${PROJECT_BINARY_DIR}/include/version.h" 
+			"include/clFFT.h"
+			"include/clAmdFft.h"
+			"include/clAmdFft.version.h" 
+		DESTINATION 
+			"./include" )
+
+
+# Recurse into subdirectory and start building files there
+if( BUILD_RUNTIME AND IS_DIRECTORY "${PROJECT_SOURCE_DIR}/library" )
+	add_subdirectory( library )
+else()
+	message( "Runtime library will NOT be built" )
+endif( )
+
+if( IS_DIRECTORY "${PROJECT_SOURCE_DIR}/scripts/perf" )
+	add_subdirectory( scripts/perf )
+endif( )
+
+# We only want to build the following if the user options are set
+if( FFT_CLIENT AND IS_DIRECTORY "${PROJECT_SOURCE_DIR}/client" )
+	add_subdirectory( client )
+else( )
+	message( "FFT clients will NOT be built" )
+endif( )
+
+# Recurse into subdirectory and start building files there
+if( BUILD_LOADLIBRARIES AND IS_DIRECTORY "${PROJECT_SOURCE_DIR}/statTimer" )
+	add_subdirectory( statTimer )
+else()
+	message( "LoadLibraries will NOT be built" )
+endif( )
+
+if( UNIT_TEST AND IS_DIRECTORY "${PROJECT_SOURCE_DIR}/tests" )
+	# enable_testing( )
+	add_subdirectory( tests )
+else( )
+	message( "GoogleTest unit tests will NOT be built" )
+endif( )
+
+# The following code is setting variables to control the behavior of CPack to generate our binary and source packages
+if( WIN32 )
+	set( CPACK_SOURCE_GENERATOR "ZIP" )
+	set( CPACK_GENERATOR "ZIP" )
+else( )
+	set( CPACK_SOURCE_GENERATOR "TGZ" )
+	set( CPACK_GENERATOR "TGZ" )
+endif( )
+
+if( BUILD64 )
+	set( CPACK_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CLFFT_VERSION}-${CMAKE_HOST_SYSTEM_NAME}-x64")
+else( )
+	set( CPACK_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CLFFT_VERSION}-${CMAKE_HOST_SYSTEM_NAME}-x32")
+endif( )
+
+set( CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CLFFT_VERSION}-${CMAKE_HOST_SYSTEM_NAME}-Source")
+
+set( CPACK_PACKAGE_VERSION_MAJOR ${CLFFT_VERSION_MAJOR} )
+set( CPACK_PACKAGE_VERSION_MINOR ${CLFFT_VERSION_MINOR} )
+set( CPACK_PACKAGE_VERSION_PATCH ${CLFFT_VERSION_PATCH} )
+set( CPACK_PACKAGE_DESCRIPTION_SUMMARY "OpenCL implementation of an FFT library")
+set( CPACK_PACKAGE_VENDOR "Neutral")
+set( CPACK_SOURCE_IGNORE_FILES "/\\\\.hg/;/\\\\.svn/;/\\\\.git/" )
+
+# Define all variables that influence CPack before including CPack, such as install targets
+include( CPack )
diff --git a/src/FindFFTW.cmake b/src/FindFFTW.cmake
new file mode 100644
index 00000000..50a632e8
--- /dev/null
+++ b/src/FindFFTW.cmake
@@ -0,0 +1,104 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+
+# Locate the FFTW (http://www.fftw.org/) Framework.
+#
+# Defines the following variables:
+#
+#   FFTW_FOUND - Found the FFTW framework
+#   FFTW_INCLUDE_DIRS - Include directories
+#
+# Also defines the library variables below as normal
+# variables.  These contain debug/optimized keywords when
+# a debugging library is found.
+#
+#   FFTW_LIBRARIES - libfftw
+#
+# Accepts the following variables as input:
+#
+#   FFTW_ROOT - (as a CMake or environment variable)
+#                The root directory of the fftw install prefix
+#
+#-----------------------
+# Example Usage:
+#
+#    find_package(FFTW REQUIRED)
+#    include_directories(${FFTW_INCLUDE_DIRS})
+#
+#    add_executable(foo foo.cc)
+#    target_link_libraries(foo ${FFTW_LIBRARIES})
+#
+#-----------------------
+if( DEFINED ENV{FFTW_ROOT} )
+	set( FFTW_ROOT $ENV{FFTW_ROOT} CACHE PATH "Environment variable defining the root of FFTW" )
+else( )
+	set( FFTW_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of FFTW" )
+endif( )
+
+find_path(FFTW_INCLUDE_DIRS
+	NAMES fftw3.h
+    HINTS
+        ${FFTW_ROOT}/api
+        ${FFTW_ROOT}/include
+        ${FFTW_ROOT}
+        $ENV{FFTW_ROOT}/api
+        $ENV{FFTW_ROOT}/include
+        $ENV{FFTW_ROOT}
+	PATHS
+		/usr/include
+		/usr/local/include
+)
+mark_as_advanced( FFTW_INCLUDE_DIRS )
+
+find_library( FFTW_SINGLE_PRECISION_LIBRARIES
+	NAMES fftw3f libfftw3f-3
+	HINTS
+		${FFTW_ROOT}
+		${FFTW_ROOT}/lib
+		$ENV{FFTW_ROOT}
+		$ENV{FFTW_ROOT}/lib
+	PATHS
+		/usr/lib
+		/usr/local/lib
+	DOC "FFTW dynamic library"
+)
+mark_as_advanced( FFTW_SINGLE_PRECISION_LIBRARIES )
+
+find_library( FFTW_DOUBLE_PRECISION_LIBRARIES
+	NAMES fftw3 libfftw3-3
+	HINTS
+		${FFTW_ROOT}
+		${FFTW_ROOT}/lib
+		$ENV{FFTW_ROOT}
+		$ENV{FFTW_ROOT}/lib
+	PATHS
+		/usr/lib
+		/usr/local/lib
+	DOC "FFTW dynamic library"
+)
+mark_as_advanced( FFTW_DOUBLE_PRECISION_LIBRARIES )
+
+set( FFTW_LIBRARIES ${FFTW_SINGLE_PRECISION_LIBRARIES} ${FFTW_DOUBLE_PRECISION_LIBRARIES} )
+mark_as_advanced( FFTW_LIBRARIES )
+# Check each precision separately: the combined list does not end in -NOTFOUND when only the single precision library is missing
+include( FindPackageHandleStandardArgs )
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( FFTW DEFAULT_MSG FFTW_SINGLE_PRECISION_LIBRARIES FFTW_DOUBLE_PRECISION_LIBRARIES FFTW_INCLUDE_DIRS )
+
+if( NOT FFTW_FOUND )
+	message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" )
+	message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" )
+endif()
diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake
new file mode 100644
index 00000000..4dbb3d57
--- /dev/null
+++ b/src/FindOpenCL.cmake
@@ -0,0 +1,96 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+
+# Locate an OpenCL implementation.
+# Currently supports AMD APP SDK (http://developer.amd.com/sdks/AMDAPPSDK/Pages/default.aspx/)
+#
+# Defines the following variables:
+#
+#   OPENCL_FOUND - Found the OPENCL framework
+#   OPENCL_INCLUDE_DIRS - Include directories
+#
+# Also defines the library variables below as normal
+# variables.  These contain debug/optimized keywords when
+# a debugging library is found.
+#
+#   OPENCL_LIBRARIES - libopencl
+#
+# Accepts the following variables as input:
+#
+#   OPENCL_ROOT - (as a CMake or environment variable)
+#                The root directory of the OpenCL implementation found
+#
+#   FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether findOpenCL should search for
+#                              64bit or 32bit libs
+#-----------------------
+# Example Usage:
+#
+#    find_package(OPENCL REQUIRED)
+#    include_directories(${OPENCL_INCLUDE_DIRS})
+#
+#    add_executable(foo foo.cc)
+#    target_link_libraries(foo ${OPENCL_LIBRARIES})
+#
+#-----------------------
+if( DEFINED ENV{AMDAPPSDKROOT} )
+	set( OPENCL_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of OPENCL implementation" )
+else( )
+	set( OPENCL_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of OPENCL implementation" )
+endif( )
+
+find_path(OPENCL_INCLUDE_DIRS
+	NAMES OpenCL/cl.h CL/cl.h
+    HINTS
+		${OPENCL_ROOT}/include
+		$ENV{AMDAPPSDKROOT}/include
+	PATHS
+		/usr/include
+		/usr/local/include
+	DOC "OpenCL header file path"
+)
+mark_as_advanced( OPENCL_INCLUDE_DIRS )
+
+# Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else
+get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
+
+if( LIB64 )
+	find_library( OPENCL_LIBRARIES
+		NAMES OpenCL
+		HINTS
+			${OPENCL_ROOT}/lib
+			$ENV{AMDAPPSDKROOT}/lib
+		DOC "OpenCL dynamic library path"
+		PATH_SUFFIXES x86_64 x64
+	)
+else( )
+	find_library( OPENCL_LIBRARIES
+		NAMES OpenCL
+		HINTS
+			${OPENCL_ROOT}/lib
+			$ENV{AMDAPPSDKROOT}/lib
+		DOC "OpenCL dynamic library path"
+		PATH_SUFFIXES x86
+	)
+endif( )
+mark_as_advanced( OPENCL_LIBRARIES )
+
+include( FindPackageHandleStandardArgs )
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
+
+if( NOT OPENCL_FOUND )
+	message( STATUS "FindOpenCL looked for libraries named: OpenCL" )
+endif()
diff --git a/src/client/CMakeLists.pack b/src/client/CMakeLists.pack
new file mode 100644
index 00000000..b5903633
--- /dev/null
+++ b/src/client/CMakeLists.pack
@@ -0,0 +1,170 @@
+#############################################################################
+##	Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved.
+#############################################################################
+cmake_minimum_required( VERSION 2.6 )
+project( clFFT.Sample )
+
+# If AMDAPPSDKROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui.  
+# Otherwise, create a sensible default that the user can change
+if( DEFINED ENV{AMDAPPSDKROOT} )
+	set( AMD_APP_SDK_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of the ATI Stream SDK" )
+else( )
+	set( AMD_APP_SDK_ROOT "/Path/To/ATI_Stream_SDK" CACHE PATH "Modify this variable to point to the root of the ATI Stream SDK installation" )
+endif( )
+
+# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui.  
+# Otherwise, create a sensible default that the user can change
+if( DEFINED ENV{BOOST_ROOT} )
+	set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" )
+else( )
+	if( UNIX )
+		set( BOOST_ROOT "/usr" CACHE PATH "Modify this variable to point to the root of the Boost installation" )
+	else( )
+		set( BOOST_ROOT "/Path/To/boost_x_xx_x" CACHE PATH "Modify this variable to point to the root of the Boost installation" )
+	endif()
+endif( )
+
+# Currently, linux has a problem outputting both narrow and wide characters,
+# which happens in our client because openCL only supports narrow characters
+if( WIN32 )
+	option( UNICODE "Build with Unicode Support" ON )
+	if( UNICODE )
+		message( STATUS "UNICODE build" )
+	endif( )
+else()
+	set( UNICODE OFF )
+	message( STATUS "UNICODE feature disabled on linux" )
+endif()
+
+if( MSVC_IDE )
+	set( BUILD64 ${CMAKE_CL_64} )
+else()
+	option( BUILD64 "Build a 64-bit product" ON )
+	if( BUILD64 )
+		message( STATUS "64-bit build" )
+	endif( )
+
+	if( IS_DIRECTORY ${PROJECT_SOURCE_DIR}/library/test )
+		option( CODE_COVERAGE "Build makefiles with code coverage instrumentation" OFF )
+		if( CODE_COVERAGE )
+			message( STATUS "Code coverage instrumentation on" )
+		endif()
+	endif()
+endif()
+
+# For linux, modify the global find property to help us find libraries like Boost in the correct paths (it is a property, not a variable, so its value is echoed literally)
+if( UNIX )
+	if( BUILD64 )
+		set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE )
+		message( STATUS "64bit build - FIND_LIBRARY_USE_LIB64_PATHS TRUE" )
+	else()
+		set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE )
+		message( STATUS "32bit build - FIND_LIBRARY_USE_LIB64_PATHS FALSE" )
+	endif()
+endif()
+
+# Find the absolute path to the opencl library that we need to link to; the path depends on being 64bit or 32bit
+if( BUILD64 )
+	find_library( OPENCL_LIBRARIES
+		NAMES OpenCL
+		HINTS
+			${AMD_APP_SDK_ROOT}/lib/
+			$ENV{AMDAPPSDKROOT}/lib
+		PATH_SUFFIXES x86_64 x86
+	)
+else()
+	find_library( OPENCL_LIBRARIES
+		NAMES OpenCL
+		HINTS
+			${AMD_APP_SDK_ROOT}/lib/
+			$ENV{AMDAPPSDKROOT}/lib
+		PATH_SUFFIXES x86
+	)
+endif()
+message( STATUS "OPENCL_LIBRARIES: ${OPENCL_LIBRARIES}" )
+
+set( Boost_USE_MULTITHREADED ON )
+set( Boost_USE_STATIC_LIBS   ON )
+set( Boost_DETAILED_FAILURE_MSG   ON )
+set( Boost_DEBUG ON )
+set( Boost_ADDITIONAL_VERSIONS "1.44.0" "1.44" )
+# On linux, the boost installed in the system always appears to override any user boost installs
+if( UNIX )
+	set( Boost_NO_SYSTEM_PATHS TRUE )
+endif( )
+find_package( Boost 1.33.0 COMPONENTS program_options )
+message(STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}")
+
+# FFLAGS depend on the compiler, grab the compiler name from the path
+get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE )
+# message( "C_COMPILER_NAME: " ${C_COMPILER_NAME} )
+# message( "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} )
+
+# Set common compile and link options
+if( C_COMPILER_NAME STREQUAL "cl" )
+	# Following options for nMake
+	message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} )
+	if( NOT MSVC_IDE )
+		message( STATUS "Using an nMake environment to build" )
+
+	endif( )
+
+elseif( C_COMPILER_NAME STREQUAL "gcc" )
+	message( STATUS "Detected GNU C compiler." )
+	# set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" )
+	# Select the target word size requested through BUILD64
+	if( BUILD64 )
+		set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" )
+		set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" )
+	else( )
+		set( CMAKE_CXX_FLAGS "-m32 ${CMAKE_CXX_FLAGS}" )
+		set( CMAKE_C_FLAGS "-m32 ${CMAKE_C_FLAGS}" )
+	endif( )
+else( )
+	message( FATAL_ERROR "Compiler name not detected" )
+endif( )
+
+# If UNICODE is defined, pass the extra UNICODE/_UNICODE definitions to the compiler
+if( UNICODE )
+	add_definitions( "/DUNICODE /D_UNICODE" )
+endif( )
+
+# Print out compiler flags for viewing/debug
+message( STATUS "CMAKE_CXX_COMPILER flags: " ${CMAKE_CXX_FLAGS} )
+message( STATUS "CMAKE_CXX_COMPILER debug flags: " ${CMAKE_CXX_FLAGS_DEBUG} )
+message( STATUS "CMAKE_CXX_COMPILER release flags: " ${CMAKE_CXX_FLAGS_RELEASE} )
+message( STATUS "CMAKE_CXX_COMPILER relwithdebinfo flags: " ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} )
+message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} )
+
+include_directories( ${Boost_INCLUDE_DIRS} ${AMD_APP_SDK_ROOT}/include ${PROJECT_SOURCE_DIR}/../include )
+
+# Set the OpenCL library include path depending on target platform
+if( BUILD64 )
+    if( WIN32 )
+	    link_directories( ${AMD_APP_SDK_ROOT}/lib/x86_64/ ${PROJECT_SOURCE_DIR}/../lib64/import )
+    elseif( UNIX )
+	    link_directories( ${AMD_APP_SDK_ROOT}/lib/x86_64/ ${PROJECT_SOURCE_DIR}/../lib64 )
+    endif()
+else()
+    if( WIN32 )
+	    link_directories( ${AMD_APP_SDK_ROOT}/lib/x86/ ${PROJECT_SOURCE_DIR}/../lib32/import )
+    elseif( UNIX )
+	    link_directories( ${AMD_APP_SDK_ROOT}/lib/x86/ ${PROJECT_SOURCE_DIR}/../lib32 )
+    endif()
+endif()
+
+add_executable( Client
+		# sources follow
+		client.cpp
+		openCL.misc.cpp
+		statisticalTimer.cpp
+		stdafx.cpp
+		client.h
+		openCL.misc.h
+		statisticalTimer.h
+		stdafx.h
+		targetver.h
+		unicode.compatibility.h
+		../include/clFFT.h )
+# The link target must name the executable declared by add_executable above
+target_link_libraries( Client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES})
diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt
new file mode 100644
index 00000000..d9609cca
--- /dev/null
+++ b/src/client/CMakeLists.txt
@@ -0,0 +1,69 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+
+#  client
+set( Client.Source	client.cpp 
+                    openCL.misc.cpp 
+                    stdafx.cpp )
+
+set( Client.Headers client.h 
+                    openCL.misc.h 
+                    ../statTimer/statisticalTimer.extern.h
+                    ../include/unicode.compatibility.h 
+                    ../include/stdafx.h 
+                    ../include/targetver.h 
+                    ../include/clFFT.h )
+
+set( Client.Files ${Client.Source} ${Client.Headers} )
+
+set( DL_LIB "" )
+if( WIN32 )
+	add_definitions( "/D_CONSOLE" )
+else()
+	# To use the dlopen() and dlclose() functions, we should link with libdl
+	set( DL_LIB "-ldl" )
+endif()
+
+# Include standard OpenCL headers
+include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include )
+
+add_executable( Client ${Client.Files} )
+
+target_link_libraries( Client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${DL_LIB} )
+
+set_target_properties( Client PROPERTIES VERSION ${CLFFT_VERSION} )
+set_target_properties( Client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
+
+if( BUILD64 )
+	# CPack configuration; include the executable into the package
+	install( TARGETS Client
+			RUNTIME DESTINATION bin64
+			LIBRARY DESTINATION lib64
+			ARCHIVE DESTINATION lib64/import
+			)
+
+else()
+	# CPack configuration; include the executable into the package
+	install( TARGETS Client
+			RUNTIME DESTINATION bin32
+			LIBRARY DESTINATION lib32
+			ARCHIVE DESTINATION lib32/import
+			)
+endif()
+
+# configure_file( "${PROJECT_SOURCE_DIR}/client/CMakeLists.pack"
+		# "${PROJECT_BINARY_DIR}/samples/CMakeLists.txt" COPYONLY )
diff --git a/src/client/client.cpp b/src/client/client.cpp
new file mode 100644
index 00000000..be9698e4
--- /dev/null
+++ b/src/client/client.cpp
@@ -0,0 +1,985 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// clfft.client.cpp : Defines the entry point for the console application.
+//
+
+#include "stdafx.h"
+#include <functional>
+#include <cmath>
+
+#include "client.h"
+#include "../library/private.h"
+#include "openCL.misc.h"
+#include "../statTimer/statisticalTimer.extern.h"
+#include "../include/sharedLibrary.h"
+#include "../include/unicode.compatibility.h"
+
+namespace po = boost::program_options;
+
+//	This is used with the program_options class so that the user can type an integer on the command line
+//	and we store it into an enum variable.  The layout is only overwritten when an unsigned integer was
+//	actually extracted; on a failed extraction the stream is returned with its failure state set and
+//	layout keeps its previous value.
+template< class CharT, class Traits >
+std::basic_istream< CharT, Traits > & operator>> ( std::basic_istream< CharT, Traits > & stream, clfftLayout & layout )
+{
+	cl_uint tmp = 0;
+	if( stream >> tmp )
+	{
+		layout = clfftLayout( tmp );
+	}
+	return stream;
+}
+
+
+//	Stores the value 1 in every element addressed by lengths/strides, for each of the batch_size
+//	batches; strides[ 3 ] is the distance between consecutive batches.  Elements that are not
+//	addressed (padding between strided positions) are left untouched.
+template< typename Elem >
+static void fillOnes( std::vector< Elem >& buffer, const size_t* lengths, const size_t* strides, size_t batch_size )
+{
+	for( size_t b = 0; b < batch_size; ++b )
+	{
+		const size_t p3 = b * strides[ 3 ];
+		for( size_t k = 0; k < lengths[ 2 ]; ++k )
+		{
+			const size_t p2 = p3 + k * strides[ 2 ];
+			for( size_t j = 0; j < lengths[ 1 ]; ++j )
+			{
+				const size_t p1 = p2 + j * strides[ 1 ];
+				for( size_t i = 0; i < lengths[ 0 ]; ++i )
+				{
+					buffer[ p1 + i * strides[ 0 ] ] = 1;
+				}
+			}
+		}
+	}
+}
+
+//	Runs one FFT test case with element type T and checks the result.
+//
+//	lengths      X, Y, Z lengths; entries equal to 0 are rewritten to 1 in the caller's array
+//	inStrides    4 entries: strides for X, Y, Z and, in [ 3 ], the distance between batches
+//	outStrides   same as inStrides, for the output
+//	place        in-place transforms read the result back from the input buffers
+//	profile_count  number of times the transform is enqueued
+//
+//	The input is filled with ones (complex and real layouts) or with a single value of
+//	lengths[0]*lengths[1]*lengths[2] at the start of each batch (hermitian layouts).  The output is
+//	verified, and PASS/FAIL printed, only for out-of-place transforms or when profile_count == 1.
+//
+//	Returns 0 on completion, or CLFFT_INVALID_ARG_VALUE when out_layout is not recognized.
+//	Throws std::runtime_error when in_layout is not recognized or when the in/out layouts cannot
+//	share a buffer for an in-place transform.  Failing OpenCL/clFFT calls are reported through
+//	OPENCL_V_THROW (defined outside this file; presumably throws).
+template < typename T >
+int transform( size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
+				clfftLayout in_layout, clfftLayout out_layout,
+				clfftResultLocation place, clfftPrecision precision, clfftDirection dir,
+				cl_device_type deviceType, cl_uint deviceGpuList, bool printInfo,
+				cl_uint command_queue_flags, cl_uint profile_count,
+				std::auto_ptr< clfftSetupData > setupData )
+{
+	//	Our command line does not specify what dimension FFT we wish to transform; we decode
+	//	this from the lengths that the user specifies for X, Y, Z.  A length of one means that
+	//	the user does not want that dimension.
+
+	const size_t max_dimensions = 3;
+	size_t strides[ 4 ];
+	size_t o_strides[ 4 ];
+	size_t size_of_input_buffers_in_bytes = 0;
+	size_t size_of_output_buffers_in_bytes = 0;
+	cl_uint number_of_input_buffers = 0;
+	cl_uint number_of_output_buffers = 0;
+	clfftDim	dim = CLFFT_1D;
+	cl_mem input_cl_mem_buffers [2] = { NULL, NULL };
+	cl_mem output_cl_mem_buffers[2] = { NULL, NULL };
+	std::vector< cl_device_id > device_id;
+	cl_context context;
+	cl_command_queue queue;
+	cl_event outEvent = NULL;
+	clfftPlanHandle plan_handle;
+
+	//	A length of zero is treated the same as a length of one
+	for( size_t u = 0; u < max_dimensions; ++u )
+	{
+		if( 0 == lengths[ u ] )
+			lengths[ u ] = 1;
+	}
+
+	if( lengths[ 1 ] > 1 )
+	{
+		dim	= CLFFT_2D;
+	}
+	if( lengths[ 2 ] > 1 )
+	{
+		dim	= CLFFT_3D;
+	}
+
+	//	Local, writable copies of the strides; the plan setters below take non-const pointers
+	for( size_t u = 0; u < 4; ++u )
+	{
+		strides[ u ] = inStrides[ u ];
+		o_strides[ u ] = outStrides[ u ];
+	}
+
+	//	Element counts.  The padded size of one vector is the distance between batches.  For in-place
+	//	transforms that do not involve a real layout the output shares the input's padded size;
+	//	otherwise the output distance is used.
+	const bool realTransform = (in_layout == CLFFT_REAL) || (out_layout == CLFFT_REAL);
+	const size_t fftBatchSize = strides[ 3 ] * batch_size;
+	const size_t outfftVectorSize = lengths[0] * lengths[1] * lengths[2];
+	const size_t outfftVectorSizePadded = ( (place == CLFFT_INPLACE) && !realTransform ) ? strides[ 3 ] : o_strides[ 3 ];
+	const size_t outfftBatchSize = outfftVectorSizePadded * batch_size;
+
+	//	Validate the layouts and derive the buffer counts/sizes before any OpenCL resource is
+	//	created, so that rejecting the arguments does not leave a context, buffers or a plan behind
+	switch( in_layout )
+	{
+	case CLFFT_COMPLEX_INTERLEAVED:
+	case CLFFT_HERMITIAN_INTERLEAVED:
+		number_of_input_buffers = 1;
+		size_of_input_buffers_in_bytes = fftBatchSize * sizeof( std::complex< T > );
+		break;
+	case CLFFT_COMPLEX_PLANAR:
+	case CLFFT_HERMITIAN_PLANAR:
+		number_of_input_buffers = 2;
+		size_of_input_buffers_in_bytes = fftBatchSize * sizeof( T );
+		break;
+	case CLFFT_REAL:
+		number_of_input_buffers = 1;
+		size_of_input_buffers_in_bytes = fftBatchSize * sizeof( T );
+		break;
+	default:
+		throw std::runtime_error( "Input layout format not yet supported" );
+	}
+
+	switch( out_layout )
+	{
+	case CLFFT_COMPLEX_INTERLEAVED:
+	case CLFFT_HERMITIAN_INTERLEAVED:
+		number_of_output_buffers = 1;
+		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof( std::complex< T > );
+		break;
+	case CLFFT_COMPLEX_PLANAR:
+	case CLFFT_HERMITIAN_PLANAR:
+		number_of_output_buffers = 2;
+		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof( T );
+		break;
+	case CLFFT_REAL:
+		number_of_output_buffers = 1;
+		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof( T );
+		break;
+	default:
+		//	Don't recognize output layout
+		return CLFFT_INVALID_ARG_VALUE;
+	}
+
+	//	An in-place transform reuses the input buffers for the result, which rules out layout pairs
+	//	that need a different number of buffers on each side
+	if (( place == CLFFT_INPLACE )
+	&&  ( in_layout != out_layout )) {
+		switch( in_layout )
+		{
+		case CLFFT_COMPLEX_INTERLEAVED:
+			{
+				if( (out_layout == CLFFT_COMPLEX_PLANAR) || (out_layout == CLFFT_HERMITIAN_PLANAR) )
+				{
+					throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
+				}
+				break;
+			}
+		case CLFFT_COMPLEX_PLANAR:
+			{
+				if( (out_layout == CLFFT_COMPLEX_INTERLEAVED) || (out_layout == CLFFT_HERMITIAN_INTERLEAVED) )
+				{
+					throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" );
+				}
+				break;
+			}
+		case CLFFT_HERMITIAN_INTERLEAVED:
+			{
+				if( out_layout != CLFFT_REAL )
+				{
+					throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
+				}
+				break;
+			}
+		case CLFFT_HERMITIAN_PLANAR:
+			{
+				throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" );
+			}
+		case CLFFT_REAL:
+			{
+				if( (out_layout == CLFFT_COMPLEX_PLANAR) || (out_layout == CLFFT_HERMITIAN_PLANAR) )
+				{
+					throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
+				}
+				break;
+			}
+		default:
+			break;
+		}
+	}
+
+	//	These calls create our openCL context, devices, command queue and device buffers; expected to throw on error
+	device_id = initializeCL( deviceType, deviceGpuList, context, printInfo );
+	createOpenCLCommandQueue( context,
+		command_queue_flags, queue,
+		device_id,
+		size_of_input_buffers_in_bytes, number_of_input_buffers, input_cl_mem_buffers,
+		size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);
+
+	//	Fill the input buffers.  The host vectors are value-initialized, i.e. start out as all zeros.
+	switch( in_layout )
+	{
+	case CLFFT_COMPLEX_INTERLEAVED:
+	case CLFFT_HERMITIAN_INTERLEAVED:
+		{
+			std::vector< std::complex< T > > input( fftBatchSize );
+
+			if( in_layout == CLFFT_COMPLEX_INTERLEAVED )
+			{
+				fillOnes( input, lengths, strides, batch_size );
+			}
+			else
+			{
+				//	Hermitian input: only the first element of each batch is non-zero
+				for( size_t b = 0; b < batch_size; ++b )
+				{
+					input[ b * strides[ 3 ] ] = static_cast< T >( outfftVectorSize );
+				}
+			}
+
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
+				0, NULL, &outEvent ),
+				"clEnqueueWriteBuffer failed" );
+		}
+		break;
+	case CLFFT_COMPLEX_PLANAR:
+	case CLFFT_HERMITIAN_PLANAR:
+		{
+			std::vector< T > real( fftBatchSize );
+			std::vector< T > imag( fftBatchSize );
+
+			if( in_layout == CLFFT_COMPLEX_PLANAR )
+			{
+				fillOnes( real, lengths, strides, batch_size );
+			}
+			else
+			{
+				//	Hermitian input: only the first element of each batch is non-zero
+				for( size_t b = 0; b < batch_size; ++b )
+				{
+					real[ b * strides[ 3 ] ] = static_cast< T >( outfftVectorSize );
+				}
+			}
+
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
+				0, NULL, &outEvent ),
+				"clEnqueueWriteBuffer failed" );
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
+				0, NULL, &outEvent ),
+				"clEnqueueWriteBuffer failed" );
+		}
+		break;
+	case CLFFT_REAL:
+		{
+			std::vector< T > real( fftBatchSize );
+
+			fillOnes( real, lengths, strides, batch_size );
+
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
+				0, NULL, &outEvent ),
+				"clEnqueueWriteBuffer failed" );
+		}
+		break;
+	default:
+		//	Not reachable; in_layout was validated above
+		break;
+	}
+
+	//	Discover and load the timer module if present
+	void* timerLibHandle = LoadSharedLibrary( "lib", "StatTimer", false );
+	if( timerLibHandle == NULL )
+	{
+		terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl;
+	}
+
+
+	//	Initialize function pointers to call into the shared module; get_timer is tested below
+	//	before it is used
+	PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) );
+
+	//	Create and initialize our timer class, if the external timer shared library loaded
+	baseStatTimer* timer = NULL;
+	size_t	clFFTID = 0;
+	if( get_timer )
+	{
+		timer = get_timer( CLFFT_GPU );
+		timer->Reserve( 1, profile_count );
+		timer->setNormalize( true );
+
+		clFFTID	= timer->getUniqueID( "clFFT", 0 );
+	}
+
+	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
+	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, lengths ), "clfftCreateDefaultPlan failed" );
+
+	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
+	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
+	OPENCL_V_THROW( clfftSetLayout( plan_handle, in_layout, out_layout ), "clfftSetLayout failed" );
+	OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batch_size ), "clfftSetPlanBatchSize failed" );
+	OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
+
+	OPENCL_V_THROW (clfftSetPlanInStride  ( plan_handle, dim, strides ), "clfftSetPlanInStride failed" );
+	OPENCL_V_THROW (clfftSetPlanOutStride ( plan_handle, dim, o_strides ), "clfftSetPlanOutStride failed" );
+	OPENCL_V_THROW (clfftSetPlanDistance  ( plan_handle, strides[ 3 ], o_strides[ 3 ]), "clfftSetPlanDistance failed" );
+
+	// Set backward scale factor to 1.0 for non real FFTs to do correct output checks
+	if(dir == CLFFT_BACKWARD && in_layout != CLFFT_REAL && out_layout != CLFFT_REAL)
+	{
+		OPENCL_V_THROW (clfftSetPlanScale( plan_handle, CLFFT_BACKWARD, (cl_float)1.0f ), "clfftSetPlanScale failed" );
+	}
+
+	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &queue, NULL, NULL ), "clfftBakePlan failed" );
+
+	//get the buffersize
+	size_t buffersize=0;
+	OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
+
+	//allocate the intermediate buffer, only when the plan asks for one
+	cl_mem clMedBuffer=NULL;
+
+	if (buffersize)
+	{
+		cl_int medstatus;
+		clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
+	}
+
+	//	Loop as many times as the user specifies to average out the timings
+	//
+	cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];
+
+	for( cl_uint i = 0; i < profile_count; ++i )
+	{
+		if( timer ) timer->Start( clFFTID );
+
+		OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent,
+			&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
+			"clfftEnqueueTransform failed" );
+
+		if( timer ) timer->Stop( clFFTID );
+	}
+	OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
+
+	if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
+	{
+		//	Remove all timings that are outside of 2 stddev (keeps roughly 95% of normally distributed
+		//	samples); we ignore outliers to get a more consistent result
+		timer->pruneOutliers( 2.0 );
+		timer->Print( );
+		timer->Reset( );
+	}
+
+	/*****************/
+	FreeSharedLibrary( timerLibHandle );
+
+	// Read and check output data
+	// This check is not valid if the FFT is executed multiple times inplace.
+	//
+	if (( place == CLFFT_OUTOFPLACE )
+	||  ( profile_count == 1))
+	{
+		//	checkflag is set as soon as one element differs from the expected result
+		bool checkflag= false;
+		switch( out_layout )
+		{
+		case CLFFT_HERMITIAN_INTERLEAVED:
+		case CLFFT_COMPLEX_INTERLEAVED:
+			{
+				std::vector< std::complex< T > > output( outfftBatchSize );
+
+				if( place == CLFFT_INPLACE )
+				{
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &output[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+				}
+				else
+				{
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &output[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+				}
+
+				//check output data: the first element of each batch holds the vector size, all others are zero
+				for( size_t i = 0; i < outfftBatchSize; ++i )
+				{
+					if (0 == (i % outfftVectorSizePadded))
+					{
+						if (output[i].real() != outfftVectorSize)
+						{
+							checkflag = true;
+							break;
+						}
+					}
+					else
+					{
+						if (output[ i ].real() != 0)
+						{
+							checkflag = true;
+							break;
+						}
+					}
+
+					if (output[ i ].imag() != 0)
+					{
+						checkflag = true;
+						break;
+					}
+				}
+			}
+			break;
+		case CLFFT_HERMITIAN_PLANAR:
+		case CLFFT_COMPLEX_PLANAR:
+			{
+				std::valarray< T > real( outfftBatchSize );
+				std::valarray< T > imag( outfftBatchSize );
+
+				if( place == CLFFT_INPLACE )
+				{
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+				}
+				else
+				{
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &real[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 1 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &imag[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+				}
+
+				//  Check output data: the first element of each batch holds the vector size, all others are zero
+				for( size_t i = 0; i < outfftBatchSize; ++i )
+				{
+					if (0 == (i % outfftVectorSizePadded))
+					{
+						if (real[i] != outfftVectorSize)
+						{
+							checkflag = true;
+							break;
+						}
+					}
+					else
+					{
+						if (real[i] != 0)
+						{
+							checkflag = true;
+							break;
+						}
+					}
+
+					if (imag[i] != 0)
+					{
+						checkflag = true;
+						break;
+					}
+				}
+			}
+			break;
+		case CLFFT_REAL:
+			{
+				std::valarray< T > real( outfftBatchSize );
+
+				if( place == CLFFT_INPLACE )
+				{
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+				}
+				else
+				{
+					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &real[ 0 ],
+						0, NULL, NULL ),
+						"Reading the result buffer failed" );
+				}
+
+				//	Check output data: every element addressed through the output strides must be 1
+				for(size_t b = 0; b < batch_size; b++)
+				{
+					size_t p3 = b * o_strides[3];
+					for(size_t k = 0; k < lengths[2]; k++)
+					{
+						size_t p2 = p3 + k * o_strides[2];
+						for(size_t j = 0; j < lengths[1]; j++)
+						{
+							size_t p1 = p2 + j * o_strides[1];
+							for(size_t i = 0; i < lengths[0]; i++)
+							{
+								size_t p0 = p1 + i * o_strides[0];
+
+								if (real[p0] != 1)
+								{
+									checkflag = true;
+									break;
+								}
+
+							}
+						}
+					}
+				}
+			}
+			break;
+		default:
+			{
+				throw std::runtime_error( "Output layout format not yet supported" );
+			}
+			break;
+		}
+
+		if (checkflag)
+		{
+			std::cout << "\n\n\t\tInternal Client Test *****FAIL*****" << std::endl;
+		}
+		else
+		{
+			std::cout << "\n\n\t\tInternal Client Test *****PASS*****" << std::endl;
+		}
+	}
+
+	//	The intermediate buffer is owned by this function; all transforms have completed (clFinish above)
+	if( clMedBuffer != NULL )
+	{
+		OPENCL_V_THROW( clReleaseMemObject( clMedBuffer ), "Releasing intermediate buffer failed" );
+		clMedBuffer = NULL;
+	}
+
+	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
+	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
+
+	cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, &outEvent );
+	return 0;
+}
+
+//	Entry point of the clFFT test client.  Parses the command line with boost::program_options,
+//	fills in default strides for whichever the user left at 0, and runs transform<float> or
+//	transform<double> depending on the requested precision.
+//	Returns 0 on success (and after printing help), 1 when mutually exclusive device options were
+//	given, when transform reports a non-zero status, or when a std::exception is caught.
+int _tmain( int argc, _TCHAR* argv[] )
+{
+	//	This helps with mixing output of both wide and narrow characters to the screen
+	std::ios::sync_with_stdio( false );
+
+	//	Define MEMORYREPORT on windows platforms to enable debug memory heap checking
+#if defined( MEMORYREPORT ) && defined( _WIN32 )
+	TCHAR logPath[ MAX_PATH ];
+	::GetCurrentDirectory( MAX_PATH, logPath );
+	::_tcscat_s( logPath, _T( "\\MemoryReport.txt") );
+
+	//	We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output its memory
+	//	statistics on app shutdown
+	HANDLE hLogFile;
+	hLogFile = ::CreateFile( logPath, GENERIC_WRITE,
+		FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL );
+
+	::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
+	::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
+	::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG );
+
+	::_CrtSetReportFile( _CRT_ASSERT, hLogFile );
+	::_CrtSetReportFile( _CRT_ERROR, hLogFile );
+	::_CrtSetReportFile( _CRT_WARN, hLogFile );
+
+	int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG );
+	tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF;
+	::_CrtSetDbgFlag( tmp );
+
+	//	By looking at the memory leak report that is generated by this debug heap, there is a number with
+	//	{} brackets that indicates the incremental allocation number of that block.  If you wish to set
+	//	a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap
+	//	will issue a bp on the request, allowing you to look at the call stack
+	//	::_CrtSetBreakAlloc( 1833 );
+
+#endif /* MEMORYREPORT */
+
+	//	OpenCL state
+	cl_device_type		deviceType	= CL_DEVICE_TYPE_DEFAULT;
+	cl_uint				deviceGpuList     = 0;	// a bitmap set
+
+	//	FFT state
+
+	clfftResultLocation	place = CLFFT_INPLACE;
+	clfftLayout	inLayout  = CLFFT_COMPLEX_INTERLEAVED;
+	clfftLayout	outLayout = CLFFT_COMPLEX_INTERLEAVED;
+	clfftPrecision precision = CLFFT_SINGLE;
+	clfftDirection dir = CLFFT_FORWARD;
+	size_t lengths[ 3 ] = {1,1,1};
+	size_t iStrides[ 4 ] = {0,0,0,0};
+	size_t oStrides[ 4 ] = {0,0,0,0};
+	cl_uint profile_count = 0;
+
+	cl_uint command_queue_flags = 0;
+	size_t batchSize = 1;
+
+
+	//	Flags for the FFT library; ownership is handed over to transform() below
+	std::auto_ptr< clfftSetupData > setupData( new clfftSetupData );
+
+	try
+	{
+		//	Initialized inside the try block so that a failure is reported by the handler below
+		OPENCL_V_THROW( clfftInitSetupData( setupData.get( ) ),
+			"clfftInitSetupData failed" );
+
+		// Declare the supported options.
+		po::options_description desc( "clFFT client command line options" );
+		desc.add_options()
+			( "help,h",        "produces this help message" )
+			( "version,v",     "Print queryable version information from the clFFT library" )
+			( "clInfo,i",      "Print queryable information of the OpenCL runtime" )
+			( "gpu,g",         "Force instantiation of an OpenCL GPU device" )
+			( "cpu,c",         "Force instantiation of an OpenCL CPU device" )
+			( "all,a",         "Force instantiation of all OpenCL devices" )
+			( "outPlace,o",    "Out of place FFT transform (default: in place)" )
+			( "double",		   "Double precision transform (default: single)" )
+			( "inv",			"Backward transform (default: forward)" )
+			( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" )
+			( "lenX,x",        po::value< size_t >( &lengths[ 0 ] )->default_value( 1024 ),   "Specify the length of the 1st dimension of a test array" )
+			( "lenY,y",        po::value< size_t >( &lengths[ 1 ] )->default_value( 1 ),      "Specify the length of the 2nd dimension of a test array" )
+			( "lenZ,z",        po::value< size_t >( &lengths[ 2 ] )->default_value( 1 ),      "Specify the length of the 3rd dimension of a test array" )
+			( "isX",   po::value< size_t >( &iStrides[ 0 ] )->default_value( 1 ),						"Specify the input stride of the 1st dimension of a test array" )
+			( "isY",   po::value< size_t >( &iStrides[ 1 ] )->default_value( 0 ),	"Specify the input stride of the 2nd dimension of a test array" )
+			( "isZ",   po::value< size_t >( &iStrides[ 2 ] )->default_value( 0 ),	"Specify the input stride of the 3rd dimension of a test array" )
+			( "iD", po::value< size_t >( &iStrides[ 3 ] )->default_value( 0 ), "input distance between subsequent sets of data when batch size > 1" )
+			( "osX",   po::value< size_t >( &oStrides[ 0 ] )->default_value( 1 ),						"Specify the output stride of the 1st dimension of a test array" )
+			( "osY",   po::value< size_t >( &oStrides[ 1 ] )->default_value( 0 ),	"Specify the output stride of the 2nd dimension of a test array" )
+			( "osZ",   po::value< size_t >( &oStrides[ 2 ] )->default_value( 0 ),	"Specify the output stride of the 3rd dimension of a test array" )
+			( "oD", po::value< size_t >( &oStrides[ 3 ] )->default_value( 0 ), "output distance between subsequent sets of data when batch size > 1" )
+			( "batchSize,b",   po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " )
+			( "profile,p",     po::value< cl_uint >( &profile_count )->default_value( 1 ), "Time and report the kernel speed of the FFT (default: profiling off)" )
+			( "inLayout",      po::value< clfftLayout >( &inLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" )
+			( "outLayout",     po::value< clfftLayout >( &outLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of output data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" )
+			;
+
+		po::variables_map vm;
+		po::store( po::parse_command_line( argc, argv, desc ), vm );
+		po::notify( vm );
+
+		if( vm.count( "version" ) )
+		{
+			const int indent = countOf( "clFFT client API version: " );
+			tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " )
+				<< clfftVersionMajor << _T( "." )
+				<< clfftVersionMinor << _T( "." )
+				<< clfftVersionPatch << std::endl;
+
+			cl_uint libMajor, libMinor, libPatch;
+			clfftGetVersion( &libMajor, &libMinor, &libPatch );
+
+			tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " )
+				<< libMajor << _T( "." )
+				<< libMinor << _T( "." )
+				<< libPatch << std::endl << std::endl;
+		}
+
+		if( vm.count( "help" ) )
+		{
+			//	This needs to be 'cout' as program-options does not support wcout yet
+			std::cout << desc << std::endl;
+			return 0;
+		}
+
+		//	One bit per device option; more than one bit set means that conflicting options were given
+		size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0)
+			| ((vm.count( "cpu" ) > 0) ? 2 : 0)
+			| ((vm.count( "all" ) > 0) ? 4 : 0);
+		if ((mutex & (mutex-1)) != 0) {
+			terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl;
+			if (vm.count ( "gpu" )  > 0) terr << _T("    gpu,g   Force instantiation of an OpenCL GPU device" ) << std::endl;
+			if (vm.count ( "cpu" )  > 0) terr << _T("    cpu,c   Force instantiation of an OpenCL CPU device" ) << std::endl;
+			if (vm.count ( "all" )  > 0) terr << _T("    all,a   Force instantiation of all OpenCL devices" ) << std::endl;
+			return 1;
+		}
+
+		if( vm.count( "gpu" ) )
+		{
+			deviceType	= CL_DEVICE_TYPE_GPU;
+			deviceGpuList = ~0;
+		}
+
+		if( vm.count( "cpu" ) )
+		{
+			deviceType	= CL_DEVICE_TYPE_CPU;
+		}
+
+		if( vm.count( "all" ) )
+		{
+			deviceType	= CL_DEVICE_TYPE_ALL;
+		}
+
+		bool printInfo = false;
+		if( vm.count( "clInfo" ) )
+		{
+			printInfo = true;
+		}
+
+		if( vm.count( "outPlace" ) )
+		{
+			place = CLFFT_OUTOFPLACE;
+		}
+
+		if( vm.count( "double" ) )
+		{
+			precision = CLFFT_DOUBLE;
+		}
+
+		if( vm.count( "inv" ) )
+		{
+			dir = CLFFT_BACKWARD;
+		}
+
+		//	Queue profiling is only switched on when more than one run was requested
+		if( profile_count > 1 )
+		{
+			command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
+		}
+
+		if( vm.count( "dumpKernels" ) )
+		{
+			setupData->debugFlags	|= CLFFT_DUMP_PROGRAMS;
+		}
+
+		int inL = (int)inLayout;
+		int otL = (int)outLayout;
+
+		// input output layout support matrix; rows are input layouts, columns are output layouts,
+		// both in the 1..5 order printed by the help text
+		int ioLayoutSupport[5][5] =		{
+										{ 1, 1, 0, 0, 1 },
+										{ 1, 1, 0, 0, 1 },
+										{ 0, 0, 0, 0, 1 },
+										{ 0, 0, 0, 0, 1 },
+										{ 1, 1, 1, 1, 0 },
+										};
+
+		if((inL < 1) || (inL > 5)) throw std::runtime_error( "Invalid Input layout format" );
+		if((otL < 1) || (otL > 5)) throw std::runtime_error( "Invalid Output layout format" );
+
+		if(ioLayoutSupport[inL-1][otL-1] == 0) throw std::runtime_error( "Invalid combination of Input/Output layout formats" );
+
+		//	A stride left at 0 is replaced by the packed default derived from the next lower dimension
+		if( ((inL == 1) || (inL == 2)) && ((otL == 1) || (otL == 2)) ) // Complex-Complex cases
+		{
+			iStrides[1] = iStrides[1] ? iStrides[1] : lengths[0] * iStrides[0];
+			iStrides[2] = iStrides[2] ? iStrides[2] : lengths[1] * iStrides[1];
+			iStrides[3] = iStrides[3] ? iStrides[3] : lengths[2] * iStrides[2];
+
+
+
+			if(place == CLFFT_INPLACE)
+			{
+				oStrides[0] = iStrides[0];
+				oStrides[1] = iStrides[1];
+				oStrides[2] = iStrides[2];
+				oStrides[3] = iStrides[3];
+			}
+			else
+			{
+				oStrides[1] = oStrides[1] ? oStrides[1] : lengths[0] * oStrides[0];
+				oStrides[2] = oStrides[2] ? oStrides[2] : lengths[1] * oStrides[1];
+				oStrides[3] = oStrides[3] ? oStrides[3] : lengths[2] * oStrides[2];
+			}
+		}
+		else // Real-Complex and Complex-Real cases
+		{
+			//	rst points at the strides of the real side, cst at the strides of the other side
+			size_t *rst, *cst;
+			size_t N = lengths[0];
+			size_t Nt = 1 + lengths[0]/2;
+			bool iflag = false;
+			bool rcFull = (inL == 1) || (inL == 2) || (otL == 1) || (otL == 2);
+
+			if(inLayout == CLFFT_REAL) { iflag = true; rst = iStrides; }
+			else { rst = oStrides; } // either in or out should be REAL
+
+			// Set either in or out strides whichever is real
+			if(place == CLFFT_INPLACE)
+			{
+				if(rcFull)	{ rst[1] = rst[1] ? rst[1] :  N * 2 * rst[0]; }
+				else		{ rst[1] = rst[1] ? rst[1] : Nt * 2 * rst[0]; }
+
+				rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1];
+				rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2];
+			}
+			else
+			{
+				rst[1] = rst[1] ? rst[1] : lengths[0] * rst[0];
+				rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1];
+				rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2];
+			}
+
+			// Set the remaining of in or out strides that is not real
+			if(iflag) { cst = oStrides; }
+			else	  { cst = iStrides; }
+
+			if(rcFull)	{ cst[1] = cst[1] ? cst[1] :  N * cst[0]; }
+			else		{ cst[1] = cst[1] ? cst[1] : Nt * cst[0]; }
+
+			cst[2] = cst[2] ? cst[2] : lengths[1] * cst[1];
+			cst[3] = cst[3] ? cst[3] : lengths[2] * cst[2];
+		}
+
+		int status = 0;
+		if( precision == CLFFT_SINGLE )
+			status = transform<float>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceGpuList, printInfo, command_queue_flags, profile_count, setupData );
+		else
+			status = transform<double>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceGpuList, printInfo, command_queue_flags, profile_count, setupData );
+
+		//	transform() reports rejected arguments through its return value rather than by throwing
+		if( status != 0 )
+		{
+			terr << _T( "clFFT client transform failed with status: " ) << status << std::endl;
+			return 1;
+		}
+	}
+	catch( std::exception& e )
+	{
+		terr << _T( "clFFT error condition reported:" ) << std::endl << e.what() << std::endl;
+		return 1;
+	}
+	return 0;
+}
diff --git a/src/client/client.h b/src/client/client.h
new file mode 100644
index 00000000..ad22f363
--- /dev/null
+++ b/src/client/client.h
@@ -0,0 +1,26 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLIENT_H )
+#define CLIENT_H
+
+//	Boost headers that we want to use
+//	#define BOOST_PROGRAM_OPTIONS_DYN_LINK
+#include <boost/program_options.hpp>
+
+#endif
diff --git a/src/client/openCL.misc.cpp b/src/client/openCL.misc.cpp
new file mode 100644
index 00000000..6bbdec34
--- /dev/null
+++ b/src/client/openCL.misc.cpp
@@ -0,0 +1,535 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// openCL.misc.cpp : Provides functions to set up OpenCL
+//
+
+#include "stdafx.h"
+#include <stdexcept>
+#include <iomanip>
+#include <sstream>
+#include <cstring>
+#include <vector>
+#include "clFFT.h"
+#include "openCL.misc.h"
+
+void prettyPrintPlatformInfo( const cl_platform_id& pId )
+{	//	Prints the profile, version, name, vendor and extensions strings of platform pId to std::cout; a failing query throws through OPENCL_V_THROW
+	size_t platformProfileSize	= 0;	//	every string is read with two calls: the first one only asks for its size ...
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, 0, NULL, &platformProfileSize ),
+		"Getting CL_PLATFORM_PROFILE Platform Info string size ( ::clGetPlatformInfo() )" );
+
+	std::vector< char > szPlatformProfile( platformProfileSize );	//	... the second one fills a buffer of exactly that size
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, platformProfileSize, &szPlatformProfile[ 0 ], NULL),
+		"Getting CL_PLATFORM_PROFILE Platform Info string ( ::clGetPlatformInfo() )" );
+
+	size_t platformVersionSize	= 0;
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, 0, NULL, &platformVersionSize ),
+		"Getting CL_PLATFORM_VERSION Platform Info string size ( ::clGetPlatformInfo() )" );
+
+	std::vector< char > szPlatformVersion( platformVersionSize );
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, platformVersionSize, &szPlatformVersion[ 0 ], NULL),
+		"Getting CL_PLATFORM_VERSION Platform Info string ( ::clGetPlatformInfo() )" );
+
+	size_t platformNameSize	= 0;
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, 0, NULL, &platformNameSize ),
+		"Getting CL_PLATFORM_NAME Platform Info string size ( ::clGetPlatformInfo() )" );
+
+	std::vector< char > szPlatformName( platformNameSize );
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, platformNameSize, &szPlatformName[ 0 ], NULL),
+		"Getting CL_PLATFORM_NAME Platform Info string ( ::clGetPlatformInfo() )" );
+
+	size_t vendorStringSize	= 0;
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, 0, NULL, &vendorStringSize ),
+		"Getting CL_PLATFORM_VENDOR Platform Info string size ( ::clGetPlatformInfo() )" );
+
+	std::vector< char > szPlatformVendor( vendorStringSize );
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, vendorStringSize, &szPlatformVendor[ 0 ], NULL),
+		"Getting CL_PLATFORM_VENDOR Platform Info string ( ::clGetPlatformInfo() )" );
+
+	size_t platformExtensionsSize	= 0;
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, 0, NULL, &platformExtensionsSize ),
+		"Getting CL_PLATFORM_EXTENSIONS Platform Info string size ( ::clGetPlatformInfo() )" );
+
+	std::vector< char > szPlatformExtensions( platformExtensionsSize );
+	OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, platformExtensionsSize, &szPlatformExtensions[ 0 ], NULL),
+		"Getting CL_PLATFORM_EXTENSIONS Platform Info string ( ::clGetPlatformInfo() )" );
+
+	const int indent = countOf( "    CL_PLATFORM_EXTENSIONS: " );	//	label column width, taken from the widest label used below (the count includes the terminating NUL)
+	std::cout << std::left << std::setw( indent ) << "    CL_PLATFORM_PROFILE: " << &szPlatformProfile[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_PLATFORM_VERSION: " << &szPlatformVersion[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_PLATFORM_NAME: " << &szPlatformName[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_PLATFORM_VENDOR: " << &szPlatformVendor[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_PLATFORM_EXTENSIONS: " << &szPlatformExtensions[ 0 ] << std::endl;
+	std::cout << std::right << std::endl;	//	leave the stream right-aligned and finish with an empty line
+}
+
+void prettyPrintDeviceInfo( const cl_device_id& dId )
+{	//	Queries a fixed set of properties of device dId and prints them to std::cout; any failing query throws through OPENCL_V_THROW
+	size_t deviceNameSize	= 0;	//	strings are read in two steps: a size query first, then the text itself
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, 0, NULL, &deviceNameSize ),
+		"Getting CL_DEVICE_NAME device info string size ( ::clGetDeviceInfo() )" );
+
+	std::vector< char > szDeviceName( deviceNameSize );
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, deviceNameSize, &szDeviceName[ 0 ], NULL ),
+		"Getting CL_DEVICE_NAME device info string ( ::clGetDeviceInfo() )" );
+
+	size_t deviceVersionSize	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ),
+		"Getting CL_DEVICE_VERSION device info string size ( ::clGetDeviceInfo() )" );
+
+	std::vector< char > szDeviceVersion( deviceVersionSize );
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ),
+		"Getting CL_DEVICE_VERSION device info string ( ::clGetDeviceInfo() )" );
+
+	size_t driverVersionSize	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, 0, NULL, &driverVersionSize ),
+		"Getting CL_DRIVER_VERSION device info string size ( ::clGetDeviceInfo() )" );
+
+	std::vector< char > szDriverVersion( driverVersionSize );
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, driverVersionSize, &szDriverVersion[ 0 ], NULL ),
+		"Getting CL_DRIVER_VERSION device info string ( ::clGetDeviceInfo() )" );
+
+	size_t openCLVersionSize	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &openCLVersionSize ),
+		"Getting CL_DEVICE_OPENCL_C_VERSION device info string size ( ::clGetDeviceInfo() )" );
+
+	std::vector< char > szOpenCLVersion( openCLVersionSize );
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, openCLVersionSize, &szOpenCLVersion[ 0 ], NULL ),
+		"Getting CL_DEVICE_OPENCL_C_VERSION device info string ( ::clGetDeviceInfo() )" );
+
+	cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;	//	fixed-size properties need a single call each
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_TYPE, sizeof( cl_device_type ), &devType, NULL ),
+		"Getting CL_DEVICE_TYPE device info ( ::clGetDeviceInfo() )" );
+
+	cl_uint devAddrBits = 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_ADDRESS_BITS, sizeof( cl_uint ), &devAddrBits, NULL ),
+		"Getting CL_DEVICE_ADDRESS_BITS device info ( ::clGetDeviceInfo() )" );
+
+	cl_uint maxClockFreq = 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof( cl_uint ), &maxClockFreq, NULL ),
+		"Getting CL_DEVICE_MAX_CLOCK_FREQUENCY device info ( ::clGetDeviceInfo() )" );
+
+	cl_bool devAvailable = CL_FALSE;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_AVAILABLE, sizeof( cl_bool ), &devAvailable, NULL ),
+		"Getting CL_DEVICE_AVAILABLE device info ( ::clGetDeviceInfo() )" );
+
+	cl_bool devCompAvailable = CL_FALSE;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_COMPILER_AVAILABLE, sizeof( cl_bool ), &devCompAvailable, NULL ),
+		"Getting CL_DEVICE_COMPILER_AVAILABLE device info ( ::clGetDeviceInfo() )" );
+
+	size_t devMaxWorkGroup	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size_t ), &devMaxWorkGroup, NULL ),
+		"Getting CL_DEVICE_MAX_WORK_GROUP_SIZE device info ( ::clGetDeviceInfo() )" );
+
+	cl_uint devMaxWorkItemDim = 0;	//	a count, so it starts at 0 rather than at the boolean CL_FALSE
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &devMaxWorkItemDim, NULL ),
+		"Getting CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS device info ( ::clGetDeviceInfo() )" );
+
+	std::vector< size_t >	devMaxWorkItemSizes( devMaxWorkItemDim );	//	one entry per dimension reported by the query above
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( size_t )*devMaxWorkItemSizes.size( ), &devMaxWorkItemSizes[0], NULL),
+		"Getting CL_DEVICE_MAX_WORK_ITEM_SIZES device info ( ::clGetDeviceInfo() )" );
+
+	cl_bool deviceHostUnified = CL_FALSE;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( cl_bool ), &deviceHostUnified, NULL ),
+		"Getting CL_DEVICE_HOST_UNIFIED_MEMORY device info ( ::clGetDeviceInfo() )" );
+
+	cl_ulong devMaxConstantBuffer	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( cl_ulong ), &devMaxConstantBuffer, NULL ),
+		"Getting CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE device info ( ::clGetDeviceInfo() )" );
+
+	cl_ulong devLocalMemSize	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( cl_ulong ), &devLocalMemSize, NULL ),
+		"Getting CL_DEVICE_LOCAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" );
+
+	cl_ulong deviceGlobalMemSize = 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( cl_ulong ), &deviceGlobalMemSize, NULL ),
+		"Getting CL_DEVICE_GLOBAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" );
+
+	cl_ulong deviceMaxMemAllocSize = 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &deviceMaxMemAllocSize, NULL ),
+		"Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" );
+
+	size_t deviceExtSize	= 0;
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ),
+		"Getting CL_DEVICE_EXTENSIONS device info string size ( ::clGetDeviceInfo() )" );
+
+	std::vector< char > szDeviceExt( deviceExtSize );
+	OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ),
+		"Getting CL_DEVICE_EXTENSIONS device info string ( ::clGetDeviceInfo() )" );
+
+	const int indent = countOf( "    CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " );	//	width of the label column used for every line below
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_NAME: " << &szDeviceName[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_VERSION: " << &szDeviceVersion[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DRIVER_VERSION: " << &szDriverVersion[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_TYPE: "	//	devType is tested bit by bit; every matching name is printed back to back
+		<< (CL_DEVICE_TYPE_DEFAULT     & devType ? "default"     : "")
+		<< (CL_DEVICE_TYPE_CPU         & devType ? "CPU"         : "")
+		<< (CL_DEVICE_TYPE_GPU         & devType ? "GPU"         : "")
+		<< (CL_DEVICE_TYPE_ACCELERATOR & devType ? "Accelerator" : "")
+		<< std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_MAX_CLOCK_FREQUENCY: " << maxClockFreq << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_ADDRESS_BITS: " << devAddrBits << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_AVAILABLE: " << ( devAvailable ? "TRUE": "FALSE") << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_COMPILER_AVAILABLE: " << ( devCompAvailable ? "TRUE": "FALSE") << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_OPENCL_C_VERSION: " << &szOpenCLVersion[ 0 ] << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_MAX_WORK_GROUP_SIZE: " << devMaxWorkGroup << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " << devMaxWorkItemDim << std::endl;
+	for( cl_uint wis = 0; wis < devMaxWorkItemSizes.size( ); ++wis )
+	{
+		std::stringstream dimString;	//	the per-dimension label is built first so that setw( ) pads it as one unit
+		dimString << "Dimension[ " << wis << " ]  ";
+		std::cout << std::right << std::setw( indent ) << dimString.str( ) << devMaxWorkItemSizes[wis] << std::endl;
+	}
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_HOST_UNIFIED_MEMORY: " << ( deviceHostUnified ? "TRUE": "FALSE") << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: " << devMaxConstantBuffer;
+	std::cout << " ( " << devMaxConstantBuffer / 1024 << " KB )" << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_LOCAL_MEM_SIZE: " << devLocalMemSize;
+	std::cout << " ( " << devLocalMemSize / 1024 << " KB )" << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_GLOBAL_MEM_SIZE: " << deviceGlobalMemSize;
+	std::cout << " ( " << deviceGlobalMemSize / 1048576 << " MB )" << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_MAX_MEM_ALLOC_SIZE: " << deviceMaxMemAllocSize;
+	std::cout << " ( " << deviceMaxMemAllocSize / 1048576 << " MB )" << std::endl;
+	std::cout << std::left << std::setw( indent ) << "    CL_DEVICE_EXTENSIONS: " << &szDeviceExt[ 0 ] << std::endl;
+
+	std::cout << std::right << std::endl;	//	leave the stream right-aligned and finish with an empty line
+}
+
+//	Verify a failed condition; return true on fail
+inline cl_bool OPENCL_V_FAIL( cl_int res )
+{
+	//	Anything other than CL_SUCCESS counts as a failure
+	const bool failed = ( res != CL_SUCCESS );
+
+	return failed ? CL_TRUE : CL_FALSE;
+}
+
+std::string prettyPrintclFFTStatus( const cl_int& status )
+{
+	const char* label = "Error code not defined";	//	returned for every status without a case below
+	switch( status )
+	{
+		case CLFFT_INVALID_GLOBAL_WORK_SIZE:
+			label = "CLFFT_INVALID_GLOBAL_WORK_SIZE"; break;
+		case CLFFT_INVALID_MIP_LEVEL:
+			label = "CLFFT_INVALID_MIP_LEVEL"; break;
+		case CLFFT_INVALID_BUFFER_SIZE:
+			label = "CLFFT_INVALID_BUFFER_SIZE"; break;
+		case CLFFT_INVALID_GL_OBJECT:
+			label = "CLFFT_INVALID_GL_OBJECT"; break;
+		case CLFFT_INVALID_OPERATION:
+			label = "CLFFT_INVALID_OPERATION"; break;
+		case CLFFT_INVALID_EVENT:
+			label = "CLFFT_INVALID_EVENT"; break;
+		case CLFFT_INVALID_EVENT_WAIT_LIST:
+			label = "CLFFT_INVALID_EVENT_WAIT_LIST"; break;
+		case CLFFT_INVALID_GLOBAL_OFFSET:
+			label = "CLFFT_INVALID_GLOBAL_OFFSET"; break;
+		case CLFFT_INVALID_WORK_ITEM_SIZE:
+			label = "CLFFT_INVALID_WORK_ITEM_SIZE"; break;
+		case CLFFT_INVALID_WORK_GROUP_SIZE:
+			label = "CLFFT_INVALID_WORK_GROUP_SIZE"; break;
+		case CLFFT_INVALID_WORK_DIMENSION:
+			label = "CLFFT_INVALID_WORK_DIMENSION"; break;
+		case CLFFT_INVALID_KERNEL_ARGS:
+			label = "CLFFT_INVALID_KERNEL_ARGS"; break;
+		case CLFFT_INVALID_ARG_SIZE:
+			label = "CLFFT_INVALID_ARG_SIZE"; break;
+		case CLFFT_INVALID_ARG_VALUE:
+			label = "CLFFT_INVALID_ARG_VALUE"; break;
+		case CLFFT_INVALID_ARG_INDEX:
+			label = "CLFFT_INVALID_ARG_INDEX"; break;
+		case CLFFT_INVALID_KERNEL:
+			label = "CLFFT_INVALID_KERNEL"; break;
+		case CLFFT_INVALID_KERNEL_DEFINITION:
+			label = "CLFFT_INVALID_KERNEL_DEFINITION"; break;
+		case CLFFT_INVALID_KERNEL_NAME:
+			label = "CLFFT_INVALID_KERNEL_NAME"; break;
+		case CLFFT_INVALID_PROGRAM_EXECUTABLE:
+			label = "CLFFT_INVALID_PROGRAM_EXECUTABLE"; break;
+		case CLFFT_INVALID_PROGRAM:
+			label = "CLFFT_INVALID_PROGRAM"; break;
+		case CLFFT_INVALID_BUILD_OPTIONS:
+			label = "CLFFT_INVALID_BUILD_OPTIONS"; break;
+		case CLFFT_INVALID_BINARY:
+			label = "CLFFT_INVALID_BINARY"; break;
+		case CLFFT_INVALID_SAMPLER:
+			label = "CLFFT_INVALID_SAMPLER"; break;
+		case CLFFT_INVALID_IMAGE_SIZE:
+			label = "CLFFT_INVALID_IMAGE_SIZE"; break;
+		case CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR:
+			label = "CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR"; break;
+		case CLFFT_INVALID_MEM_OBJECT:
+			label = "CLFFT_INVALID_MEM_OBJECT"; break;
+		case CLFFT_INVALID_HOST_PTR:
+			label = "CLFFT_INVALID_HOST_PTR"; break;
+		case CLFFT_INVALID_COMMAND_QUEUE:
+			label = "CLFFT_INVALID_COMMAND_QUEUE"; break;
+		case CLFFT_INVALID_QUEUE_PROPERTIES:
+			label = "CLFFT_INVALID_QUEUE_PROPERTIES"; break;
+		case CLFFT_INVALID_CONTEXT:
+			label = "CLFFT_INVALID_CONTEXT"; break;
+		case CLFFT_INVALID_DEVICE:
+			label = "CLFFT_INVALID_DEVICE"; break;
+		case CLFFT_INVALID_PLATFORM:
+			label = "CLFFT_INVALID_PLATFORM"; break;
+		case CLFFT_INVALID_DEVICE_TYPE:
+			label = "CLFFT_INVALID_DEVICE_TYPE"; break;
+		case CLFFT_INVALID_VALUE:
+			label = "CLFFT_INVALID_VALUE"; break;
+		case CLFFT_MAP_FAILURE:
+			label = "CLFFT_MAP_FAILURE"; break;
+		case CLFFT_BUILD_PROGRAM_FAILURE:
+			label = "CLFFT_BUILD_PROGRAM_FAILURE"; break;
+		case CLFFT_IMAGE_FORMAT_NOT_SUPPORTED:
+			label = "CLFFT_IMAGE_FORMAT_NOT_SUPPORTED"; break;
+		case CLFFT_IMAGE_FORMAT_MISMATCH:
+			label = "CLFFT_IMAGE_FORMAT_MISMATCH"; break;
+		case CLFFT_MEM_COPY_OVERLAP:
+			label = "CLFFT_MEM_COPY_OVERLAP"; break;
+		case CLFFT_PROFILING_INFO_NOT_AVAILABLE:
+			label = "CLFFT_PROFILING_INFO_NOT_AVAILABLE"; break;
+		case CLFFT_OUT_OF_HOST_MEMORY:
+			label = "CLFFT_OUT_OF_HOST_MEMORY"; break;
+		case CLFFT_OUT_OF_RESOURCES:
+			label = "CLFFT_OUT_OF_RESOURCES"; break;
+		case CLFFT_MEM_OBJECT_ALLOCATION_FAILURE:
+			label = "CLFFT_MEM_OBJECT_ALLOCATION_FAILURE"; break;
+		case CLFFT_COMPILER_NOT_AVAILABLE:
+			label = "CLFFT_COMPILER_NOT_AVAILABLE"; break;
+		case CLFFT_DEVICE_NOT_AVAILABLE:
+			label = "CLFFT_DEVICE_NOT_AVAILABLE"; break;
+		case CLFFT_DEVICE_NOT_FOUND:
+			label = "CLFFT_DEVICE_NOT_FOUND"; break;
+		case CLFFT_SUCCESS:
+			label = "CLFFT_SUCCESS"; break;
+		case CLFFT_NOTIMPLEMENTED:
+			label = "CLFFT_NOTIMPLEMENTED"; break;
+		case CLFFT_FILE_NOT_FOUND:
+			label = "CLFFT_FILE_NOT_FOUND"; break;
+		case CLFFT_FILE_CREATE_FAILURE:
+			label = "CLFFT_FILE_CREATE_FAILURE"; break;
+		case CLFFT_VERSION_MISMATCH:
+			label = "CLFFT_VERSION_MISMATCH"; break;
+		case CLFFT_INVALID_PLAN:
+			label = "CLFFT_INVALID_PLAN"; break;
+		default: break;	//	keep the fallback text
+	}
+	return label;
+}
+
+std::vector< cl_device_id > initializeCL( cl_device_type deviceType,	//	device class requested from ::clGetDeviceIDs()
+										  cl_uint deviceGpuList,	//	bit u set = keep GPU number u; all bits set = keep every GPU
+										  cl_context& context,	//	out: receives the newly created context
+										  bool printclInfo )	//	true: print platform and device details to std::cout
+{	//	Returns the devices of the new context; throws std::runtime_error when a CL call fails or no platform/device remains
+	cl_int status = 0;
+
+	/*
+	 * Enumerate the available platforms (printing each one on request)
+	 * and keep the last one reported; no vendor preference is applied.
+	 */
+
+	cl_uint numPlatforms	= 0;
+	cl_platform_id platform = NULL;
+	OPENCL_V_THROW( ::clGetPlatformIDs( 0, NULL, &numPlatforms ),
+			"Getting number of platforms( ::clGetPlatformIDs() )" );
+
+	if( numPlatforms > 0 )
+	{
+		std::vector< cl_platform_id > platforms( numPlatforms );
+		OPENCL_V_THROW( ::clGetPlatformIDs( numPlatforms, &platforms[ 0 ], NULL ),
+			"Getting Platform Id's ( ::clGetPlatformIDs() )" );
+
+		//	TODO: How should we determine what platform to choose?  We are just defaulting to the last one reported, as we
+		//	print out the info
+		for( unsigned int i=0; i < numPlatforms; ++i )
+		{
+			if( printclInfo )
+			{
+				std::cout << "OpenCL platform [ " << i << " ]:" << std::endl;
+				prettyPrintPlatformInfo( platforms[i] );
+			}
+
+			platform = platforms[i];
+		}
+	}
+
+	if( NULL == platform )
+	{
+		throw std::runtime_error( "No appropriate OpenCL platform could be found" );
+	}
+
+	/*
+	 * From here on everything is done on the platform selected above.
+	 */
+
+	//	Get the device list for this type.
+	//
+	cl_uint num_devices = 0;
+	OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, 0, NULL, &num_devices ),
+		"Getting OpenCL devices ( ::clGetDeviceIDs() )" );
+	if( 0 == num_devices )
+	{
+		OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available");
+	}
+
+	std::vector< cl_device_id > deviceIDs( num_devices );
+	OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, num_devices, &deviceIDs[0], NULL),
+		"Getting OpenCL deviceIDs ( ::clGetDeviceIDs() )" );
+
+	if( (CL_DEVICE_TYPE_GPU == deviceType) && (~cl_uint(0) != deviceGpuList) )
+	{
+		//	The command line options specify to use certain gpu(s) only
+		const unsigned maskBits = 32;	//	cl_uint is 32 bits wide; a GPU with a higher index cannot be named by the mask
+		for( unsigned u = (unsigned) deviceIDs.size(); u-- > 0; )
+		{
+			if( (u < maskBits) && (0 != (deviceGpuList & (cl_uint(1) << u))) )
+				continue;
+
+			//  Remove this GPU from the list; every entry behind u was already accepted, so moving the last one here is safe
+			deviceIDs[u] = deviceIDs.back();
+			deviceIDs.pop_back();
+		}
+	}
+
+	if( 0 == deviceIDs.size( ) )
+	{
+		OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available");
+	}
+
+	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
+
+	/////////////////////////////////////////////////////////////////
+	// Create an OpenCL context
+	/////////////////////////////////////////////////////////////////
+	context = clCreateContext( cps,
+							   (cl_uint) deviceIDs.size(),
+							   & deviceIDs[0],
+							   NULL,
+							   NULL,
+							   &status);
+	OPENCL_V_THROW( status, "Creating Context ( ::clCreateContext() )" );
+
+	/* First, get the size of device list data */
+	size_t deviceListSize = 0;
+	OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize ),
+		"Getting device array size ( ::clGetContextInfo() )" );
+
+	/////////////////////////////////////////////////////////////////
+	// Detect OpenCL devices
+	/////////////////////////////////////////////////////////////////
+	std::vector< cl_device_id > devices( deviceListSize/sizeof( cl_device_id ) );
+
+	/* Now, get the device list data */
+	OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, &devices[ 0 ], NULL ),
+		"Getting device array ( ::clGetContextInfo() )" );
+
+	if( printclInfo )
+	{
+		cl_uint cContextDevices	= 0;
+
+		size_t deviceVersionSize	= 0;
+		OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ),
+			"Getting CL_DEVICE_VERSION device info string size ( ::clGetDeviceInfo() )" );
+
+		std::vector< char > szDeviceVersion( deviceVersionSize );
+		OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ),
+			"Getting CL_DEVICE_VERSION device info string ( ::clGetDeviceInfo() )" );
+
+		char openclstr[11]="OpenCL 1.0";	//	a device reporting this version prefix gets exactly one device printed, without the query below
+
+		if (!strncmp((const char*)&szDeviceVersion[ 0 ], openclstr, 10))
+		{
+			cContextDevices	= 1;
+		}
+		else
+		{
+			OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_NUM_DEVICES, sizeof( cContextDevices ), &cContextDevices, NULL ),
+				"Getting number of context devices ( ::clGetContextInfo() )" );
+		}
+
+		for( cl_uint i = 0; i < cContextDevices; ++i )
+		{
+			std::cout << "OpenCL devices [ " << i << " ]:" << std::endl;
+			prettyPrintDeviceInfo( devices[i] );
+		}
+	}
+
+	return devices;
+}
+
+int cleanupCL( cl_context* context, cl_command_queue* commandQueue,
+	const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent )
+{	//	Releases, in this order: the event, both buffer arrays, the command queue and the context; NULL handles are skipped
+	if( *outEvent != NULL )	//	the pointer arguments are dereferenced unchecked, so none of them may be NULL
+		OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" );
+
+	releaseOpenCLMemBuffer( numBuffersIn, inputBuffer);
+	releaseOpenCLMemBuffer( numBuffersOut, outputBuffer);
+
+	if( *commandQueue != NULL )
+		OPENCL_V_THROW( clReleaseCommandQueue( *commandQueue ), "Error: In clReleaseCommandQueue\n" );
+
+	if( *context != NULL )
+		OPENCL_V_THROW( clReleaseContext( *context ), "Error: In clReleaseContext\n" );
+
+	return 0;	//	always 0; a failing release throws through OPENCL_V_THROW and the handles after it are then not released
+}
+
+int createOpenCLMemoryBuffer( cl_context& context, const size_t bufferSizeBytes, const cl_uint numBuffers, cl_mem buffer[], cl_mem_flags accessibility) {
+	cl_int status = 0;	//	Fills buffer[ 0 .. numBuffers-1 ] with new buffers of bufferSizeBytes each, created in context with the given flags
+
+	for( cl_uint i = 0; i < numBuffers; ++i )
+	{
+		buffer[ i ] = ::clCreateBuffer( context, accessibility, bufferSizeBytes, NULL, &status);	//	NULL: no host pointer is supplied
+		OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );	//	throws on failure; buffers created in earlier iterations stay in buffer[]
+	}
+
+	return 0;	//	always 0; errors are reported by exception only
+}
+
+int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[])
+{	//	Calls clReleaseMemObject on every non-NULL entry of buffer[ 0 .. numBuffers-1 ]
+	for( cl_uint i = 0; i < numBuffers; ++i )
+	{
+		if( buffer[ i ] != NULL )	//	NULL entries are skipped; released entries are not reset to NULL afterwards
+			OPENCL_V_THROW( clReleaseMemObject( buffer[ i ] ), "Error: In clReleaseMemObject\n" );
+	}
+
+	return 0;	//	always 0; a failing release throws through OPENCL_V_THROW and ends the loop early
+}
+
+void createOpenCLCommandQueue( cl_context& context,	//	context the queue and all buffers are created in
+							   cl_uint commandQueueFlags,	//	passed on unchanged as the properties argument of ::clCreateCommandQueue()
+							   cl_command_queue& commandQueue,	//	out: receives the new queue
+							   std::vector< cl_device_id > devices,	//	taken by value; only devices[0] is used and it is accessed unchecked
+							   const size_t bufferSizeBytesIn,	//	size of each input buffer
+							   const cl_uint numBuffersIn,	//	number of entries to fill in clMemBufferIn
+							   cl_mem clMemBufferIn[],	//	out: input buffers
+							   const size_t bufferSizeBytesOut,	//	size of each output buffer
+							   const cl_uint numBuffersOut,	//	number of entries to fill in clMemBufferOut
+							   cl_mem clMemBufferOut[] )	//	out: output buffers
+{
+	cl_int status = 0;
+	commandQueue = ::clCreateCommandQueue( context, devices[0], commandQueueFlags, &status );
+	OPENCL_V_THROW( status, "Creating Command Queue ( ::clCreateCommandQueue() )" );	//	throws before any buffer is created if the queue cannot be made
+
+	createOpenCLMemoryBuffer( context, bufferSizeBytesIn,  numBuffersIn,  clMemBufferIn,  CL_MEM_READ_WRITE);	//	both sets are created read-write
+	createOpenCLMemoryBuffer( context, bufferSizeBytesOut, numBuffersOut, clMemBufferOut, CL_MEM_READ_WRITE);
+}
+
diff --git a/src/client/openCL.misc.h b/src/client/openCL.misc.h
new file mode 100644
index 00000000..f7f6c202
--- /dev/null
+++ b/src/client/openCL.misc.h
@@ -0,0 +1,110 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( OPENCL_MISC_H )
+#define OPENCL_MISC_H
+#include <memory>
+#include <stdexcept>
+#include "unicode.compatibility.h"
+
+//	Creating a portable definition of countof: the number of elements of a true array (not of a pointer)
+#if defined( _WIN32 )
+	#define countOf _countof
+#else
+	#define countOf( arr ) ( sizeof( arr ) / sizeof( ( arr )[ 0 ] ) )
+#endif
+
+/*
+ * \brief OpenCL related initialization
+ *        Select a platform and collect its devices of the requested type
+ *        Create the context on those devices
+ *        Return the device list of that context
+ */
+std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
+										  cl_uint deviceGpuList,
+										  cl_context& context,
+										  bool printclInfo );
+
+/*
+ * \brief OpenCL memory buffer creation
+ */
+int createOpenCLMemoryBuffer(
+		cl_context& context,
+		const size_t bufferSizeBytes,
+		const cl_uint numBuffers,
+		cl_mem buffer[],
+		cl_mem_flags accessibility
+		);
+
+/*
+ * \brief OpenCL command queue creation
+ *        Create Command Queue
+ *        Create OpenCL memory buffer objects
+ */
+void createOpenCLCommandQueue( cl_context& context,
+							   cl_uint commandQueueFlags,
+							   cl_command_queue& commandQueue,
+							   std::vector< cl_device_id > devices,
+							   const size_t bufferSizeBytesIn,
+							   const cl_uint numBuffersIn,
+							   cl_mem clMemBufferIn[],
+							   const size_t bufferSizeBytesOut,
+							   const cl_uint numBuffersOut,
+							   cl_mem clMemBufferOut[] );
+
+/*
+ * \brief release OpenCL memory buffer
+ */
+int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] );
+
+std::string prettyPrintclFFTStatus( const cl_int& status );
+
+//	This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition.
+//	If an error occurs, we throw.
+//	Note: std::runtime_error does not take unicode strings as input, so only strings supported
+inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno )
+{
+	//	A successful status is simply handed back to the caller
+	if( res == CL_SUCCESS )
+	{
+		return	res;
+	}
+
+	//	Every other status is reported as
+	//		OPENCL_V_THROWERROR< status name > (line): message
+	//	NOTE(review): needs <sstream> and <iostream> from the including file unless unicode.compatibility.h supplies them - confirm
+	std::stringstream report;
+	report << "OPENCL_V_THROWERROR< " << prettyPrintclFFTStatus( res ) << " > (";
+	report << lineno << "): " << msg;
+
+	//	The report is echoed on std::cout and then becomes the what( )
+	//	text of the std::runtime_error that is thrown
+	const std::string errorm( report.str( ) );
+
+	std::cout << errorm<< std::endl;
+	throw	std::runtime_error( errorm );
+}
+//	Call-site wrapper: passes the line of the call as lineno
+#define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__)
+
+/*
+ * \brief Release OpenCL resources (Context, Memory etc.)
+ */
+int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent );
+
+#endif
diff --git a/src/client/stdafx.cpp b/src/client/stdafx.cpp
new file mode 100644
index 00000000..2587b2c1
--- /dev/null
+++ b/src/client/stdafx.cpp
@@ -0,0 +1,25 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// stdafx.cpp : source file that includes just the standard includes
+// clFFT.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/src/include/clAmdFft.h b/src/include/clAmdFft.h
new file mode 100644
index 00000000..848d0760
--- /dev/null
+++ b/src/include/clAmdFft.h
@@ -0,0 +1,535 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+/*! @file clAmdFft.h
+ * @note clAmdFft.h is a deprecated header file.
+ * This header is provided to help projects that were written with the older clAmdFft codebase, to help them 
+ * port to the new API at their own schedule.  It will not be maintained or updated, and will be removed after 
+ * a reasonable amount of time has passed.  All new code should be written against clFFT.h.  
+ * Older projects should migrate to the new header at their earliest convenience.
+ */
+
+#pragma once
+#if !defined( CLAMDFFT_DOTH )
+#define CLAMDFFT_DOTH
+
+#include "clFFT.h"
+
+/* The following header defines a fixed version number as this header is deprecated and won't be updated */
+#include "clAmdFft.version.h"
+
+/*	In general, you can not use namespaces for strict C compliance, so we prefix our public accessible names
+ *	with the string clAmdFft
+ */
+
+/*	All functions will return pre-defined error codes, and will NOT throw exceptions to the caller
+ */
+
+/*!  @brief clAmdFft error codes definition, incorporating OpenCL error definitions
+ *
+ *   This enumeration is a superset of the OpenCL error codes.  For example, CL_OUT_OF_HOST_MEMORY,
+ *   which is defined in cl.h is aliased as CLFFT_OUT_OF_HOST_MEMORY.  The set of basic OpenCL
+ *   error codes is extended to add extra values specific to the clAmdFft package.
+ */
+typedef enum clfftStatus_ clAmdFftStatus;
+
+/*!  @brief The dimension of the input and output buffers that will be fed into all FFT transforms */
+typedef enum clfftDim_ clAmdFftDim;
+
+/*!  @brief These are the expected layouts of the buffers */
+typedef enum clfftLayout_ clAmdFftLayout;
+
+/*!  @brief This is the expected precision of each FFT.
+ */
+typedef enum clfftPrecision_ clAmdFftPrecision;
+
+/*!  @brief What is the expected direction of each FFT, time or the frequency domains */
+typedef enum clfftDirection_ clAmdFftDirection;
+
+/*!  @brief Are the input buffers overwritten with the results */
+typedef enum clfftResultLocation_ clAmdFftResultLocation;
+
+/*! @brief This determines whether the result is returned in original order. It is valid only for
+dimensions greater than 1. */
+typedef enum clfftResultTransposed_ clAmdFftResultTransposed;
+
+/*! @brief Data structure that can be passed to clAmdFftSetup() to control the behavior of the FFT runtime
+ *  @details This structure contains values that can be initialized before instantiation of the FFT runtime
+ *  with ::clAmdFftSetup().  To initialize this structure, pass a pointer to a user struct to ::clAmdFftInitSetupData( ),
+ *  which will clear the structure and set the version member variables to the current values.
+ */
+typedef struct clfftSetupData_ clAmdFftSetupData;
+
+/*!  @brief An abstract handle to the object that represents the state of the FFT(s) */
+typedef clfftPlanHandle clAmdFftPlanHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+	/*! @brief Initialize a clAmdFftSetupData struct for the client
+	 *  @details clAmdFftSetupData is passed to clAmdFftSetup to control behavior of the FFT runtime
+	 *  @param[out] setupData Data structure is cleared, initialized with version information and default values
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftInitSetupData( clAmdFftSetupData* setupData )
+	{
+		return clfftInitSetupData( setupData );
+	}
+
+	/*! @brief Initialize internal FFT resources.
+	 *  @details AMD's FFT implementation caches kernels, programs and buffers for its internal use.
+	 *  @param[in] setupData Data structure that can be passed into the setup routine to control FFT generation behavior
+	 * 	and debug functionality
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftSetup( const clAmdFftSetupData* setupData )
+	{
+		return clfftSetup( setupData );
+	}
+
+	/*! @brief Release all internal resources.
+	 *  @details Call when client is done with this FFT library, allowing the library to destroy all resources it has cached
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftTeardown( void )
+	{
+		return clfftTeardown( );
+	}
+
+	/*! @brief Query the FFT library for version information
+	 *  @details Return the major, minor and patch version numbers associated with this FFT library
+	 *  @param[out] major Major functionality change
+	 *  @param[out] minor Minor functionality change
+	 *  @param[out] patch Bug fixes, documentation changes, no new features introduced
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch )
+	{
+		return clfftGetVersion( major, minor, patch );
+	}
+
+	/*! @brief Create a plan object initialized entirely with default values.
+	 *  @details A plan is a repository of state for calculating FFT's.  Allows the runtime to pre-calculate kernels, programs
+	 * 	and buffers and associate them with buffers of specified dimensions.
+	 *  @param[out] plHandle Handle to the newly created plan
+	 *  @param[in] context Client is responsible for providing an OpenCL context for the plan
+	 *  @param[in] dim The dimensionality of the FFT transform; describes how many elements are in the array
+	 *  @param[in] clLengths An array of lengths, of size 'dim'.  Each value describes the length of additional dimensions
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftCreateDefaultPlan( clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim,
+								const size_t* clLengths )
+	{
+		return clfftCreateDefaultPlan( plHandle, context, dim, clLengths );
+	}
+
+	/*! @brief Create a copy of an existing plan.
+	 *  @details This API allows a client to create a new plan based upon an existing plan.  This is a convenience function
+	 *  provided for quickly creating plans that are similar, but may differ slightly.
+	 *  @param[out] out_plHandle Handle to the newly created plan that is based on in_plHandle
+	 *  @param[in] new_context Client is responsible for providing a new context for the new plan
+	 *  @param[in] in_plHandle Handle to a plan to be copied, previously created
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftCopyPlan( clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle )
+	{
+		return clfftCopyPlan( out_plHandle, new_context, in_plHandle );
+	}
+
+	/*! @brief Prepare the plan for execution.
+	 *  @details After all plan parameters are set, the client has the option of 'baking' the plan, which tells the runtime that
+	 *  no more changes to the plan's parameters are expected, and the OpenCL kernels should be compiled.  This optional function
+	 *  allows the client application to perform this function when the application is being initialized instead of on the first
+	 *  execution.
+	 *  At this point, the clAmdFft runtime will apply all implemented optimizations, possibly including
+	 *  running kernel experiments on the devices in the plan context.
+	 *  <p>  Users should assume that this function will take a long time to execute.  If a plan is not baked before being executed,
+	 *  users should assume that the first call to clAmdFftEnqueueTransform will take a long time to execute.
+	 *  <p>  If any significant parameter of a plan is changed after the plan is baked (by a subsequent call to one of
+	 *  the clAmdFftSetPlan____ functions), that will not be considered an error.  Instead, the plan will revert back to
+	 *  the unbaked state, discarding the benefits of the baking operation.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] numQueues Number of command queues in commQueueFFT; 0 is a valid value, in which case client does not want
+	 * 	the runtime to run load experiments and only pre-calculate state information
+	 *  @param[in] commQueueFFT An array of cl_command_queues created by the client; the command queues must be a proper subset of
+	 * 	the devices included in the plan context
+	 *  @param[in] pfn_notify A function pointer to a notification routine. The notification routine is a callback function that
+	 *  an application can register and which will be called when the program executable has been built (successfully or unsuccessfully).
+	 *  Currently, this parameter MUST be NULL or nullptr.
+	 *  @param[in] user_data Passed as an argument when pfn_notify is called.
+	 *  Currently, this parameter MUST be NULL or nullptr.
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftBakePlan( clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT,
+							void (CL_CALLBACK *pfn_notify)(clAmdFftPlanHandle plHandle, void *user_data), void* user_data )
+	{
+		return clfftBakePlan( plHandle, numQueues, commQueueFFT, pfn_notify, user_data );
+	}
+
+	/*! @brief Release the resources of a plan.
+	 *  @details A plan may include kernels, programs and buffers associated with it that consume memory.  When a plan
+	 *  is not needed anymore, the client should release the plan.
+	 *  @param[in,out] plHandle Handle to a plan previously created
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftDestroyPlan( clAmdFftPlanHandle* plHandle )
+	{
+		return clfftDestroyPlan( plHandle );
+	}
+
+	/*! @brief Retrieve the OpenCL context of a previously created plan.
+	 *  @details User should pass a reference to a cl_context variable, which will be changed to point to a
+	 *  context set in the specified plan.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] context Reference to user allocated cl_context, which will point to context set in plan
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanContext( const clAmdFftPlanHandle plHandle, cl_context* context )
+	{
+		return clfftGetPlanContext( plHandle, context );
+	}
+
+	/*! @brief Retrieve the floating point precision of the FFT data
+	 *  @details User should pass a reference to a clAmdFftPrecision variable, which will be set to the
+	 *  precision of the FFT complex data in the plan.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] precision Reference to user clAmdFftPrecision enum
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanPrecision( const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision )
+	{
+		return clfftGetPlanPrecision( plHandle, precision );
+	}
+
+	/*! @brief Set the floating point precision of the FFT data
+	 *  @details Set the plan property which will be the precision of the FFT complex data in the plan.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] precision The clAmdFftPrecision enum value to set on the plan (passed by value)
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanPrecision( clAmdFftPlanHandle plHandle, clAmdFftPrecision precision )
+	{
+		return clfftSetPlanPrecision( plHandle, precision );
+	}
+
+	/*! @brief Retrieve the scaling factor that should be applied to the FFT data
+	 *  @details User should pass a reference to a cl_float variable, which will be set to the
+	 *  floating point scaling factor that will be multiplied across the FFT data.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dir Which direction does the scaling factor apply to
+	 *  @param[out] scale Reference to user cl_float variable
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanScale( const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale )
+	{
+		return clfftGetPlanScale( plHandle, dir, scale );
+	}
+
+	/*! @brief Set the scaling factor that should be applied to the FFT data
+	 *  @details Set the plan property which will be the floating point scaling factor that will be
+	 *  multiplied across the FFT data.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dir Which direction does the scaling factor apply to
+	 *  @param[in] scale The cl_float scaling factor to set on the plan (passed by value)
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanScale( clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale )
+	{
+		return clfftSetPlanScale( plHandle, dir, scale );
+	}
+
+	/*! @brief Retrieve the number of discrete arrays that this plan can handle concurrently
+	 *  @details User should pass a reference to a size_t variable, which will be set to the
+	 *  number of discrete arrays (1D or 2D) that will be batched together for this plan
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] batchSize How many discrete number of FFT's are to be performed
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanBatchSize( const clAmdFftPlanHandle plHandle, size_t* batchSize )
+	{
+		return clfftGetPlanBatchSize( plHandle, batchSize );
+	}
+
+	/*! @brief Set the number of discrete arrays that this plan can handle concurrently
+	 *  @details Set the plan property which will be set to the number of discrete arrays (1D or 2D)
+	 *  that will be batched together for this plan
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] batchSize How many discrete number of FFT's are to be performed
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanBatchSize( clAmdFftPlanHandle plHandle, size_t batchSize )
+	{
+		return clfftSetPlanBatchSize( plHandle, batchSize );
+	}
+
+	/*! @brief Retrieve the dimensionality of FFT's to be transformed in the plan
+	 *  @details Queries a plan object and retrieves the dimensionality that the plan is set for.  A size is returned to
+	 *  help the client allocate the proper storage to hold the dimensions in a further call to clAmdFftGetPlanLength
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] dim The dimensionality of the FFT's to be transformed
+	 *  @param[out] size Value used to allocate an array to hold the FFT dimensions.
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanDim( const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size )
+	{
+		return clfftGetPlanDim( plHandle, dim, size );
+	}
+
+	/*! @brief Set the dimensionality of FFT's to be transformed by the plan
+	 *  @details Set the dimensionality of FFT's to be transformed by the plan
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimensionality of the FFT's to be transformed
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanDim( clAmdFftPlanHandle plHandle, const clAmdFftDim dim )
+	{
+		return clfftSetPlanDim( plHandle, dim );
+	}
+
+	/*! @brief Retrieve the length of each dimension of the FFT
+	 *  @details User should pass a reference to a size_t array, which will be set to the
+	 *  length of each discrete dimension of the FFT
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the length parameters; describes how many elements are in the array
+	 *  @param[out] clLengths An array of lengths, of size 'dim'.  Each array value describes the length of each dimension
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanLength( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths )
+	{
+		return clfftGetPlanLength( plHandle, dim, clLengths );
+	}
+
+	/*! @brief Set the length of each dimension of the FFT
+	 *  @details Set the plan property which will be the length of each discrete dimension of the FFT
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the length parameters; describes how many elements are in the array
+	 *  @param[in] clLengths An array of lengths, of size 'dim'.  Each value describes the length of additional dimensions
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanLength( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths )
+	{
+		return clfftSetPlanLength( plHandle, dim, clLengths );
+	}
+
+	/*! @brief Retrieve the distance between consecutive elements for input buffers in a dimension.
+	 *  @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely
+	 *  ignored
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[out] clStrides An array of strides, of size 'dim'.
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanInStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides )
+	{
+		return clfftGetPlanInStride( plHandle, dim, clStrides );
+	}
+
+	/*! @brief Set the distance between consecutive elements for input buffers in a dimension.
+	 *  @details Set the plan properties which will be the distance between elements in a given dimension
+	 *  (units are in terms of clAmdFftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[in] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously.
+	 * 	Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored in packed format.
+	 *  See  @ref DistanceStridesandPitches for details.
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanInStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides )
+	{
+		return clfftSetPlanInStride( plHandle, dim, clStrides );
+	}
+
+	/*! @brief Retrieve the distance between consecutive elements for output buffers in a dimension.
+	 *  @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely
+	 *  ignored
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[out] clStrides An array of strides, of size 'dim'.
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanOutStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides )
+	{
+		return clfftGetPlanOutStride( plHandle, dim, clStrides );
+	}
+
+	/*! @brief Set the distance between consecutive elements for output buffers in a dimension.
+	 *  @details Set the plan properties which will be the distance between elements in a given dimension
+	 *  (units are in terms of clAmdFftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[in] clStrides An array of strides, of size 'dim'.  Usually strideX=1 so that successive elements in the first dimension are stored contiguously.
+	 * 	Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored in packed format.
+	 *  @sa clAmdFftSetPlanInStride
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanOutStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides )
+	{
+		return clfftSetPlanOutStride( plHandle, dim, clStrides );
+	}
+
+	/*! @brief Retrieve the distance between Array objects
+	 *  @details Pitch is the distance between each discrete array object in an FFT array. This is only used
+	 *  for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDim (units are in terms of clAmdFftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input.
+	 *  For contiguous arrays in memory, iDist=(strideX*strideY*strideZ)
+	 *  @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output.
+	 *  For contiguous arrays in memory, oDist=(strideX*strideY*strideZ)
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanDistance( const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist )
+	{
+		return clfftGetPlanDistance( plHandle, iDist, oDist );
+	}
+
+	/*! @brief Set the distance between Array objects
+	 *  @details Pitch is the distance between each discrete array object in an FFT array. This is only used
+	 *  for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDim (units are in terms of clAmdFftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] iDist The distance between the beginning elements of the discrete array objects in memory on input.
+	 *  For contiguous arrays in memory, iDist=(strideX*strideY*strideZ)
+	 *  @param[in] oDist The distance between the beginning elements of the discrete array objects in memory on output.
+	 *  For contiguous arrays in memory, oDist=(strideX*strideY*strideZ)
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanDistance( clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist )
+	{
+		return clfftSetPlanDistance( plHandle, iDist, oDist );
+	}
+
+	/*! @brief Retrieve the expected layout of the input and output buffers
+	 *  @details Output buffers can be filled with either hermitian or complex numbers.  Complex numbers can be stored
+	 *  in various layouts; this informs the FFT engine what layout to produce on output
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] iLayout Indicates how the input buffers are laid out in memory
+	 *  @param[out] oLayout Indicates how the output buffers are laid out in memory
+	 */
+	static __inline clAmdFftStatus clAmdFftGetLayout( const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout )
+	{
+		return clfftGetLayout( plHandle, iLayout, oLayout );
+	}
+
+	/*! @brief Set the expected layout of the input and output buffers
+	 *  @details Output buffers can be filled with either hermitian or complex numbers.  Complex numbers can be stored
+	 *  in various layouts; this informs the FFT engine what layout to produce on output
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] iLayout Indicates how the input buffers are laid out in memory
+	 *  @param[in] oLayout Indicates how the output buffers are laid out in memory
+	 */
+	static __inline clAmdFftStatus clAmdFftSetLayout( clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout )
+	{
+		return clfftSetLayout( plHandle, iLayout, oLayout );
+	}
+
+	/*! @brief Retrieve whether the input buffers are going to be overwritten with results
+	 *  @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the
+	 *  transform.  If the setting is for out-of-place transforms, the engine knows to look for separate output buffers
+	 *  on the Enqueue call.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results
+	 */
+	static __inline clAmdFftStatus clAmdFftGetResultLocation( const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness )
+	{
+		return clfftGetResultLocation( plHandle, placeness );
+	}
+
+	/*! @brief Set whether the input buffers are going to be overwritten with results
+	 *  @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the
+	 *  transform.  If the setting is for out-of-place transforms, the engine knows to look for separate output buffers
+	 *  on the Enqueue call.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results
+	 */
+	static __inline clAmdFftStatus clAmdFftSetResultLocation( clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness )
+	{
+		return clfftSetResultLocation( plHandle, placeness );
+	}
+
+	/*! @brief Retrieve the final transpose setting of a multi-dimensional FFT
+	 *  @details A multi-dimensional FFT typically transposes the data several times during calculation.  If the client
+	 *  does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped
+	 *  for possible speed improvements
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] transposed Parameter specifies whether the final transpose can be skipped
+	 */
+	static __inline clAmdFftStatus clAmdFftGetPlanTransposeResult( const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed * transposed )
+	{
+		return clfftGetPlanTransposeResult( plHandle, transposed );
+	}
+
+	/*! @brief Set the final transpose setting of a multi-dimensional FFT
+	 *  @details A multi-dimensional FFT typically transposes the data several times during calculation.  If the client
+	 *  does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped
+	 *  for possible speed improvements
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] transposed Parameter specifies whether the final transpose can be skipped
+	 */
+	static __inline clAmdFftStatus clAmdFftSetPlanTransposeResult( clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed )
+	{
+		return clfftSetPlanTransposeResult( plHandle, transposed );
+	}
+
+	/*! @brief Get buffer size (in bytes), which may be needed internally for an intermediate buffer
+	 *  @details Very large FFT transforms may need multiple passes, and the operation would need a temporary buffer to hold
+	 *  intermediate results. This function is only valid after the plan is baked, otherwise an invalid operation error
+	 *  is returned. If buffersize returns as 0, the runtime needs no temporary buffer.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] buffersize Size in bytes for intermediate buffer
+	 */
+	static __inline clAmdFftStatus clAmdFftGetTmpBufSize( const clAmdFftPlanHandle plHandle, size_t* buffersize )
+	{
+		return clfftGetTmpBufSize( plHandle, buffersize );
+	}
+
+	/*! @brief Enqueue an FFT transform operation, and return immediately (non-blocking)
+	 *  @details This transform API is the function that actually computes the FFT transform. It is non-blocking as it
+	 *  only enqueues the OpenCL kernels for execution. The synchronization step has to be managed by the user.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dir Forwards or backwards transform
+	 *  @param[in] numQueuesAndEvents Number of command queues in commQueues; number of expected events to be returned in outEvents
+	 *  @param[in] commQueues An array of cl_command_queues created by the client; the command queues must be a proper subset of
+	 * 	the devices included in the plan context
+	 *  @param[in] numWaitEvents Specify the number of elements in the eventWaitList array
+	 *  @param[in] waitEvents Events that this transform should wait to complete before executing on the device
+	 *  @param[out] outEvents The runtime fills this array with events corresponding 1 to 1 with the input command queues passed
+	 *	in commQueues.  This parameter can be NULL or nullptr, in which case client is not interested in receiving notifications
+	 *	when transforms are finished, otherwise if not NULL the client is responsible for allocating this array, with at least
+	 *	as many elements as specified in numQueuesAndEvents.
+	 *  @param[in] inputBuffers An array of cl_mem objects that contain data for processing by the FFT runtime.  If the transform
+	 *  is in place, the FFT results will overwrite the input buffers
+	 *  @param[out] outputBuffers An array of cl_mem objects that will store the results of out of place transforms.  If the transform
+	 *  is in place, this parameter may be NULL or nullptr.  It is completely ignored
+	 *  @param[in] tmpBuffer A cl_mem object that is reserved as a temporary buffer for FFT processing. If tmpBuffer is NULL or nullptr,
+	 *  and the runtime needs temporary storage, an internal temporary buffer will be created on the fly managed by the runtime.
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	static __inline clAmdFftStatus clAmdFftEnqueueTransform(
+												clAmdFftPlanHandle plHandle,
+												clAmdFftDirection dir,
+												cl_uint numQueuesAndEvents,
+												cl_command_queue* commQueues,
+												cl_uint numWaitEvents,
+												const cl_event* waitEvents,
+												cl_event* outEvents,
+												cl_mem* inputBuffers,
+												cl_mem* outputBuffers,
+												cl_mem tmpBuffer
+												)
+	{
+		return clfftEnqueueTransform( plHandle, dir, numQueuesAndEvents, commQueues, numWaitEvents, waitEvents, outEvents, 
+			inputBuffers, outputBuffers, tmpBuffer );
+	}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/include/clAmdFft.version.h b/src/include/clAmdFft.version.h
new file mode 100644
index 00000000..ec9ef542
--- /dev/null
+++ b/src/include/clAmdFft.version.h
@@ -0,0 +1,29 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+/*! @file clAmdFft.version.h
+ * @note clAmdFft.version.h is a deprecated header file.
+ * This header is provided to help projects that were written with the older clAmdFft codebase, to help them 
+ * port to the new API at their own schedule.  It will not be maintained or updated, and will be removed after 
+ * a reasonable amount of time has passed.  All new code should be written against clFFT.h.  
+ * Older projects should migrate to the new header at their earliest convenience.
+ */
+
+/* the configured version and settings for clFFT
+ */
+#define clAmdFftVersionMajor 2
+#define clAmdFftVersionMinor 0
+#define clAmdFftVersionPatch 0
diff --git a/src/include/clFFT.h b/src/include/clFFT.h
new file mode 100644
index 00000000..f75ded30
--- /dev/null
+++ b/src/include/clFFT.h
@@ -0,0 +1,580 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+/*! @file clFFT.h
+ * clFFT.h defines all of the public interfaces and types that are meant to be used by clFFT clients
+ * This is the one public header file that should be consumed by clFFT clients.  It is written to adhere to native "C"
+ * interfaces to make clFFT library as portable as possible; it should be callable from C, C++, .NET and Fortran,
+ * either with the proper linking or using wrapper classes.
+ *
+ */
+
+#pragma once
+#if !defined( CLFFT_H )
+#define CLFFT_H
+
+#if defined(__APPLE__) || defined(__MACOSX)
+	#include <OpenCL/cl.h>
+#else
+	#include <CL/cl.h>
+#endif
+
+#include "version.h"
+
+/*! This preprocessor definition is the standard way of making exporting APIs
+ *  from a DLL simpler. All files within this DLL are compiled with the CLFFT_EXPORTS
+ *  symbol defined on the command line. This symbol should not be defined on any project
+ *  that uses this DLL. This way any other project whose source files include this file see
+ *  clfft functions as being imported from a DLL, whereas this DLL sees symbols
+ *  defined with this macro as being exported.
+ */
+#if defined( _WIN32 )
+	#if !defined( __cplusplus )
+		#define inline __inline
+	#endif
+
+	#if defined( CLFFT_EXPORTS )
+		#define CLFFTAPI __declspec( dllexport )
+	#else
+		#define CLFFTAPI __declspec( dllimport )
+	#endif
+#else
+	#define CLFFTAPI
+#endif
+
+/*	In general, you can not use namespaces for strict C compliance, so we prefix our public accessible names
+ *	with the string clfft
+ */
+
+/*	All functions will return pre-defined error codes, and will NOT throw exceptions to the caller
+ */
+
+/*!  @brief clfft error codes definition, incorporating OpenCL error definitions
+ *
+ *   This enumeration is a superset of the OpenCL error codes.  For example, CL_OUT_OF_HOST_MEMORY,
+ *   which is defined in cl.h is aliased as CLFFT_OUT_OF_HOST_MEMORY.  The set of basic OpenCL
+ *   error codes is extended to add extra values specific to the clfft package.
+ */
+enum clfftStatus_
+{
+	CLFFT_INVALID_GLOBAL_WORK_SIZE			= CL_INVALID_GLOBAL_WORK_SIZE,
+	CLFFT_INVALID_MIP_LEVEL					= CL_INVALID_MIP_LEVEL,
+	CLFFT_INVALID_BUFFER_SIZE				= CL_INVALID_BUFFER_SIZE,
+	CLFFT_INVALID_GL_OBJECT					= CL_INVALID_GL_OBJECT,
+	CLFFT_INVALID_OPERATION					= CL_INVALID_OPERATION,
+	CLFFT_INVALID_EVENT						= CL_INVALID_EVENT,
+	CLFFT_INVALID_EVENT_WAIT_LIST			= CL_INVALID_EVENT_WAIT_LIST,
+	CLFFT_INVALID_GLOBAL_OFFSET				= CL_INVALID_GLOBAL_OFFSET,
+	CLFFT_INVALID_WORK_ITEM_SIZE			= CL_INVALID_WORK_ITEM_SIZE,
+	CLFFT_INVALID_WORK_GROUP_SIZE			= CL_INVALID_WORK_GROUP_SIZE,
+	CLFFT_INVALID_WORK_DIMENSION			= CL_INVALID_WORK_DIMENSION,
+	CLFFT_INVALID_KERNEL_ARGS				= CL_INVALID_KERNEL_ARGS,
+	CLFFT_INVALID_ARG_SIZE					= CL_INVALID_ARG_SIZE,
+	CLFFT_INVALID_ARG_VALUE					= CL_INVALID_ARG_VALUE,
+	CLFFT_INVALID_ARG_INDEX					= CL_INVALID_ARG_INDEX,
+	CLFFT_INVALID_KERNEL					= CL_INVALID_KERNEL,
+	CLFFT_INVALID_KERNEL_DEFINITION			= CL_INVALID_KERNEL_DEFINITION,
+	CLFFT_INVALID_KERNEL_NAME				= CL_INVALID_KERNEL_NAME,
+	CLFFT_INVALID_PROGRAM_EXECUTABLE		= CL_INVALID_PROGRAM_EXECUTABLE,
+	CLFFT_INVALID_PROGRAM					= CL_INVALID_PROGRAM,
+	CLFFT_INVALID_BUILD_OPTIONS				= CL_INVALID_BUILD_OPTIONS,
+	CLFFT_INVALID_BINARY					= CL_INVALID_BINARY,
+	CLFFT_INVALID_SAMPLER					= CL_INVALID_SAMPLER,
+	CLFFT_INVALID_IMAGE_SIZE				= CL_INVALID_IMAGE_SIZE,
+	CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR	= CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+	CLFFT_INVALID_MEM_OBJECT				= CL_INVALID_MEM_OBJECT,
+	CLFFT_INVALID_HOST_PTR					= CL_INVALID_HOST_PTR,
+	CLFFT_INVALID_COMMAND_QUEUE				= CL_INVALID_COMMAND_QUEUE,
+	CLFFT_INVALID_QUEUE_PROPERTIES			= CL_INVALID_QUEUE_PROPERTIES,
+	CLFFT_INVALID_CONTEXT					= CL_INVALID_CONTEXT,
+	CLFFT_INVALID_DEVICE					= CL_INVALID_DEVICE,
+	CLFFT_INVALID_PLATFORM					= CL_INVALID_PLATFORM,
+	CLFFT_INVALID_DEVICE_TYPE				= CL_INVALID_DEVICE_TYPE,
+	CLFFT_INVALID_VALUE						= CL_INVALID_VALUE,
+	CLFFT_MAP_FAILURE						= CL_MAP_FAILURE,
+	CLFFT_BUILD_PROGRAM_FAILURE				= CL_BUILD_PROGRAM_FAILURE,
+	CLFFT_IMAGE_FORMAT_NOT_SUPPORTED		= CL_IMAGE_FORMAT_NOT_SUPPORTED,
+	CLFFT_IMAGE_FORMAT_MISMATCH				= CL_IMAGE_FORMAT_MISMATCH,
+	CLFFT_MEM_COPY_OVERLAP					= CL_MEM_COPY_OVERLAP,
+	CLFFT_PROFILING_INFO_NOT_AVAILABLE		= CL_PROFILING_INFO_NOT_AVAILABLE,
+	CLFFT_OUT_OF_HOST_MEMORY				= CL_OUT_OF_HOST_MEMORY,
+	CLFFT_OUT_OF_RESOURCES					= CL_OUT_OF_RESOURCES,
+	CLFFT_MEM_OBJECT_ALLOCATION_FAILURE		= CL_MEM_OBJECT_ALLOCATION_FAILURE,
+	CLFFT_COMPILER_NOT_AVAILABLE			= CL_COMPILER_NOT_AVAILABLE,
+	CLFFT_DEVICE_NOT_AVAILABLE				= CL_DEVICE_NOT_AVAILABLE,
+	CLFFT_DEVICE_NOT_FOUND					= CL_DEVICE_NOT_FOUND,
+	CLFFT_SUCCESS							= CL_SUCCESS,
+	/*------------------------- Extended status codes for clfft ---------------------------------------*/
+	CLFFT_BUGCHECK =  4*1024,	/*!< Bugcheck; first clfft-specific code (4*1024 = 4096), later codes count up from here. */
+	CLFFT_NOTIMPLEMENTED,		/*!< Functionality is not implemented yet. */
+	CLFFT_TRANSPOSED_NOTIMPLEMENTED, /*!< Transposed functionality is not implemented for this transformation. */
+	CLFFT_FILE_NOT_FOUND,		/*!< Tried to open an existing file on the host system, but failed. */
+	CLFFT_FILE_CREATE_FAILURE,	/*!< Tried to create a file on the host system, but failed. */
+	CLFFT_VERSION_MISMATCH,		/*!< Version conflict between client and library. */
+	CLFFT_INVALID_PLAN,			/*!< Requested plan could not be found. */
+	CLFFT_DEVICE_NO_DOUBLE,		/*!< Double precision not supported on this device. */
+	CLFFT_ENDSTATUS				/*!< Sentinel: always the last enumerator, one past the highest clfft-specific status code. */
+};
+typedef enum clfftStatus_ clfftStatus;
+
+/*!  @brief The dimension of the input and output buffers that will be fed into all FFT transforms */
+typedef enum clfftDim_
+{
+	CLFFT_1D		= 1,		/*!< 1-dimensional FFT transform (default). */
+	CLFFT_2D,					/*!< 2-dimensional FFT transform. */
+	CLFFT_3D,					/*!< 3-dimensional FFT transform. */
+	ENDDIMENSION			/*!< Sentinel: always the last enumerator, one past CLFFT_3D. */
+} clfftDim;
+
+/*!  @brief These are the expected layouts of the buffers */
+typedef enum clfftLayout_
+{
+	CLFFT_COMPLEX_INTERLEAVED	= 1,	/*!< An array of complex numbers, with real and imaginary components together (default). */
+	CLFFT_COMPLEX_PLANAR,				/*!< Arrays of real components and arrays of imaginary components that have been separated out. */
+	CLFFT_HERMITIAN_INTERLEAVED,		/*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in same array. */
+	CLFFT_HERMITIAN_PLANAR,				/*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in separate arrays. */
+	CLFFT_REAL,							/*!< An array of real numbers, with no corresponding imaginary components. */
+	ENDLAYOUT			/*!< Sentinel: always the last enumerator, one past CLFFT_REAL. */
+} clfftLayout;
+
+/*!  @brief This is the expected precision of each FFT.
+ */
+typedef enum clfftPrecision_
+{
+	CLFFT_SINGLE	= 1,	/*!< An array of complex numbers, with real and imaginary components as floats (default). */
+	CLFFT_DOUBLE,			/*!< An array of complex numbers, with real and imaginary components as doubles. */
+	CLFFT_SINGLE_FAST,		/*!< Faster implementation preferred; going by the name, the float variant (accuracy trade-off not stated here). */
+	CLFFT_DOUBLE_FAST,		/*!< Faster implementation preferred; going by the name, the double variant (accuracy trade-off not stated here). */
+	ENDPRECISION	/*!< Sentinel: always the last enumerator, one past CLFFT_DOUBLE_FAST. */
+} clfftPrecision;
+
+/*!  @brief What is the expected direction of each FFT, time or the frequency domains */
+typedef enum clfftDirection_
+{
+	CLFFT_FORWARD	= -1,		/*!< FFT transform from the time to the frequency domain. */
+	CLFFT_BACKWARD	= 1,		/*!< FFT transform from the frequency to the time domain. */
+	CLFFT_MINUS		= -1,		/*!< Alias for the forward transform (same value as CLFFT_FORWARD). */
+	CLFFT_PLUS		= 1,		/*!< Alias for the backward transform (same value as CLFFT_BACKWARD). */
+	ENDDIRECTION			/*!< Always the last enumerator; its value is 2 (CLFFT_PLUS + 1), so it is a sentinel rather than a count. */
+} clfftDirection;
+
+/*!  @brief Are the input buffers overwritten with the results */
+typedef enum clfftResultLocation_
+{
+	CLFFT_INPLACE		= 1,		/*!< The input and output buffers are the same (default). */
+	CLFFT_OUTOFPLACE,				/*!< Separate input and output buffers. */
+	ENDPLACE				/*!< This value will always be last, and marks the length of clfftResultLocation. */
+} clfftResultLocation;
+
+/*! @brief This determines whether the result is returned in original order. It is valid only for
+dimensions greater than 1. */
+typedef enum clfftResultTransposed_ {
+	CLFFT_NOTRANSPOSE = 1,		/*!< The results are returned in the original preserved order (default). */
+	CLFFT_TRANSPOSED,			/*!< The result is transposed where a transpose kernel is supported (possibly faster). */
+	ENDTRANSPOSED			/*!< Sentinel: always the last enumerator, one past CLFFT_TRANSPOSED. */
+} clfftResultTransposed;
+
+/*! 	BitMasks to be used with clfftSetupData.debugFlags */
+#define CLFFT_DUMP_PROGRAMS 0x1
+
+/*! @brief Data structure that can be passed to clfftSetup() to control the behavior of the FFT runtime
+ *  @details This structure contains values that can be initialized before instantiation of the FFT runtime
+ *  with ::clfftSetup().  To initialize this structure, pass a pointer to a user struct to ::clfftInitSetupData( ),
+ *  which will clear the structure and set the version member variables to the current values.
+ */
+struct clfftSetupData_
+{
+	cl_uint major;		/*!< Major version number of the project; signifies major API changes. */
+	cl_uint minor;		/*!< Minor version number of the project; minor API changes that could break backwards compatibility. */
+	cl_uint patch;		/*!< Patch version number of the project; always-incrementing number, signifies change over time. */
+
+	/*! 	Bitwise flags that control the behavior of library debug logic. */
+	cl_ulong debugFlags;  /*!< This should be set to zero, except when debugging the clfft library.
+	                       *  <p> debugFlags can be set to CLFFT_DUMP_PROGRAMS, in which case the dynamically generated OpenCL kernels will
+	                       *  be written to text files in the current working directory.  These files will have a *.cl suffix.
+	                       */
+};
+typedef struct clfftSetupData_ clfftSetupData;
+
+/*!  @brief An abstract handle to the object that represents the state of the FFT(s) */
+typedef size_t clfftPlanHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+	/*! @brief Initialize a clfftSetupData struct for the client
+	 *  @details clfftSetupData is passed to clfftSetup to control behavior of the FFT runtime.  The function is static so each file that includes this header gets its own copy; a non-static inline has no external definition in C99.
+	 *  @param[out] setupData Every member is overwritten: version fields from clfftVersion{Major,Minor,Patch}, debugFlags = 0; must not be NULL
+	 *  @return Always CLFFT_SUCCESS
+	 */
+	static __inline clfftStatus clfftInitSetupData( clfftSetupData* setupData )
+	{
+		setupData->major	= clfftVersionMajor;
+		setupData->minor	= clfftVersionMinor;
+		setupData->patch	= clfftVersionPatch;
+		setupData->debugFlags	= 0;
+
+		return	CLFFT_SUCCESS;
+	}
+
+	/*! @brief Initialize internal FFT resources.
+	 *  @details AMD's FFT implementation caches kernels, programs and buffers for its internal use.
+	 *  @param[in] setupData Data structure that can be passed into the setup routine to control FFT generation behavior
+	 * 	and debug functionality
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftSetup( const clfftSetupData* setupData );
+
+	/*! @brief Release all internal resources.
+	 *  @details Call when client is done with this FFT library, allowing the library to destroy all resources it has cached
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftTeardown( );
+
+	/*! @brief Query the FFT library for version information
+	 *  @details Return the major, minor and patch version numbers associated with this FFT library
+	 *  @param[out] major Major functionality change
+	 *  @param[out] minor Minor functionality change
+	 *  @param[out] patch Bug fixes, documentation changes, no new features introduced
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch );
+
+	/*! @brief Create a plan object initialized entirely with default values.
+	 *  @details A plan is a repository of state for calculating FFT's.  Allows the runtime to pre-calculate kernels, programs
+	 * 	and buffers and associate them with buffers of specified dimensions.
+	 *  @param[out] plHandle Handle to the newly created plan
+	 *  @param[in] context Client is responsible for providing an OpenCL context for the plan
+	 *  @param[in] dim The dimensionality of the FFT transform; describes how many elements are in the array
+	 *  @param[in] clLengths An array of lengths, of size 'dim'.  Each value describes the length of additional dimensions
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftCreateDefaultPlan( clfftPlanHandle* plHandle, cl_context context, const clfftDim dim,
+								const size_t* clLengths );
+
+	/*! @brief Create a copy of an existing plan.
+	 *  @details This API allows a client to create a new plan based upon an existing plan.  This is a convenience function
+	 *  provided for quickly creating plans that are similar, but may differ slightly.
+	 *  @param[out] out_plHandle Handle to the newly created plan that is based on in_plHandle
+	 *  @param[in] new_context Client is responsible for providing a new context for the new plan
+	 *  @param[in] in_plHandle Handle to a plan to be copied, previously created
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftCopyPlan( clfftPlanHandle* out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle );
+
+	/*! @brief Prepare the plan for execution.
+	 *  @details After all plan parameters are set, the client has the option of 'baking' the plan, which tells the runtime that
+	 *  no more changes to the plan's parameters are expected, and the OpenCL kernels should be compiled.  This optional function
+	 *  allows the client application to perform this function when the application is being initialized instead of on the first
+	 *  execution.
+	 *  At this point, the clfft runtime will apply all implemented optimizations, possibly including
+	 *  running kernel experiments on the devices in the plan context.
+	 *  <p>  Users should assume that this function will take a long time to execute.  If a plan is not baked before being executed,
+	 *  users should assume that the first call to clfftEnqueueTransform will take a long time to execute.
+	 *  <p>  If any significant parameter of a plan is changed after the plan is baked (by a subsequent call to one of
+	 *  the clfftSetPlan____ functions), that will not be considered an error.  Instead, the plan will revert back to
+	 *  the unbaked state, discarding the benefits of the baking operation.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] numQueues Number of command queues in commQueueFFT; 0 is a valid value, in which case client does not want
+	 * 	the runtime to run load experiments and only pre-calculate state information
+	 *  @param[in] commQueueFFT An array of cl_command_queues created by the client; the command queues must be a proper subset of
+	 * 	the devices included in the plan context
+	 *  @param[in] pfn_notify A function pointer to a notification routine. The notification routine is a callback function that
+	 *  an application can register and which will be called when the program executable has been built (successfully or unsuccessfully).
+	 *  Currently, this parameter MUST be NULL or nullptr.
+	 *  @param[in] user_data Passed as an argument when pfn_notify is called.
+	 *  Currently, this parameter MUST be NULL or nullptr.
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT,
+							void (CL_CALLBACK *pfn_notify)(clfftPlanHandle plHandle, void *user_data), void* user_data );
+
+	/*! @brief Release the resources of a plan.
+	 *  @details A plan may include kernels, programs and buffers associated with it that consume memory.  When a plan
+	 *  is not needed anymore, the client should release the plan.
+	 *  @param[in,out] plHandle Handle to a plan previously created
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftDestroyPlan( clfftPlanHandle* plHandle );
+
+	/*! @brief Retrieve the OpenCL context of a previously created plan.
+	 *  @details User should pass a reference to a cl_context variable, which will be changed to point to a
+	 *  context set in the specified plan.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] context Reference to user allocated cl_context, which will point to context set in plan
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanContext( const clfftPlanHandle plHandle, cl_context* context );
+
+	/*! @brief Retrieve the floating point precision of the FFT data
+	 *  @details User should pass a reference to a clfftPrecision variable, which will be set to the
+	 *  precision of the FFT complex data in the plan.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] precision Reference to user clfftPrecision enum
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanPrecision( const clfftPlanHandle plHandle, clfftPrecision* precision );
+
+	/*! @brief Set the floating point precision of the FFT data
+	 *  @details Set the plan property which will be the precision of the FFT complex data in the plan.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] precision The clfftPrecision value to set on the plan (passed by value)
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanPrecision( clfftPlanHandle plHandle, clfftPrecision precision );
+
+	/*! @brief Retrieve the scaling factor that should be applied to the FFT data
+	 *  @details User should pass a reference to a cl_float variable, which will be set to the
+	 *  floating point scaling factor that will be multiplied across the FFT data.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dir Which direction does the scaling factor apply to
+	 *  @param[out] scale Reference to user cl_float variable
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanScale( const clfftPlanHandle plHandle, clfftDirection dir, cl_float* scale );
+
+	/*! @brief Set the scaling factor that should be applied to the FFT data
+	 *  @details Set the plan property which will be the floating point scaling factor that will be
+	 *  multiplied across the FFT data.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dir Which direction does the scaling factor apply to
+	 *  @param[in] scale The scaling factor to set on the plan for the given direction (passed by value)
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanScale( clfftPlanHandle plHandle, clfftDirection dir, cl_float scale );
+
+	/*! @brief Retrieve the number of discrete arrays that this plan can handle concurrently
+	 *  @details User should pass a reference to a cl_uint variable, which will be set to the
+	 *  number of discrete arrays (1D or 2D) that will be batched together for this plan
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] batchSize How many discrete number of FFT's are to be performed
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanBatchSize( const clfftPlanHandle plHandle, size_t* batchSize );
+
+	/*! @brief Set the number of discrete arrays that this plan can handle concurrently
+	 *  @details Set the plan property which will be set to the number of discrete arrays (1D or 2D)
+	 *  that will be batched together for this plan
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] batchSize How many discrete number of FFT's are to be performed
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanBatchSize( clfftPlanHandle plHandle, size_t batchSize );
+
+	/*! @brief Retrieve the dimensionality of FFT's to be transformed in the plan
+	 *  @details Queries a plan object and retrieves the dimensionality that the plan is set for.  A size is returned to
+	 *  help the client allocate the proper storage to hold the dimensions in a further call to clfftGetPlanLength
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] dim The dimensionality of the FFT's to be transformed
+	 *  @param[out] size Value used to allocate an array to hold the FFT dimensions.
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanDim( const clfftPlanHandle plHandle, clfftDim* dim, cl_uint* size );
+
+	/*! @brief Set the dimensionality of FFT's to be transformed by the plan
+	 *  @details Set the dimensionality of FFT's to be transformed by the plan
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimensionality of the FFT's to be transformed
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanDim( clfftPlanHandle plHandle, const clfftDim dim );
+
+	/*! @brief Retrieve the length of each dimension of the FFT
+	 *  @details User should pass a reference to a size_t array, which will be set to the
+	 *  length of each discrete dimension of the FFT
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the length parameters; describes how many elements are in the array
+	 *  @param[out] clLengths An array of lengths, of size 'dim'.  Each array value describes the length of each dimension
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanLength( const clfftPlanHandle plHandle, const clfftDim dim, size_t* clLengths );
+
+	/*! @brief Set the length of each dimension of the FFT
+	 *  @details Set the plan property which will be the length of each discrete dimension of the FFT
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the length parameters; describes how many elements are in the array
+	 *  @param[in] clLengths An array of lengths, of size 'dim'.  Each value describes the length of additional dimensions
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanLength( clfftPlanHandle plHandle, const clfftDim dim, const size_t* clLengths );
+
+	/*! @brief Retrieve the distance between consecutive elements for input buffers in a dimension.
+	 *  @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely
+	 *  ignored
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[out] clStrides An array of strides, of size 'dim'.
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanInStride( const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides );
+
+	/*! @brief Set the distance between consecutive elements for input buffers in a dimension.
+	 *  @details Set the plan properties which will be the distance between elements in a given dimension
+	 *  (units are in terms of clfftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[in] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously.
+	 * 	Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored in packed format.
+	 *  See  @ref DistanceStridesandPitches for details.
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanInStride( clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides );
+
+	/*! @brief Retrieve the distance between consecutive elements for output buffers in a dimension.
+	 *  @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely
+	 *  ignored
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[out] clStrides An array of strides, of size 'dim'.
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanOutStride( const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides );
+
+	/*! @brief Set the distance between consecutive elements for output buffers in a dimension.
+	 *  @details Set the plan properties which will be the distance between elements in a given dimension
+	 *  (units are in terms of clfftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dim The dimension of the stride parameters; describes how many elements are in the array
+	 *  @param[in] clStrides An array of strides, of size 'dim'.  Usually strideX=1 so that successive elements in the first dimension are stored contiguously.
+	 * 	Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored in packed format.
+	 *  @sa clfftSetPlanInStride
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanOutStride( clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides );
+
+	/*! @brief Retrieve the distance between Array objects
+	 *  @details Pitch is the distance between each discrete array object in an FFT array. This is only used
+	 *  for 'array' dimensions in clfftDim; see clfftSetPlanDim (units are in terms of clfftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input.
+	 *  For contiguous arrays in memory, iDist=(strideX*strideY*strideZ)
+	 *  @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output.
+	 *  For contiguous arrays in memory, oDist=(strideX*strideY*strideZ)
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanDistance( const clfftPlanHandle plHandle, size_t* iDist, size_t* oDist );
+
+	/*! @brief Set the distance between Array objects
+	 *  @details Pitch is the distance between each discrete array object in an FFT array. This is only used
+	 *  for 'array' dimensions in clfftDim; see clfftSetPlanDim (units are in terms of clfftPrecision)
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] iDist The distance between the beginning elements of the discrete array objects in memory on input.
+	 *  For contiguous arrays in memory, iDist=(strideX*strideY*strideZ)
+	 *  @param[in] oDist The distance between the beginning elements of the discrete array objects in memory on output.
+	 *  For contiguous arrays in memory, oDist=(strideX*strideY*strideZ)
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanDistance( clfftPlanHandle plHandle, size_t iDist, size_t oDist );
+
+	/*! @brief Retrieve the expected layout of the input and output buffers
+	 *  @details Output buffers can be filled with either hermitian or complex numbers.  Complex numbers can be stored
+	 *  in various layouts; this informs the FFT engine what layout to produce on output
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] iLayout Indicates how the input buffers are laid out in memory
+	 *  @param[out] oLayout Indicates how the output buffers are laid out in memory
+	 */
+	CLFFTAPI clfftStatus	clfftGetLayout( const clfftPlanHandle plHandle, clfftLayout* iLayout, clfftLayout* oLayout );
+
+	/*! @brief Set the expected layout of the input and output buffers
+	 *  @details Output buffers can be filled with either hermitian or complex numbers.  Complex numbers can be stored
+	 *  in various layouts; this informs the FFT engine what layout to produce on output
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] iLayout Indicates how the input buffers are laid out in memory
+	 *  @param[in] oLayout Indicates how the output buffers are laid out in memory
+	 */
+	CLFFTAPI clfftStatus	clfftSetLayout( clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout );
+
+	/*! @brief Retrieve whether the input buffers are going to be overwritten with results
+	 *  @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the
+	 *  transform.  If the setting is for out-of-place transforms, the engine knows to look for separate output buffers
+	 *  on the Enqueue call.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results
+	 */
+	CLFFTAPI clfftStatus	clfftGetResultLocation( const clfftPlanHandle plHandle, clfftResultLocation* placeness );
+
+	/*! @brief Set whether the input buffers are going to be overwritten with results
+	 *  @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the
+	 *  transform.  If the setting is for out-of-place transforms, the engine knows to look for separate output buffers
+	 *  on the Enqueue call.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results
+	 */
+	CLFFTAPI clfftStatus	clfftSetResultLocation( clfftPlanHandle plHandle, clfftResultLocation placeness );
+
+	/*! @brief Retrieve the final transpose setting of a multi-dimensional FFT
+	 *  @details A multi-dimensional FFT typically transposes the data several times during calculation.  If the client
+	 *  does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped
+	 *  for possible speed improvements
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] transposed Parameter specifies whether the final transpose can be skipped
+	 */
+	CLFFTAPI clfftStatus	clfftGetPlanTransposeResult( const clfftPlanHandle plHandle, clfftResultTransposed * transposed );
+
+	/*! @brief Set the final transpose setting of a multi-dimensional FFT
+	 *  @details A multi-dimensional FFT typically transposes the data several times during calculation.  If the client
+	 *  does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped
+	 *  for possible speed improvements
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] transposed Parameter specifies whether the final transpose can be skipped
+	 */
+	CLFFTAPI clfftStatus	clfftSetPlanTransposeResult( clfftPlanHandle plHandle, clfftResultTransposed transposed );
+
+
+	/*! @brief Get buffer size (in bytes), which may be needed internally for an intermediate buffer
+	 *  @details Very large FFT transforms may need multiple passes, and the operation would need a temporary buffer to hold
+	 *  intermediate results. This function is only valid after the plan is baked, otherwise an invalid operation error
+	 *  is returned. If buffersize returns as 0, the runtime needs no temporary buffer.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[out] buffersize Size in bytes for intermediate buffer
+	 */
+	CLFFTAPI clfftStatus clfftGetTmpBufSize( const clfftPlanHandle plHandle, size_t* buffersize );
+
+	/*! @brief Enqueue an FFT transform operation, and return immediately (non-blocking)
+	 *  @details This transform API is the function that actually computes the FFT transform. It is non-blocking as it
+	 *  only enqueues the OpenCL kernels for execution. The synchronization step has to be managed by the user.
+	 *  @param[in] plHandle Handle to a plan previously created
+	 *  @param[in] dir Forwards or backwards transform
+	 *  @param[in] numQueuesAndEvents Number of command queues in commQueues; number of expected events to be returned in outEvents
+	 *  @param[in] commQueues An array of cl_command_queues created by the client; the command queues must be a proper subset of
+	 * 	the devices included in the plan context
+	 *  @param[in] numWaitEvents Specify the number of elements in the waitEvents array
+	 *  @param[in] waitEvents Events that this transform should wait to complete before executing on the device
+	 *  @param[out] outEvents The runtime fills this array with events corresponding 1 to 1 with the input command queues passed
+	 *	in commQueues.  This parameter can be NULL or nullptr, in which case client is not interested in receiving notifications
+	 *	when transforms are finished, otherwise if not NULL the client is responsible for allocating this array, with at least
+	 *	as many elements as specified in numQueuesAndEvents.
+	 *  @param[in] inputBuffers An array of cl_mem objects that contain data for processing by the FFT runtime.  If the transform
+	 *  is in place, the FFT results will overwrite the input buffers
+	 *  @param[out] outputBuffers An array of cl_mem objects that will store the results of out of place transforms.  If the transform
+	 *  is in place, this parameter may be NULL or nullptr, as it is completely ignored
+	 *  @param[in] tmpBuffer A cl_mem object that is reserved as a temporary buffer for FFT processing. If tmpBuffer is NULL or nullptr,
+	 *  and the runtime needs temporary storage, an internal temporary buffer will be created on the fly managed by the runtime.
+	 *  @return Enum describing error condition; superset of OpenCL error codes
+	 */
+	CLFFTAPI clfftStatus	clfftEnqueueTransform(
+												clfftPlanHandle plHandle,
+												clfftDirection dir,
+												cl_uint numQueuesAndEvents,
+												cl_command_queue* commQueues,
+												cl_uint numWaitEvents,
+												const cl_event* waitEvents,
+												cl_event* outEvents,
+												cl_mem* inputBuffers,
+												cl_mem* outputBuffers,
+												cl_mem tmpBuffer
+												);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/include/convenienceFunctions.h b/src/include/convenienceFunctions.h
new file mode 100644
index 00000000..e32bd3fc
--- /dev/null
+++ b/src/include/convenienceFunctions.h
@@ -0,0 +1,51 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+/*****************************************************/
+//	Returns the leading bytes of a's object representation packed into an unsigned int
+//	(a bit-for-bit copy, not a numeric conversion).  The bytes are copied through unsigned char
+//	pointers, which may alias any object, instead of dereferencing a cast unsigned int*.
+//	When T is narrower than unsigned int, the bytes that are not copied stay zero.
+template< typename T >
+unsigned int float_as_hex( T a ) {
+	unsigned int bits = 0;
+	const unsigned char* src = reinterpret_cast< const unsigned char* >( &a );
+	unsigned char* dst = reinterpret_cast< unsigned char* >( &bits );
+	const unsigned int count = static_cast< unsigned int >( sizeof( T ) < sizeof( bits ) ? sizeof( T ) : sizeof( bits ) );
+
+	for( unsigned int i = 0; i < count; ++i )
+		dst[ i ] = src[ i ];
+
+	return bits;
+}
+
+/*****************************************************/
+//	Builds a T whose leading bytes are the bytes of 'a' (a bit-for-bit copy, not a numeric conversion).
+//	At most sizeof( unsigned int ) bytes are copied, so nothing is read past 'a'; when T is wider,
+//	the remaining bytes keep the value-initialized (zero) state.
+template< typename T >
+T hex_as_float( unsigned int a ) {
+	T value = T( );
+	const unsigned char* src = reinterpret_cast< const unsigned char* >( &a );
+	unsigned char* dst = reinterpret_cast< unsigned char* >( &value );
+	const unsigned int count = static_cast< unsigned int >( sizeof( T ) < sizeof( a ) ? sizeof( T ) : sizeof( a ) );
+
+	for( unsigned int i = 0; i < count; ++i )
+		dst[ i ] = src[ i ];
+
+	return value;
+}
\ No newline at end of file
diff --git a/src/include/sharedLibrary.h b/src/include/sharedLibrary.h
new file mode 100644
index 00000000..9f34b3a1
--- /dev/null
+++ b/src/include/sharedLibrary.h
@@ -0,0 +1,90 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#ifndef _SHAREDLIBRARY_H_
+#define _SHAREDLIBRARY_H_
+#include <string>
+
+//	_WIN32 is defined for both 32 & 64 bit environments
+#if defined( _WIN32 )
+	#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
+	// Windows Header Files:
+	#include <windows.h>
+#else
+	#include <dlfcn.h>
+#endif
+
+//	Loads "<libraryName>.dll" on Windows, or "<linuxPrefix><libraryName>.so" via dlopen elsewhere, and returns
+//	the module handle (NULL on failure).  Unless quiet is set, a dlopen failure is reported on std::cerr.
+inline void* LoadSharedLibrary( std::string linuxPrefix, std::string libraryName, bool quiet )
+{
+#if defined( _WIN32 )
+	libraryName += ".dll";
+	//	HMODULE is actually the load address; function returns NULL if it cannot find the shared library.
+	//	hFile is a reserved handle (NULL) but dwFlags is a DWORD, so it is passed as 0 rather than NULL.
+	HMODULE fileHandle	= ::LoadLibraryExA( libraryName.c_str( ), NULL, 0 );
+#else
+	//	std::string rather than tstring: this header only pulls in <string>, and dlopen expects a narrow path
+	const std::string linuxName = linuxPrefix + libraryName + ".so";
+	void* fileHandle = ::dlopen( linuxName.c_str( ), RTLD_NOW );
+	if( !quiet && !fileHandle )
+		std::cerr << ::dlerror( ) << std::endl;
+#endif
+
+	return fileHandle;
+}
+
+//	Unloads the shared library referenced by libHandle and resets libHandle to NULL in every case.
+//	Returns nonzero when the unload succeeds, and zero when it fails or when libHandle was already NULL.
+inline int FreeSharedLibrary( void*& libHandle )
+{
+	int unloaded	= 0;
+
+	if( libHandle != 0 )
+	{
+#if defined( _WIN32 )
+		unloaded = ::FreeLibrary( reinterpret_cast< HMODULE >( libHandle ) );
+#else
+		//	dlclose signals success with 0; convert that to the nonzero-on-success convention used here
+		unloaded = ( ::dlclose( libHandle ) == 0 );
+#endif
+	}
+
+	libHandle	= NULL;
+	return unloaded;
+}
+
+//	Looks up the symbol named funcName in a module handle previously returned by LoadSharedLibrary.
+//	Returns a pointer to that symbol.  NULL is returned when the module handle is NULL, and also
+//	when the symbol is not found in the module.
+inline void* LoadFunctionAddr( void* libHandle, std::string funcName )
+{
+	void* symbol = NULL;
+
+	if( libHandle != NULL )
+	{
+#if defined( _WIN32 )
+		symbol = ::GetProcAddress( reinterpret_cast< HMODULE >( libHandle ), funcName.c_str( ) );
+#else
+		symbol = ::dlsym( libHandle, funcName.c_str( ) );
+#endif
+	}
+
+	return symbol;
+}
+
+#endif // _SHAREDLIBRARY_H_
diff --git a/src/include/stdafx.h b/src/include/stdafx.h
new file mode 100644
index 00000000..5a8077bf
--- /dev/null
+++ b/src/include/stdafx.h
@@ -0,0 +1,49 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <iomanip>
+#include <cstring>
+#include <memory>
+#include <vector>
+#include <valarray>
+#include <cstring>
+#include <stdarg.h>
+#include <assert.h>
+#include <complex>
+
+//	_WIN32 is defined for both 32 & 64 bit environments
+#if defined( _WIN32 )
+	#include <tchar.h>
+	#include "targetver.h"
+
+	#define NOMINMAX
+	#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
+	// Windows Header Files:
+	#include <windows.h>
+#endif
diff --git a/src/include/targetver.h b/src/include/targetver.h
new file mode 100644
index 00000000..7c05692e
--- /dev/null
+++ b/src/include/targetver.h
@@ -0,0 +1,25 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>
diff --git a/src/include/unicode.compatibility.h b/src/include/unicode.compatibility.h
new file mode 100644
index 00000000..56a365f9
--- /dev/null
+++ b/src/include/unicode.compatibility.h
@@ -0,0 +1,59 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( amd_unicode_h )
+#define amd_unicode_h
+
+//	Typedefs to support Unicode and ANSI compilation
+#if defined( _UNICODE )
+	typedef std::wstring		tstring;
+	typedef std::wstringstream	tstringstream;
+	typedef std::wifstream		tifstream;
+	typedef std::wofstream		tofstream;
+	typedef std::wfstream		tfstream;
+	static std::wostream&	tout	= std::wcout;
+	static std::wostream&	terr	= std::wcerr;
+#else
+	typedef std::string tstring;
+	typedef std::stringstream tstringstream;
+	typedef std::ifstream		tifstream;
+	typedef std::ofstream		tofstream;
+	typedef std::fstream		tfstream;
+	static std::ostream&	tout	= std::cout;
+	static std::ostream&	terr	= std::cerr;
+#endif
+
+//	These macros help linux cope with the conventions of windows tchar.h file
+#if defined( _WIN32 )
+	#include <tchar.h>
+	#include <windows.h>
+#else
+	#if defined( __GNUC__ )
+		typedef char TCHAR;
+		typedef char _TCHAR;
+		#define _tmain main
+
+		#if defined( UNICODE )
+			#define _T(x)	L ## x
+		#else
+			#define _T(x)	x
+		#endif
+	#endif
+#endif
+
+#endif
diff --git a/src/include/version.h.in b/src/include/version.h.in
new file mode 100644
index 00000000..343cd7e2
--- /dev/null
+++ b/src/include/version.h.in
@@ -0,0 +1,22 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+/* the configured version and settings for clFFT
+ */
+#define clfftVersionMajor @CLFFT_VERSION_MAJOR@
+#define clfftVersionMinor @CLFFT_VERSION_MINOR@
+#define clfftVersionPatch @CLFFT_VERSION_PATCH@
diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt
new file mode 100644
index 00000000..ea7637bf
--- /dev/null
+++ b/src/library/CMakeLists.txt
@@ -0,0 +1,102 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+# List the names of common files to compile across all platforms
+set( clFFT.Source	transform.cpp 
+								accessors.cpp 
+								plan.cpp 
+								repo.cpp 
+								generator.stockham.cpp 
+								generator.transpose.cpp 
+								generator.copy.cpp
+								lifetime.cpp 
+								stdafx.cpp )
+
+# Windows only uses dllmain
+if( MSVC )
+	set( clFFT.Source ${clFFT.Source} dllmain.cpp )
+endif( )
+
+set( clFFT.Headers	private.h 
+					repo.h 
+					plan.h 
+					lock.h 
+					mainpage.h  
+					generator.h 
+					generator.stockham.h 
+					generator.transpose.h 
+					../include/stdafx.h 
+					../include/unicode.compatibility.h 
+					../include/targetver.h 
+					../include/clAmdFft.h 
+					../include/clFFT.h )
+
+set( clFFT.Files ${clFFT.Source} ${clFFT.Headers} )
+
+# For a rainy day, add pre-compiled header support
+#if( MSVC )
+#	if (USE_MSVC_PCH)
+	
+#		set_source_files_properties(LungAnalysisPCH.cxx
+#			PROPERTIES
+#			COMPILE_FLAGS "/YcLungAnalysisPCH.h"
+#			)
+#		foreach( src_file ${UPMC_LA_SRCS} )
+#			set_source_files_properties(
+#				${src_file}
+#				PROPERTIES
+#				COMPILE_FLAGS "/YuLungAnalysisPCH.h"
+#				)
+#		endforeach( src_file ${UPMC_LA_SRCS} )
+		
+#		list(APPEND UPMC_LA_SRCS LungAnalysisPCH.cxx)
+#		list(APPEND UPMC_LA_HDRS LungAnalysisPCH.h)
+
+#	endif(USE_MSVC_PCH)
+#endif (MSVC)
+  
+#	add_definitions( ${Boost_LIB_DIAGNOSTIC_DEFINITIONS} )
+add_definitions( "/DCLFFT_EXPORTS" )
+
+# Include standard OpenCL headers
+include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include )
+
+add_library( clFFT SHARED ${clFFT.Files} )
+target_link_libraries( clFFT ${OPENCL_LIBRARIES} )
+
+set_target_properties( clFFT PROPERTIES VERSION ${CLFFT_VERSION} )
+set_target_properties( clFFT PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
+
+if( UNIX )
+	# Right now, linux has problems compiling dynamic_cast, but the flag below doesn't help
+	# set_target_properties( clFFT PROPERTIES COMPILE_FLAGS "-frtti" )
+endif( )
+
+if( BUILD64 )
+	# CPack configuration; include the executable into the package
+	install( TARGETS clFFT
+			RUNTIME DESTINATION bin64
+			LIBRARY DESTINATION lib64
+			ARCHIVE DESTINATION lib64/import
+			)
+else()
+	# CPack configuration; include the executable into the package
+	install( TARGETS clFFT
+			RUNTIME DESTINATION bin32
+			LIBRARY DESTINATION lib32
+			ARCHIVE DESTINATION lib32/import
+			)
+endif()
diff --git a/src/library/ReadMe.txt b/src/library/ReadMe.txt
new file mode 100644
index 00000000..72470754
--- /dev/null
+++ b/src/library/ReadMe.txt
@@ -0,0 +1,56 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+========================================================================
+    CONSOLE APPLICATION : AMD.clFFT Project Overview
+========================================================================
+
+AppWizard has created this AMD.clFFT application for you.
+
+This file contains a summary of what you will find in each of the files that
+make up your AMD.clFFT application.
+
+
+AMD.clFFT.vcxproj
+    This is the main project file for VC++ projects generated using an Application Wizard.
+    It contains information about the version of Visual C++ that generated the file, and
+    information about the platforms, configurations, and project features selected with the
+    Application Wizard.
+
+AMD.clFFT.vcxproj.filters
+    This is the filters file for VC++ projects generated using an Application Wizard. 
+    It contains information about the association between the files in your project 
+    and the filters. This association is used in the IDE to show grouping of files with
+    similar extensions under a specific node (for e.g. ".cpp" files are associated with the
+    "Source Files" filter).
+
+AMD.clFFT.cpp
+    This is the main application source file.
+
+/////////////////////////////////////////////////////////////////////////////
+Other standard files:
+
+StdAfx.h, StdAfx.cpp
+    These files are used to build a precompiled header (PCH) file
+    named AMD.clFFT.pch and a precompiled types file named StdAfx.obj.
+
+/////////////////////////////////////////////////////////////////////////////
+Other notes:
+
+AppWizard uses "TODO:" comments to indicate parts of the source code you
+should add to or customize.
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
new file mode 100644
index 00000000..8d6ce65f
--- /dev/null
+++ b/src/library/accessors.cpp
@@ -0,0 +1,826 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// clfft.accessors.cpp : Defines all the getters/setters for the Plan
+//
+
+#include "stdafx.h"
+#include "private.h"
+#include "repo.h"
+
+using std::vector;
+
+//	Copies the plan's batch size into *batchsize; a NULL batchsize is rejected with CLFFT_INVALID_HOST_PTR.
+clfftStatus clfftGetPlanBatchSize( const clfftPlanHandle plHandle, size_t* batchsize )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanBatchSize" ) );
+
+	if( batchsize == NULL ) return CLFFT_INVALID_HOST_PTR;
+	*batchsize	= fftPlan->batchsize;
+	return CLFFT_SUCCESS;
+}
+
+//	Stores a new batch size in the plan and marks the plan as not baked.
+clfftStatus clfftSetPlanBatchSize( clfftPlanHandle plHandle, size_t batchsize )
+{
+	FFTPlan* plan		= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, plan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanBatchSize" ) );
+
+	//	Changing plan state means pre-calculated contents can no longer be trusted, so clear the baked flag
+	plan->batchsize	= batchsize;
+	plan->baked		= false;
+	return CLFFT_SUCCESS;
+}
+
+//	Copies the plan's cl_context into *context; a NULL context pointer is rejected with CLFFT_INVALID_HOST_PTR.
+clfftStatus clfftGetPlanContext( const clfftPlanHandle plHandle, cl_context* context )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanContext" ) );
+
+	if( context == NULL ) return CLFFT_INVALID_HOST_PTR;
+	*context = fftPlan->context;
+	return CLFFT_SUCCESS;
+}
+
+//	Copies the plan's precision setting into *precision; a NULL pointer is rejected with CLFFT_INVALID_HOST_PTR.
+clfftStatus clfftGetPlanPrecision( const clfftPlanHandle plHandle, clfftPrecision* precision )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanPrecision" ) );
+
+	if( precision == NULL ) return CLFFT_INVALID_HOST_PTR;
+
+	*precision	= fftPlan->precision;
+	return	CLFFT_SUCCESS;
+}
+
+// Helper that queries each device's CL_DEVICE_EXTENSIONS string and searches it for the user supplied cap.
+// Returns CLFFT_SUCCESS only when every device in the devices vector lists the cap; the first device that
+// does not (including one that reports an empty extension string) yields CLFFT_DEVICE_NO_DOUBLE.
+clfftStatus checkDevExt( std::string cap, std::vector< cl_device_id >& devices )
+{
+	for( size_t d = 0; d < devices.size( ); ++d)
+	{
+		size_t deviceExtSize	= 0;
+		OPENCL_V( ::clGetDeviceInfo( devices[ d ], CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ),
+			"Getting CL_DEVICE_EXTENSIONS Platform Info string size ( ::clGetDeviceInfo() )" );
+		if( deviceExtSize == 0 )	// nothing to read, and &szDeviceExt[ 0 ] must not be taken on an empty vector
+			return CLFFT_DEVICE_NO_DOUBLE;
+
+		std::vector< char > szDeviceExt( deviceExtSize );
+		OPENCL_V( ::clGetDeviceInfo( devices[ d ], CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ),
+			"Getting CL_DEVICE_EXTENSIONS Platform Info string ( ::clGetDeviceInfo() )" );
+
+		if( std::string( szDeviceExt.begin( ), szDeviceExt.end( ) ).find( cap ) == std::string::npos )
+			return CLFFT_DEVICE_NO_DOUBLE;
+	}
+
+	return CLFFT_SUCCESS;
+}
+//	Validates and stores the floating point precision used by the plan.  Out of range values give
+//	CLFFT_INVALID_ARG_VALUE, the *_FAST variants give CLFFT_NOTIMPLEMENTED, and double precision is only
+//	accepted when checkDevExt finds cl_amd_fp64 or cl_khr_fp64 on the plan's devices.
+clfftStatus clfftSetPlanPrecision( clfftPlanHandle plHandle, clfftPrecision precision )
+{
+	FFTPlan* plan		= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, plan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanPrecision" ) );
+
+	if( precision >= ENDPRECISION )
+		return CLFFT_INVALID_ARG_VALUE;
+
+	const bool wantsFast	= ( precision == CLFFT_SINGLE_FAST ) || ( precision == CLFFT_DOUBLE_FAST );
+	const bool wantsDouble	= ( precision == CLFFT_DOUBLE ) || ( precision == CLFFT_DOUBLE_FAST );
+
+	//	CLFFT_*_FAST is not supported currently
+	if( wantsFast )
+		return CLFFT_NOTIMPLEMENTED;
+
+	//	Double precision needs device support: try AMD's extension first, then fall back to the Khronos one
+	if( wantsDouble && checkDevExt( "cl_amd_fp64", plan->devices ) != CLFFT_SUCCESS )
+	{
+		const clfftStatus khrStatus = checkDevExt( "cl_khr_fp64", plan->devices );
+		if( khrStatus != CLFFT_SUCCESS )
+			return khrStatus;
+	}
+
+	//	Changing plan state means pre-calculated contents can no longer be trusted, so clear the baked flag
+	plan->precision	= precision;
+	plan->baked		= false;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Copies the scale for dir into *scale (forward for CLFFT_FORWARD / CLFFT_MINUS, backward otherwise); NULL scale is rejected.
+clfftStatus clfftGetPlanScale( const clfftPlanHandle plHandle, clfftDirection dir, cl_float* scale )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanScale" ) );
+
+	if( scale == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+	if( dir >= ENDDIRECTION )
+		return CLFFT_INVALID_ARG_VALUE;
+
+	if( dir == CLFFT_FORWARD || dir == CLFFT_MINUS )
+		*scale = (cl_float)(fftPlan->forwardScale);
+	else
+		*scale = (cl_float)(fftPlan->backwardScale);
+	return CLFFT_SUCCESS;
+}
+
+//	Stores the scaling factor for one transform direction and marks the plan as not baked.
+//	CLFFT_FORWARD and CLFFT_MINUS select the forward scale; any other accepted direction selects the backward scale.
+clfftStatus clfftSetPlanScale( clfftPlanHandle plHandle, clfftDirection dir, cl_float scale )
+{
+	FFTPlan* plan		= NULL;
+	lockRAII* planLock	= NULL;
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, plan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanScale" ) );
+
+	if( dir >= ENDDIRECTION )
+		return CLFFT_INVALID_ARG_VALUE;
+
+	const bool isForward = ( dir == CLFFT_FORWARD ) || ( dir == CLFFT_MINUS );
+	if( isForward )
+		plan->forwardScale = scale;
+	else
+		plan->backwardScale = scale;
+
+	//	Changing plan state means pre-calculated contents can no longer be trusted, so clear the baked flag
+	plan->baked	= false;
+	return CLFFT_SUCCESS;
+}
+
+//	Reports the dimension stored in the plan: *dim receives the clfftDim value and *size the
+//	corresponding number of dimensions (1, 2 or 3).  A NULL dim or size yields CLFFT_INVALID_HOST_PTR;
+//	a stored dimension other than CLFFT_1D, CLFFT_2D or CLFFT_3D yields CLFFT_NOTIMPLEMENTED (*dim is still written).
+clfftStatus clfftGetPlanDim( const clfftPlanHandle plHandle, clfftDim* dim, cl_uint* size )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanDim" ) );
+
+	//	Both outputs are written below, so neither may be NULL
+	if( dim == NULL || size == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	*dim		= fftPlan->dim;
+
+	switch( fftPlan->dim )
+	{
+		case CLFFT_1D:
+			*size = 1;
+			break;
+		case CLFFT_2D:
+			*size = 2;
+			break;
+		case CLFFT_3D:
+			*size = 3;
+			break;
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	return CLFFT_SUCCESS;
+}
+
+//	Changes the dimensionality of the plan.  The length, inStride and outStride vectors are resized to
+//	the new number of dimensions, the plan's dim is updated and the plan is marked as not baked.
+//	Returns CLFFT_NOTIMPLEMENTED, without modifying the plan, when dim is not CLFFT_1D, CLFFT_2D or CLFFT_3D.
+//	The scoped lock is labelled with this setter's own name.
+clfftStatus clfftSetPlanDim( clfftPlanHandle plHandle, const clfftDim dim )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanDim" ) );
+
+	//	Translate the enum into the number of entries each per-dimension vector must hold
+	size_t numDims	= 0;
+	switch( dim )
+	{
+		case CLFFT_1D:
+			numDims = 1;
+			break;
+
+		case CLFFT_2D:
+			numDims = 2;
+			break;
+
+		case CLFFT_3D:
+			numDims = 3;
+			break;
+
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	//	We resize the vectors in the plan to keep their sizes consistent with the value of the dimension
+	fftPlan->length.resize( numDims );
+	fftPlan->inStride.resize( numDims );
+	fftPlan->outStride.resize( numDims );
+
+	//	If we modify the state of the plan, we assume that we can't trust any pre-calculated contents anymore
+	fftPlan->baked	= false;
+	fftPlan->dim	= dim;
+
+	return CLFFT_SUCCESS;
+}
+
+//	Copies the plan's lengths for the first one, two or three dimensions (selected by dim) into clLengths.
+//	Returns CLFFT_INVALID_HOST_PTR for a NULL clLengths, CLFFT_INVALID_ARG_INDEX when the plan holds no
+//	lengths or fewer than dim asks for, and CLFFT_NOTIMPLEMENTED for any other dim value.
+//	The caller must provide room for as many entries as dim implies.
+clfftStatus clfftGetPlanLength( const clfftPlanHandle plHandle, const clfftDim dim, size_t* clLengths )
+{
+	FFTPlan* plan		= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, plan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanLength" ) );
+
+	if( clLengths == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	if( plan->length.empty( ) )
+		return CLFFT_INVALID_ARG_INDEX;
+
+	switch( dim )
+	{
+		case CLFFT_3D:
+			if( plan->length.size( ) < 3 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clLengths[ DimX ] = plan->length[ DimX ];
+			clLengths[ DimY ] = plan->length[ DimY ];
+			clLengths[ DimZ ] = plan->length[ DimZ ];
+			break;
+
+		case CLFFT_2D:
+			if( plan->length.size( ) < 2 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clLengths[ DimX ] = plan->length[ DimX ];
+			clLengths[ DimY ] = plan->length[ DimY ];
+			break;
+
+		case CLFFT_1D:
+			clLengths[ DimX ] = plan->length[ DimX ];
+			break;
+
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Replaces the plan's lengths with the first one, two or three entries of clLengths (selected by dim),
+//	records dim in the plan and marks the plan as not baked.
+//	Returns CLFFT_INVALID_HOST_PTR for a NULL clLengths, CLFFT_INVALID_ARG_VALUE for a zero length, and
+//	CLFFT_NOTIMPLEMENTED for an unsupported length or dim; on any of these errors the plan is left unchanged.
+clfftStatus clfftSetPlanLength( clfftPlanHandle plHandle, const clfftDim dim, const size_t* clLengths )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanLength" ) );
+
+	if( clLengths == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	//	Each case validates its inputs before clearing the stored lengths, so a rejected call leaves the plan untouched
+	switch( dim )
+	{
+		case CLFFT_1D:
+		{
+			//	Minimum length size is 1
+			if( clLengths[ DimX ] == 0 )
+				return CLFFT_INVALID_ARG_VALUE;
+
+			if( !IsASupportedLength( clLengths[ DimX ] ) )
+				return CLFFT_NOTIMPLEMENTED;
+
+			fftPlan->length.clear( );
+			fftPlan->length.push_back( clLengths[ DimX ] );
+		}
+			break;
+		case CLFFT_2D:
+		{
+			//	Minimum length size is 1
+			if( clLengths[ DimX ] == 0 || clLengths[ DimY ] == 0 )
+				return CLFFT_INVALID_ARG_VALUE;
+
+			if( !IsASupportedLength( clLengths[ DimX ] ) || !IsASupportedLength( clLengths[ DimY ] ) )
+				return CLFFT_NOTIMPLEMENTED;
+
+			fftPlan->length.clear( );
+			fftPlan->length.push_back( clLengths[ DimX ] );
+			fftPlan->length.push_back( clLengths[ DimY ] );
+		}
+			break;
+		case CLFFT_3D:
+		{
+			//	Minimum length size is 1
+			if( clLengths[ DimX ] == 0 || clLengths[ DimY ] == 0 || clLengths[ DimZ ] == 0)
+				return CLFFT_INVALID_ARG_VALUE;
+
+			if( !IsASupportedLength( clLengths[ DimX ] ) || !IsASupportedLength( clLengths[ DimY ] ) ||
+				!IsASupportedLength( clLengths[ DimZ ] ) )
+				return CLFFT_NOTIMPLEMENTED;
+
+			fftPlan->length.clear( );
+			fftPlan->length.push_back( clLengths[ DimX ] );
+			fftPlan->length.push_back( clLengths[ DimY ] );
+			fftPlan->length.push_back( clLengths[ DimZ ] );
+		}
+			break;
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	fftPlan->dim = dim;
+
+	//	If we modify the state of the plan, we assume that we can't trust any pre-calculated contents anymore
+	fftPlan->baked	= false;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Copies the plan's input strides for the first one, two or three dimensions (selected by dim) into clStrides.
+//	Returns CLFFT_INVALID_HOST_PTR for a NULL clStrides, CLFFT_INVALID_ARG_INDEX when the plan stores fewer
+//	input strides than dim asks for, and CLFFT_NOTIMPLEMENTED for any other dim value.
+clfftStatus clfftGetPlanInStride( const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides )
+{
+	FFTPlan* plan		= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, plan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanInStride" ) );
+
+	if( clStrides == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	switch( dim )
+	{
+		case CLFFT_1D:
+		{
+			if( plan->inStride.size( ) < 1 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clStrides[ DimX ] = plan->inStride[ DimX ];
+			break;
+		}
+
+		case CLFFT_2D:
+		{
+			if( plan->inStride.size( ) < 2 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clStrides[ DimX ] = plan->inStride[ DimX ];
+			clStrides[ DimY ] = plan->inStride[ DimY ];
+			break;
+		}
+
+		case CLFFT_3D:
+		{
+			if( plan->inStride.size( ) < 3 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clStrides[ DimX ] = plan->inStride[ DimX ];
+			clStrides[ DimY ] = plan->inStride[ DimY ];
+			clStrides[ DimZ ] = plan->inStride[ DimZ ];
+			break;
+		}
+
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	return CLFFT_SUCCESS;
+}
+
+//	Replaces the plan's input strides with the first one, two or three entries of clStrides (selected by dim)
+//	and marks the plan as not baked.  Returns CLFFT_INVALID_HOST_PTR for a NULL clStrides and
+//	CLFFT_NOTIMPLEMENTED for an unrecognised dim; in both error cases the stored strides are left unchanged.
+clfftStatus clfftSetPlanInStride( clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanInStride" ) );
+
+	if( clStrides == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	//	Previous contents are cleared (the user may shrink the dimension), but only once dim is known to be valid
+	switch( dim )
+	{
+		case CLFFT_1D:
+			fftPlan->inStride.clear( );
+			fftPlan->inStride.push_back( clStrides[ DimX ] );
+			break;
+
+		case CLFFT_2D:
+			fftPlan->inStride.clear( );
+			fftPlan->inStride.push_back( clStrides[ DimX ] );
+			fftPlan->inStride.push_back( clStrides[ DimY ] );
+			break;
+
+		case CLFFT_3D:
+			fftPlan->inStride.clear( );
+			fftPlan->inStride.push_back( clStrides[ DimX ] );
+			fftPlan->inStride.push_back( clStrides[ DimY ] );
+			fftPlan->inStride.push_back( clStrides[ DimZ ] );
+			break;
+
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	//	If we modify the state of the plan, we assume that we can't trust any pre-calculated contents anymore
+	fftPlan->baked	= false;
+
+	return CLFFT_SUCCESS;
+}
+
+//	Copies the plan's output strides for the first one, two or three dimensions (selected by dim) into clStrides.
+//	Returns CLFFT_INVALID_HOST_PTR for a NULL clStrides, CLFFT_INVALID_ARG_INDEX when the plan stores fewer
+//	output strides than dim asks for, and CLFFT_NOTIMPLEMENTED for any other dim value.
+clfftStatus clfftGetPlanOutStride( const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides )
+{
+	FFTPlan* plan		= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, plan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanOutStride" ) );
+
+	if( clStrides == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	switch( dim )
+	{
+		case CLFFT_1D:
+		{
+			if( plan->outStride.size( ) < 1 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clStrides[ DimX ] = plan->outStride[ DimX ];
+			break;
+		}
+
+		case CLFFT_2D:
+		{
+			if( plan->outStride.size( ) < 2 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clStrides[ DimX ] = plan->outStride[ DimX ];
+			clStrides[ DimY ] = plan->outStride[ DimY ];
+			break;
+		}
+
+		case CLFFT_3D:
+		{
+			if( plan->outStride.size( ) < 3 )
+				return CLFFT_INVALID_ARG_INDEX;
+
+			clStrides[ DimX ] = plan->outStride[ DimX ];
+			clStrides[ DimY ] = plan->outStride[ DimY ];
+			clStrides[ DimZ ] = plan->outStride[ DimZ ];
+			break;
+		}
+
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	return CLFFT_SUCCESS;
+}
+
+//	Replaces the plan's output strides with the first one, two or three entries of clStrides (selected by dim)
+//	and marks the plan as not baked.  Returns CLFFT_INVALID_HOST_PTR for a NULL clStrides and
+//	CLFFT_NOTIMPLEMENTED for an unrecognised dim; in both error cases the stored strides are left unchanged.
+clfftStatus clfftSetPlanOutStride( clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides )
+{
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( FFTRepo::getInstance( ).getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanOutStride" ) );
+
+	if( clStrides == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	//	Previous contents are cleared (the user may shrink the dimension), but only once dim is known to be valid
+	switch( dim )
+	{
+		case CLFFT_1D:
+			fftPlan->outStride.clear( );
+			fftPlan->outStride.push_back( clStrides[ DimX ] );
+			break;
+
+		case CLFFT_2D:
+			fftPlan->outStride.clear( );
+			fftPlan->outStride.push_back( clStrides[ DimX ] );
+			fftPlan->outStride.push_back( clStrides[ DimY ] );
+			break;
+
+		case CLFFT_3D:
+			fftPlan->outStride.clear( );
+			fftPlan->outStride.push_back( clStrides[ DimX ] );
+			fftPlan->outStride.push_back( clStrides[ DimY ] );
+			fftPlan->outStride.push_back( clStrides[ DimZ ] );
+			break;
+
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	//	If we modify the state of the plan, we assume that we can't trust any pre-calculated contents anymore
+	fftPlan->baked	= false;
+
+	return CLFFT_SUCCESS;
+}
+
+//	Reports the input and output distances (iDist / oDist) stored in a plan.
+//	Both output pointers are required; a NULL pointer yields CLFFT_INVALID_HOST_PTR, matching the stride getters.
+clfftStatus clfftGetPlanDistance( const clfftPlanHandle plHandle, size_t* iDist, size_t* oDist )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanDistance" ) );
+
+	//	Refuse to write through a NULL pointer
+	if( ( iDist == NULL ) || ( oDist == NULL ) )
+		return CLFFT_INVALID_HOST_PTR;
+
+	*iDist				= fftPlan->iDist;
+	*oDist				= fftPlan->oDist;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Stores new input and output distances (iDist / oDist) in a plan and marks the plan as no longer baked.
+clfftStatus clfftSetPlanDistance( clfftPlanHandle plHandle, size_t iDist, size_t oDist )
+{
+	FFTRepo& repo	= FFTRepo::getInstance( );
+	FFTPlan* plan	= NULL;
+	lockRAII* lock	= NULL;
+
+	OPENCL_V( repo.getPlan( plHandle, plan, lock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *lock, _T( "clfftSetPlanDistance" ) );
+
+	plan->iDist	= iDist;
+	plan->oDist	= oDist;
+
+	//	Anything pre-calculated from the old distances can no longer be trusted
+	plan->baked	= false;
+
+	return CLFFT_SUCCESS;
+}
+
+//	Reports the input and output layouts stored in a plan.
+//	Both output pointers are required; a NULL pointer yields CLFFT_INVALID_HOST_PTR, matching the stride getters.
+clfftStatus clfftGetLayout( const clfftPlanHandle plHandle, clfftLayout* iLayout, clfftLayout* oLayout )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetLayout" ) );
+
+	//	Refuse to write through a NULL pointer
+	if( ( iLayout == NULL ) || ( oLayout == NULL ) )
+		return CLFFT_INVALID_HOST_PTR;
+
+	*iLayout			= fftPlan->inputLayout;
+	*oLayout			= fftPlan->outputLayout;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Selects the input and output data layouts of a plan.
+//	Returns CLFFT_INVALID_ARG_VALUE when either value is at or beyond ENDLAYOUT, and CLFFT_NOTIMPLEMENTED
+//	for pairings outside the supported subset:
+//		complex input   -> complex output only
+//		hermitian input -> real output only
+//		real input      -> hermitian output only
+//	On success the plan is marked as no longer baked.
+clfftStatus clfftSetLayout( clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout )
+{
+	FFTRepo& repo	= FFTRepo::getInstance( );
+	FFTPlan* plan	= NULL;
+	lockRAII* lock	= NULL;
+
+	OPENCL_V( repo.getPlan( plHandle, plan, lock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *lock, _T( "clfftSetLayout" ) );
+
+	//	Values at or beyond the end marker are not layouts at all
+	if( ( iLayout >= ENDLAYOUT ) || ( oLayout >= ENDLAYOUT ) )
+		return CLFFT_INVALID_ARG_VALUE;
+
+	//	Classify the requested output once; the pairing rules below are phrased in these terms
+	const bool outComplex	= ( oLayout == CLFFT_COMPLEX_INTERLEAVED ) || ( oLayout == CLFFT_COMPLEX_PLANAR );
+	const bool outHermitian	= ( oLayout == CLFFT_HERMITIAN_INTERLEAVED ) || ( oLayout == CLFFT_HERMITIAN_PLANAR );
+	const bool outReal		= ( oLayout == CLFFT_REAL );
+
+	switch( iLayout )
+	{
+		case CLFFT_COMPLEX_INTERLEAVED:
+		case CLFFT_COMPLEX_PLANAR:
+			//	Complex input cannot be paired with a hermitian or real output
+			if( outHermitian || outReal )
+				return CLFFT_NOTIMPLEMENTED;
+			break;
+		case CLFFT_HERMITIAN_INTERLEAVED:
+		case CLFFT_HERMITIAN_PLANAR:
+			//	Hermitian input only pairs with real output
+			if( !outReal )
+				return CLFFT_NOTIMPLEMENTED;
+			break;
+		case CLFFT_REAL:
+			//	Real input cannot be paired with a real or complex output
+			if( outReal || outComplex )
+				return CLFFT_NOTIMPLEMENTED;
+			break;
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	//	An output value that is none of the five known layouts is rejected as well
+	if( !( outComplex || outHermitian || outReal ) )
+		return CLFFT_NOTIMPLEMENTED;
+
+	plan->inputLayout	= iLayout;
+	plan->outputLayout	= oLayout;
+
+	//	Anything pre-calculated for the old layouts can no longer be trusted
+	plan->baked	= false;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Reports the result placement (the 'placeness' field) stored in a plan.
+//	A NULL output pointer yields CLFFT_INVALID_HOST_PTR, matching the stride getters.
+clfftStatus clfftGetResultLocation( const clfftPlanHandle plHandle, clfftResultLocation* placeness )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetResultLocation" ) );
+
+	//	Refuse to write through a NULL pointer
+	if( placeness == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	*placeness	= fftPlan->placeness;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Stores a new result placement in a plan and marks the plan as no longer baked.
+//	Returns CLFFT_INVALID_ARG_VALUE when 'placeness' is at or beyond ENDPLACE.
+clfftStatus clfftSetResultLocation( clfftPlanHandle plHandle, clfftResultLocation placeness )
+{
+	FFTRepo& repo	= FFTRepo::getInstance( );
+	FFTPlan* plan	= NULL;
+	lockRAII* lock	= NULL;
+
+	OPENCL_V( repo.getPlan( plHandle, plan, lock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *lock, _T( "clfftSetResultLocation" ) );
+
+	//	Only values below the end marker are accepted
+	if( !( placeness < ENDPLACE ) )
+		return CLFFT_INVALID_ARG_VALUE;
+
+	plan->placeness	= placeness;
+
+	//	Anything pre-calculated for the old placement can no longer be trusted
+	plan->baked		= false;
+
+	return	CLFFT_SUCCESS;
+}
+
+
+//	Reports the transposed-result setting stored in a plan.
+//	A NULL output pointer yields CLFFT_INVALID_HOST_PTR, matching the stride getters.
+clfftStatus clfftGetPlanTransposeResult( const clfftPlanHandle plHandle, clfftResultTransposed * transposed )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	//	The lock is labelled with this function's own name (it used to carry a label copied from another function)
+	scopedLock sLock( *planLock, _T( "clfftGetPlanTransposeResult" ) );
+
+	//	Refuse to write through a NULL pointer
+	if( transposed == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	*transposed	= fftPlan->transposed;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Stores a new transposed-result setting in a plan and marks the plan as no longer baked.
+//	Returns CLFFT_INVALID_ARG_VALUE when 'transposed' is at or beyond ENDTRANSPOSED.
+clfftStatus clfftSetPlanTransposeResult( clfftPlanHandle plHandle, clfftResultTransposed transposed )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	//	The lock is labelled with this function's own name (it used to carry a label copied from another function)
+	scopedLock sLock( *planLock, _T( "clfftSetPlanTransposeResult" ) );
+
+	//	Basic error checking on parameter
+	if( transposed >= ENDTRANSPOSED )
+		return CLFFT_INVALID_ARG_VALUE;
+
+	//	If we modify the state of the plan, we assume that we can't trust any pre-calculated contents anymore
+	fftPlan->baked		= false;
+	fftPlan->transposed	= transposed;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Reports the temporary buffer size recorded in a baked plan.
+//	Returns CLFFT_INVALID_HOST_PTR when 'buffersize' is NULL and CLFFT_INVALID_OPERATION when the plan
+//	has not been baked, in which case *buffersize is left untouched.
+clfftStatus clfftGetTmpBufSize( const clfftPlanHandle plHandle, size_t* buffersize )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	//	The lock is labelled with this function's own name (it used to carry a label copied from another function)
+	scopedLock sLock( *planLock, _T( "clfftGetTmpBufSize" ) );
+
+	//	Refuse to write through a NULL pointer
+	if( buffersize == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	//	tmpBufSize is only handed out for a baked plan
+	if( !fftPlan->baked )
+		return CLFFT_INVALID_OPERATION;
+
+	*buffersize   = fftPlan->tmpBufSize;
+	return CLFFT_SUCCESS;
+}
+
+//	Overwrites five internal tuning fields of a plan from a caller-supplied structure.
+//	'data' is interpreted as a pointer to memory laid out like the local InternalData struct below.
+//	Returns CLFFT_INVALID_HOST_PTR when 'data' is NULL.
+//	NOTE(review): unlike the other setters this does not clear fftPlan->baked -- confirm that is intended.
+clfftStatus clfftSetInternal( clfftPlanHandle plHandle, void* data )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	//	The lock is labelled with this function's own name (it used to carry a label copied from another function)
+	scopedLock sLock( *planLock, _T( "clfftSetInternal" ) );
+
+	//	Nothing to read from a NULL pointer
+	if( data == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	//	Layout the caller must use for the memory behind 'data'
+	struct InternalData {
+		size_t					large1D_Xfactor;
+		size_t					cacheSize;
+		bool                    bLdsComplex;
+		bool                    ldsPadding;
+		unsigned                uLdsFraction;
+	};
+
+	const InternalData* mydata = static_cast< const InternalData* >( data );
+
+	fftPlan->large1D_Xfactor = mydata->large1D_Xfactor;
+	fftPlan->cacheSize       = mydata->cacheSize;
+	fftPlan->bLdsComplex     = mydata->bLdsComplex;
+	fftPlan->ldsPadding      = mydata->ldsPadding;
+	fftPlan->uLdsFraction    = mydata->uLdsFraction;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Reports the local memory size limit recorded in the plan's envelope (envelope.limit_LocalMemSize).
+//	A NULL output pointer yields CLFFT_INVALID_HOST_PTR, matching the stride getters.
+clfftStatus clfftLocalMemSize( const clfftPlanHandle plHandle, cl_ulong* local_mem_size )
+{
+	FFTRepo& repo = FFTRepo::getInstance( );
+	FFTPlan* plan = NULL;
+	lockRAII* lock = NULL;
+
+	OPENCL_V( repo.getPlan( plHandle, plan, lock ), _T( "repo.getPlan failed" ) );
+	scopedLock sLock( *lock, _T( "clfftLocalMemSize" ) );
+
+	//	Refuse to write through a NULL pointer
+	if( local_mem_size == NULL )
+		return CLFFT_INVALID_HOST_PTR;
+
+	*local_mem_size = plan->envelope.limit_LocalMemSize;
+	return CLFFT_SUCCESS;
+}
\ No newline at end of file
diff --git a/src/library/dllmain.cpp b/src/library/dllmain.cpp
new file mode 100644
index 00000000..5d651328
--- /dev/null
+++ b/src/library/dllmain.cpp
@@ -0,0 +1,36 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// dllmain.cpp : Defines the entry point for the DLL application.
+#include "stdafx.h"
+
+//	DLL entry point. The library needs no setup or teardown for any attach/detach notification,
+//	so every call simply reports success.
+BOOL APIENTRY DllMain( HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved )
+{
+	//	None of the arguments influence the result
+	( void )hModule;
+	( void )ul_reason_for_call;
+	( void )lpReserved;
+
+	return TRUE;
+}
+
diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
new file mode 100644
index 00000000..e839ed8a
--- /dev/null
+++ b/src/library/generator.copy.cpp
@@ -0,0 +1,474 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include "stdafx.h"
+#include <math.h>
+#include <list>
+#include "generator.stockham.h"
+
+using namespace StockhamGenerator;
+
+namespace CopyGenerator
+{
+    // Copy kernel: builds the OpenCL source of a kernel (copy_h2c or copy_c2h) that copies complex data where either the input or the output uses a hermitian layout
+    template <Precision PR>
+    class CopyKernel
+    {
+        size_t N;	// length of the first dimension (params.fft_N[0])
+		size_t Nt;	// 1 + N/2: number of elements each row is split into by the generated kernel
+		const FFTKernelGenKeyParams params;	// private copy of the key passed to the constructor
+		bool h2c, c2h;	// h2c: input layout is hermitian; c2h: output layout is hermitian
+		// Returns OpenCL statements that assign to variable 'off' the offset derived from 'batch'; uses the input strides when 'input' is true, the output strides otherwise
+		inline std::string OffsetCalc(const std::string &off, bool input = true)
+		{
+			std::string str;
+
+			const size_t *pStride = input ? params.fft_inStride : params.fft_outStride;
+
+			std::string batch = "batch";	// name of the kernel variable that holds the row index
+
+			switch(params.fft_DataDim)
+			{
+			case 5:	// batch is decomposed over fft_N[1], fft_N[2] and fft_N[3]; the quotient is scaled by pStride[4]
+				{
+					str += "\t{\n\tuint ocalc1 = ";
+					str += batch; str += "%"; str += SztToStr(params.fft_N[1] * params.fft_N[2] * params.fft_N[3]);
+					str += ";\n";
+
+					str += "\tuint ocalc0 = ";
+					str += "ocalc1"; str += "%"; str += SztToStr(params.fft_N[1] * params.fft_N[2]);
+					str += ";\n";
+
+					str += "\t"; str += off; str += " = ";
+					str += "("; str += batch; str += "/"; str += SztToStr(params.fft_N[1] * params.fft_N[2] * params.fft_N[3]);
+					str += ")*"; str += SztToStr(pStride[4]); str += " + ";
+
+					str += "(ocalc1"; str += "/"; str += SztToStr(params.fft_N[1] * params.fft_N[2]); str += ")*";
+					str += SztToStr(pStride[3]); str += " + ";
+
+					str += "(ocalc0"; str += "/"; str += SztToStr(params.fft_N[1]); str += ")*";
+					str += SztToStr(pStride[2]); str += " + ";
+					str += "(ocalc0"; str += "%"; str += SztToStr(params.fft_N[1]); str += ")*";
+					str += SztToStr(pStride[1]); str += ";\n";
+
+					str += "\t}\n";
+				}
+				break;
+			case 4:	// batch is decomposed over fft_N[1] and fft_N[2]; the quotient is scaled by pStride[3]
+				{
+					str += "\t{\n\tuint ocalc0 = ";
+					str += batch; str += "%"; str += SztToStr(params.fft_N[1] * params.fft_N[2]);
+					str += ";\n";
+
+					str += "\t"; str += off; str += " = ";
+					str += "("; str += batch; str += "/"; str += SztToStr(params.fft_N[1] * params.fft_N[2]); str += ")*";
+					str += SztToStr(pStride[3]); str += " + ";
+
+					str += "(ocalc0"; str += "/"; str += SztToStr(params.fft_N[1]); str += ")*";
+					str += SztToStr(pStride[2]); str += " + ";
+					str += "(ocalc0"; str += "%"; str += SztToStr(params.fft_N[1]); str += ")*";
+					str += SztToStr(pStride[1]); str += ";\n";
+
+					str += "\t}\n";
+				}
+				break;
+			case 3:	// batch is split by fft_N[1]: quotient scaled by pStride[2], remainder by pStride[1]
+				{
+					str += "\t"; str += off; str += " = ";
+					str += "("; str += batch; str += "/"; str += SztToStr(params.fft_N[1]); str += ")*";
+					str += SztToStr(pStride[2]); str += " + ";
+					str += "("; str += batch; str += "%"; str += SztToStr(params.fft_N[1]); str += ")*";
+					str += SztToStr(pStride[1]); str += ";\n";
+				}
+				break;
+			case 2:	// batch is scaled directly by pStride[1]
+				{
+					str += "\t"; str += off; str += " = ";
+					str += batch; str += "*"; str += SztToStr(pStride[1]); str += ";\n";
+				}
+				break;
+			default:	// any other fft_DataDim is treated as a programming error; with asserts disabled an empty string is returned
+				assert(false);
+			}
+
+			return str;
+		}
+
+    public:
+        CopyKernel( const FFTKernelGenKeyParams &paramsVal) :	// caches N and Nt and derives the h2c / c2h flags from the layouts in the key
+					params(paramsVal)
+
+        {
+			N = params.fft_N[0];
+			Nt = 1 + N/2;
+
+			h2c = (	(params.fft_inputLayout == CLFFT_HERMITIAN_PLANAR) ||
+					(params.fft_inputLayout == CLFFT_HERMITIAN_INTERLEAVED) ) ? true : false;
+			c2h = (	(params.fft_outputLayout == CLFFT_HERMITIAN_PLANAR) ||
+					(params.fft_outputLayout == CLFFT_HERMITIAN_INTERLEAVED) ) ? true : false;
+
+			// We only do out-of-place copies at this point
+			assert(params.fft_placeness == CLFFT_OUTOFPLACE);
+		}
+		// Appends the complete OpenCL source of the copy kernel to 'str' (existing contents of 'str' are kept)
+        void GenerateKernel(std::string &str)
+		{
+			std::string rType  = RegBaseType<PR>(1);	// type name used for planar (separate real / imaginary) buffers
+			std::string r2Type  = RegBaseType<PR>(2);	// type name used for interleaved buffers and for register R
+
+			bool inIlvd; // Input is interleaved format
+			bool outIlvd; // Output is interleaved format
+			inIlvd  = (	(params.fft_inputLayout == CLFFT_COMPLEX_INTERLEAVED) ||
+						(params.fft_inputLayout == CLFFT_HERMITIAN_INTERLEAVED) ) ? true : false;
+			outIlvd = (	(params.fft_outputLayout == CLFFT_COMPLEX_INTERLEAVED) ||
+						(params.fft_outputLayout == CLFFT_HERMITIAN_INTERLEAVED) ) ? true : false;
+
+
+
+			// Pragma
+			str += ClPragma<PR>();
+
+			std::string sfx = FloatSuffix<PR>();	// not used anywhere else in this function
+
+			// Copy kernel begin
+			str += "__kernel void ";
+
+			// Function name: copy_h2c when the input layout is hermitian, copy_c2h in every other case
+			if(h2c)	str += "copy_h2c";
+			else	str += "copy_c2h";
+
+			str += "(";
+
+			if(inIlvd)	// one interleaved input buffer, or separate real and imaginary input buffers
+			{
+				str += "__global const "; str += r2Type; str += " * restrict gbIn, ";
+			}
+			else
+			{
+				str += "__global const "; str += rType; str += " * restrict gbInRe, ";
+				str += "__global const "; str += rType; str += " * restrict gbInIm, ";
+			}
+
+			if(outIlvd)	// one interleaved output buffer, or separate real and imaginary output buffers
+			{
+				str += "__global "; str += r2Type; str += " * restrict gbOut)\n";
+			}
+			else
+			{
+				str += "__global "; str += rType; str += " * restrict gbOutRe, ";
+				str += "__global "; str += rType; str += " * restrict gbOutIm)\n";
+			}
+
+
+			str += "{\n";
+
+			// Initialize: 'me' is the global id of the work-item
+			str += "\tuint me = get_global_id(0);\n\t";
+
+			// Declare memory pointers
+			str += "\n\t";
+			str += "uint iOffset;\n\t";
+			str += "uint oOffset;\n\t";
+
+			// input
+			if(inIlvd)
+			{
+				str += "__global "; str += r2Type; str += " *lwbIn;\n\t";
+			}
+			else
+			{
+				str += "__global "; str += rType; str += " *lwbInRe;\n\t";
+				str += "__global "; str += rType; str += " *lwbInIm;\n\t";
+			}
+
+			// output: the second set of pointers (lwbOut2 / lwbOutRe2 / lwbOutIm2) is declared only when h2c is set
+			if(outIlvd)
+			{
+					str += "__global "; str += r2Type; str += " *lwbOut;\n";
+				if(h2c)
+				{
+					str += "\t";
+					str += "__global "; str += r2Type; str += " *lwbOut2;\n\n";
+				}
+			}
+			else
+			{
+					str += "__global "; str += rType; str += " *lwbOutRe;\n\t";
+					str += "__global "; str += rType; str += " *lwbOutIm;\n";
+				if(h2c)
+				{
+					str += "\t";
+					str += "__global "; str += rType; str += " *lwbOutRe2;\n\t";
+					str += "__global "; str += rType; str += " *lwbOutIm2;\n\n";
+				}
+			}
+
+
+
+			// Setup registers
+			str += "\t"; str += RegBaseType<PR>(2); str += " R;\n\n";
+
+			// Setup variables: batch = me / Nt, mel = me % Nt, mel2 = (N - mel) % N
+			str += "\tuint batch, mel, mel2;\n\t";
+			str += "batch = me/"; str += SztToStr(Nt); str += ";\n\t";
+			str += "mel = me%"; str += SztToStr(Nt); str += ";\n\t";
+			str += "mel2 = ("; str += SztToStr(N); str += " - mel)%"; str += SztToStr(N); str += ";\n\n";
+
+
+			// Setup memory pointers
+			str += OffsetCalc("iOffset", true);
+			str += OffsetCalc("oOffset", false);
+
+			// offset strings: element index (mel or mel2) scaled by the stride of the first dimension
+			std::string inF, inF2, outF, outF2;
+			inF   = "(mel*";  inF   += SztToStr(params.fft_inStride[0]);  inF   += ")";
+			inF2  = "(mel2*"; inF2  += SztToStr(params.fft_inStride[0]);  inF2  += ")";	// built but not referenced below
+			outF  = "(mel*";  outF  += SztToStr(params.fft_outStride[0]); outF  += ")";
+			outF2 = "(mel2*"; outF2 += SztToStr(params.fft_outStride[0]); outF2 += ")";
+
+			str += "\n\t";
+
+			// inputs
+			if(inIlvd)
+			{
+				str += "lwbIn = gbIn + iOffset + "; str += inF; str += ";\n\t";
+			}
+			else
+			{
+				str += "lwbInRe = gbInRe + iOffset + "; str += inF; str += ";\n\t";
+				str += "lwbInIm = gbInIm + iOffset + "; str += inF; str += ";\n\t";
+			}
+
+			// outputs: for h2c the second pointer set addresses element mel2 of the same output row
+			if(outIlvd)
+			{
+					str += "lwbOut = gbOut + oOffset + "; str += outF; str += ";\n";
+				if(h2c)
+				{
+					str += "\t";
+					str += "lwbOut2 = gbOut + oOffset + "; str += outF2; str += ";\n";
+				}
+			}
+			else
+			{
+					str += "lwbOutRe = gbOutRe + oOffset + "; str += outF; str += ";\n\t";
+					str += "lwbOutIm = gbOutIm + oOffset + "; str += outF; str += ";\n";
+				if(h2c)
+				{
+					str += "\t";
+					str += "lwbOutRe2 = gbOutRe + oOffset + "; str += outF2; str += ";\n\t";
+					str += "lwbOutIm2 = gbOutIm + oOffset + "; str += outF2; str += ";\n";
+				}
+			}
+
+			str += "\n\t";
+
+			// Do the copy: c2h reads one element and writes it unchanged
+			if(c2h)
+			{
+				if(inIlvd)
+				{
+					str += "R = lwbIn[0];\n\t";
+				}
+				else
+				{
+					str += "R.x = lwbInRe[0];\n\t";
+					str += "R.y = lwbInIm[0];\n\t";
+				}
+
+				if(outIlvd)
+				{
+					str += "lwbOut[0] = R;\n\n";
+				}
+				else
+				{
+					str += "lwbOutRe[0] = R.x;\n\t";
+					str += "lwbOutIm[0] = R.y;\n\t";
+				}
+			}
+			else	// writes the element at mel and, with R.y negated, at mel2; NOTE(review): the second pointers exist only when h2c is set, so this branch relies on h2c being true
+			{
+				if(inIlvd)
+				{
+					str += "R = lwbIn[0];\n\t";
+				}
+				else
+				{
+					str += "R.x = lwbInRe[0];\n\t";
+					str += "R.y = lwbInIm[0];\n\t";
+				}
+
+				if(outIlvd)
+				{
+					str += "lwbOut[0] = R;\n\t";
+					str += "R.y = -R.y;\n\t";
+					str += "lwbOut2[0] = R;\n\n";
+				}
+				else
+				{
+					str += "lwbOutRe[0] = R.x;\n\t";
+					str += "lwbOutIm[0] = R.y;\n\t";
+					str += "R.y = -R.y;\n\t";
+					str += "lwbOutRe2[0] = R.x;\n\t";
+					str += "lwbOutIm2[0] = R.y;\n\n";
+				}
+			}
+
+			str += "}\n";
+		}
+    };
+};
+
+
+//    Builds the kernel-generation key for the Copy generator from this plan.
+//    'params' is zeroed and then filled with precision, placeness, layouts, work-group limit, lengths,
+//    strides and scales. A plan with R strides (R = 1, 2 or 3) is described as an (R+1)-dimensional
+//    data structure: entries 0..R-1 carry the per-dimension length and strides, and entry R of the
+//    stride arrays carries the input / output distance.
+template<>
+clfftStatus FFTPlan::GetKernelGenKeyPvt<Copy> (FFTKernelGenKeyParams & params) const
+{
+    //    The envelope must be obtainable before a key is produced
+    const FFTEnvelope * pEnvelope = NULL;
+    OPENCL_V(const_cast<FFTPlan*>(this)->GetEnvelope (& pEnvelope), _T("GetEnvelope failed"));
+    BUG_CHECK (NULL != pEnvelope);
+
+    ::memset( &params, 0, sizeof( params ) );
+    params.fft_precision        = this->precision;
+    params.fft_placeness        = this->placeness;
+    params.fft_inputLayout      = this->inputLayout;
+    params.fft_MaxWorkGroupSize = this->envelope.limit_WorkGroupSize;
+
+    //    Input and output must describe the same number of dimensions
+    ARG_CHECK (this->inStride.size() == this->outStride.size());
+
+    params.fft_outputLayout = this->outputLayout;
+
+    //    Only 1-D, 2-D and 3-D plans are supported, and a length is needed for every stride
+    const size_t rank = this->inStride.size();
+    ARG_CHECK (rank >= 1 && rank <= 3);
+    ARG_CHECK (this->length.size() >= rank);
+
+    //    An R-D array is an (R+1)-D data structure; an R-D unit is a special case of an R-D array
+    params.fft_DataDim = rank + 1;
+    for (size_t d = 0; d < rank; ++d)
+    {
+        params.fft_N[d]         = this->length[d];
+        params.fft_inStride[d]  = this->inStride[d];
+        params.fft_outStride[d] = this->outStride[d];
+    }
+    params.fft_inStride[rank]  = this->iDist;
+    params.fft_outStride[rank] = this->oDist;
+
+    params.fft_fwdScale  = this->forwardScale;
+    params.fft_backScale = this->backwardScale;
+
+    return CLFFT_SUCCESS;
+}
+
+//    Computes the work sizes used to launch the copy kernel.
+//    One global work-item is requested per copied element: batchsize * (1 + N[0]/2), multiplied by
+//    N[1] and N[2] for the higher data dimensions. A single value is appended to each of globalWS
+//    and localWS; the local size is fixed at 64.
+//    Data dimensions other than 2, 3 or 4 are not supported: they assert in debug builds and return
+//    CLFFT_NOTIMPLEMENTED otherwise, instead of reporting success with a meaningless size.
+template<>
+clfftStatus FFTPlan::GetWorkSizesPvt<Copy> (std::vector<size_t> & globalWS, std::vector<size_t> & localWS) const
+{
+    FFTKernelGenKeyParams fftParams;
+	OPENCL_V( this->GetKernelGenKeyPvt<Copy>( fftParams ), _T("GetKernelGenKey() failed!") );
+
+	size_t count = this->batchsize;
+	switch(fftParams.fft_DataDim)
+	{
+	case 4: count *= fftParams.fft_N[2];	// fall through
+	case 3: count *= fftParams.fft_N[1];	// fall through
+	case 2: count *= (1 + fftParams.fft_N[0]/2);
+		break;
+	default:
+		assert(false);
+		return CLFFT_NOTIMPLEMENTED;
+	}
+
+	globalWS.push_back( count );
+    localWS.push_back( 64 );
+
+    return    CLFFT_SUCCESS;
+}
+
+//	The Copy generator defines no length limit of its own: it reports whatever the Stockham generator reports
+template<>
+clfftStatus FFTPlan::GetMax1DLengthPvt<Copy> (size_t * longest) const
+{
+	const clfftStatus stockhamStatus = FFTPlan::GetMax1DLengthPvt<Stockham>( longest );
+	return stockhamStatus;
+}
+
+using namespace CopyGenerator;
+
+//    Generates the copy kernel source for this plan and registers it with the repository.
+//    The key is built by GetKernelGenKeyPvt<Copy>, the source comes from CopyKernel in the precision
+//    named by the key (single precision when the key says CLFFT_SINGLE, double precision otherwise),
+//    and both the program text and the entry point names "copy_c2h" / "copy_h2c" are stored in fftRepo.
+template<>
+clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo ) const
+{
+    FFTKernelGenKeyParams key;
+    OPENCL_V( this->GetKernelGenKeyPvt<Copy> (key), _T("GetKernelGenKey() failed!") );
+
+    std::string source;
+    if( key.fft_precision == CLFFT_SINGLE )
+    {
+        CopyKernel<P_SINGLE> generator( key );
+        generator.GenerateKernel( source );
+    }
+    else
+    {
+        CopyKernel<P_DOUBLE> generator( key );
+        generator.GenerateKernel( source );
+    }
+
+    OPENCL_V( fftRepo.setProgramCode( Copy, key, source ), _T( "fftRepo.setclString() failed!" ) );
+    OPENCL_V( fftRepo.setProgramEntryPoints( Copy, key, "copy_c2h", "copy_h2c" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
+
+    return CLFFT_SUCCESS;
+}
diff --git a/src/library/generator.h b/src/library/generator.h
new file mode 100644
index 00000000..2aac9836
--- /dev/null
+++ b/src/library/generator.h
@@ -0,0 +1,31 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( AMD_CLFFT_generator_H )
+#define AMD_CLFFT_generator_H
+
+//	Identifies a kernel generator; the values select the FFTPlan::...Pvt<> specializations and tag the program code stored in FFTRepo
+enum clfftGenerators
+{
+	Stockham, // Using the Stockham autosort frameworks
+	Transpose,	// NOTE(review): presumably the transpose kernel generator -- its implementation is not visible here
+	Copy,	// Copy kernel generator, implemented by the <Copy> specializations in generator.copy.cpp
+	ENDGENERATORS			///< This value will always be last, and marks the length of clfftGenerators
+};
+
+#endif
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
new file mode 100644
index 00000000..8a6f5a60
--- /dev/null
+++ b/src/library/generator.stockham.cpp
@@ -0,0 +1,3250 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include "stdafx.h"
+#include <math.h>
+#include "generator.stockham.h"
+#include <list>
+
+// FFT Stockham Autosort Method
+//
+//   Each pass does one digit reverse in essence. Hence by the time all passes are done, complete
+//   digit reversal is done and output FFT is in correct order. Intermediate FFTs are stored in natural order,
+//   which is not the case with basic Cooley-Tukey algorithm. Natural order in intermediate data makes it
+//   convenient for stitching together passes with different radices.
+//
+//  Basic FFT algorithm:
+//
+//        Pass loop
+//        {
+//            Outer loop
+//            {
+//                Inner loop
+//                {
+//                }
+//            }
+//        }
+//
+//  The sweeps of the outer and inner loop resemble matrix indexing, this matrix changes shape with every pass as noted below
+//
+//   FFT pass diagram (radix 2)
+//
+//                k            k+R                                    k
+//            * * * * * * * * * * * * * * * *                     * * * * * * * *
+//            *   |             |           *                     *   |         *
+//            *   |             |           *                     *   |         *
+//            *   |             |           * LS        -->       *   |         *
+//            *   |             |           *                     *   |         *
+//            *   |             |           *                     *   |         *
+//            * * * * * * * * * * * * * * * *                     *   |         *
+//                         RS                                     *   |         * L
+//                                                                *   |         *
+//                                                                *   |         *
+//                                                                *   |         *
+//                                                                *   |         *
+//                                                                *   |         *
+//                                                                *   |         *
+//                                                                *   |         *
+//                                                                * * * * * * * *
+//                                                                       R
+//
+//
+//    With every pass, the matrix doubles in height and halves in length
+//
+//
+//  N = 2^T = Length of FFT
+//  q = pass loop index
+//  k = outer loop index = (0 ... R-1)
+//  j = inner loop index = (0 ... LS-1)
+//
+//  The table below shows how the values change as we go through the passes
+//
+//    q | LS   |  R   |  L  | RS
+//   ___|______|______|_____|___
+//    0 |  1   | N/2  |  2  | N
+//    1 |  2   | N/4  |  4  | N/2
+//    2 |  4   | N/8  |  8  | N/4
+//    . |  .   | .    |  .  | .
+//  T-1 |  N/2 | 1    |  N  | 2
+//
+//
+//   Data Read Order
+//     Radix 2: k*LS + j, (k+R)*LS + j
+//     Radix 3: k*LS + j, (k+R)*LS + j, (k+2R)*LS + j
+//     Radix 4: k*LS + j, (k+R)*LS + j, (k+2R)*LS + j, (k+3R)*LS + j
+//     Radix 5: k*LS + j, (k+R)*LS + j, (k+2R)*LS + j, (k+3R)*LS + j, (k+4R)*LS + j
+//
+//   Data Write Order
+//       Radix 2: k*L + j, k*L + j + LS
+//       Radix 3: k*L + j, k*L + j + LS, k*L + j + 2*LS
+//       Radix 4: k*L + j, k*L + j + LS, k*L + j + 2*LS, k*L + j + 3*LS
+//       Radix 5: k*L + j, k*L + j + LS, k*L + j + 2*LS, k*L + j + 3*LS, k*L + j + 4*LS
+//
+
+namespace StockhamGenerator
+{
+	// Experimental Start =========================================
+	// Kernel Generator Parameterization ==========================
+
+	// Uncomment this directive to activate parameter reads from file
+//#define PARMETERS_TO_BE_READ
+
+	// Parameters to read: values that ReadParameterFile extracts from "parameters.txt"
+	struct ParamRead
+	{
+		size_t	workGroupSize;	// number found on the "WorkGroupSize:" line
+		size_t	numTransformsPerWg;	// number found on the "TransformsPerWorkGroup:" line
+		std::vector<size_t> radices;	// every number found on the "Radices:" line, in file order
+		bool	halfLds;	// NOTE(review): ReadParameterFile never assigns this ("LdsUse:" is not parsed) -- confirm who sets it
+	};
+
+	// File format
+
+	// WorkGroupSize:
+	// TransformsPerWorkGroup:
+	// Radices:
+	// LdsUse:
+
+	// Removes the first run of decimal digits from 'text' and stores its value in 'value'.
+	// Everything up to and including that run is erased from 'text', so repeated calls walk through
+	// all the numbers on a line. Returns false, leaving both arguments untouched, when 'text'
+	// contains no digit.
+	static bool TakeNextNumber(std::string &text, size_t &value)
+	{
+		static const char digits[] = "0123456789";
+
+		const size_t numStart = text.find_first_of(digits);
+		if(numStart == std::string::npos)
+			return false;
+
+		// numEnd is npos when the digits run to the end of the line
+		const size_t numEnd = text.find_first_not_of(digits, numStart);
+		const size_t numLen = (numEnd == std::string::npos) ? std::string::npos : (numEnd - numStart);
+
+		const std::string val = text.substr(numStart, numLen);
+		value = static_cast<size_t>(strtol(val.c_str(), NULL, 10));
+
+		text.erase(0, numEnd);
+		return true;
+	}
+
+	// Reads kernel generator parameters from "parameters.txt" in the current directory into 'readParam'.
+	// Each line is searched for one of the keys "WorkGroupSize:", "TransformsPerWorkGroup:" or "Radices:";
+	// the first key found decides how the line is used. The key text is removed and the first number left
+	// on the line (all numbers, for "Radices:") is stored. A line whose key is not accompanied by any
+	// number is ignored; it used to raise std::out_of_range from std::string::substr.
+	// "LdsUse:" lines are not parsed, so readParam.halfLds is never written here, and radices are
+	// appended to whatever readParam.radices already holds.
+	// If the file cannot be opened a message is printed and the process exits with status -1.
+	void ReadParameterFile(ParamRead &readParam)
+	{
+		const char *fileName = "parameters.txt";
+		std::ifstream file(fileName);
+
+		if(!file.is_open())
+		{
+			std::cout << "File: " << fileName << " could not be opened, exiting ...." << std::endl;
+			exit(-1);
+		}
+
+		const std::string strWgs = "WorkGroupSize:";
+		const std::string strNtw = "TransformsPerWorkGroup:";
+		const std::string strRad = "Radices:";
+
+		std::string line;
+		while(std::getline(file, line))
+		{
+			size_t value = 0;
+			size_t pos;
+
+			pos = line.find(strWgs);
+			if(pos != std::string::npos)
+			{
+				line.erase(pos, strWgs.length());
+				if(TakeNextNumber(line, value))
+					readParam.workGroupSize = value;
+				continue;
+			}
+
+			pos = line.find(strNtw);
+			if(pos != std::string::npos)
+			{
+				line.erase(pos, strNtw.length());
+				if(TakeNextNumber(line, value))
+					readParam.numTransformsPerWg = value;
+				continue;
+			}
+
+			pos = line.find(strRad);
+			if(pos != std::string::npos)
+			{
+				line.erase(pos, strRad.length());
+				while(TakeNextNumber(line, value))
+					readParam.radices.push_back(value);
+				continue;
+			}
+		}
+	}
+
+	// Uncomment this directive to experiment with kernels read from file
+//#define KERNEL_INTERJECT
+
+	// Replaces str with the text of "fft_kernel.cl" ('\n' appended after each line); exits with -1 if it cannot be opened
+	void ReadKernelFromFile(std::string &str)
+	{
+		const char *fileName = "fft_kernel.cl";
+		std::ifstream kernelFile(fileName);
+
+		if(kernelFile.is_open() == false)
+		{
+			std::cout << "File: " << fileName << " could not be opened, exiting ...." << std::endl;
+			exit(-1);
+		}
+
+		// Start from an empty string, then accumulate the file line by line
+		str.erase();
+		for(std::string text; std::getline(kernelFile, text); )
+		{
+			str.append(text);
+			str.push_back('\n');
+		}
+	}
+
+	// Experimental End ===========================================
+
+#define RADIX_TABLE_COMMON 	{     2048,           256,             1,         4,     8, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{      512,            64,             1,         3,     8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{      256,            64,             1,         4,     4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{       64,            64,             4,         3,     4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{       32,            64,            16,         2,     8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{       16,            64,            16,         2,     4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{        4,            64,            32,         2,     2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },	\
+							{        2,            64,            64,         1,     2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },	// columns: Length, WorkGroupSize, NumTransforms, NumPasses, Radices[12]; rows shared by the P_SINGLE and P_DOUBLE tables
+
+
+
+    // Per-precision table of kernel parameters keyed by transform length (rows listed in the constructor)
+    template <Precision PR>
+	class KernelCoreSpecs
+	{
+		struct SpecRecord
+		{
+			size_t length;
+			size_t workGroupSize;
+			size_t numTransforms;
+			size_t numPasses;
+			size_t radices[12]; // Setting upper limit of number of passes to 12
+		};
+
+		typedef typename std::map<size_t, SpecRecord> SpecTable;
+		SpecTable specTable;
+
+		// Copies 'count' rows into specTable; a later row replaces an earlier one of equal length
+		void Register(const SpecRecord *rows, size_t count)
+		{
+			for(const SpecRecord *row = rows; row != rows + count; ++row)
+				specTable[row->length] = *row;
+		}
+
+	public:
+		KernelCoreSpecs()
+		{
+			if(PR == P_SINGLE)
+			{
+				const SpecRecord singleRows[] = {
+
+				RADIX_TABLE_COMMON
+
+				//  Length, WorkGroupSize, NumTransforms, NumPasses,  Radices
+				{     4096,           256,             1,         4,     8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0 },
+				{     1024,           128,             1,         4,     8, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 },
+				{      128,            64,             4,         3,     8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+				{        8,            64,            32,         2,     4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+
+				};
+
+				Register(singleRows, sizeof(singleRows)/sizeof(singleRows[0]));
+			}
+			else if(PR == P_DOUBLE)
+			{
+				const SpecRecord doubleRows[] = {
+
+				RADIX_TABLE_COMMON
+
+				//  Length, WorkGroupSize, NumTransforms, NumPasses,  Radices
+				{     1024,           128,             1,         4,     8, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 },
+				//{      128,            64,             1,         7,     2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0 },
+				{      128,            64,             4,         3,     8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+				{        8,            64,            16,         3,     2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+
+				};
+
+				Register(doubleRows, sizeof(doubleRows)/sizeof(doubleRows[0]));
+			}
+			else
+			{
+				assert(false);
+			}
+		}
+
+		// Reports the pass count and radix list stored for 'length'; 0 and NULL when there is no row
+		void GetRadices(size_t length, size_t &numPasses, const size_t * &pRadices) const
+		{
+			typename SpecTable::const_iterator found = specTable.find(length);
+			if(found == specTable.end())
+			{
+				pRadices = NULL;
+				numPasses = 0;
+				return;
+			}
+
+			pRadices = found->second.radices;
+			numPasses = found->second.numPasses;
+		}
+
+		// Reports the work group size and transforms per work group stored for 'length'; both 0 when there is no row
+		void GetWGSAndNT(size_t length, size_t &workGroupSize, size_t &numTransforms) const
+		{
+			typename SpecTable::const_iterator found = specTable.find(length);
+			const bool hit = (found != specTable.end());
+			workGroupSize = hit ? found->second.workGroupSize : 0;
+			numTransforms = hit ? found->second.numTransforms : 0;
+		}
+	};
+
+
+
+	// Given the length of 1d fft, this function determines the appropriate work group size
+	// and the number of transforms per work group
+	// TODO for optimizations - experiment with different possibilities for work group sizes and num transforms for improving performance
+	void DetermineSizes(const size_t &MAX_WGS, const size_t &length, size_t &workGroupSize, size_t &numTrans)
+	{
+		// Lengths that cannot be sized (0, or a length left without a usable divisor by an
+		// unsupported prime factor) yield workGroupSize = numTrans = 0 instead of looping forever.
+		assert(MAX_WGS >= 64);
+
+		if(length == 0) { workGroupSize = 0; numTrans = 0; return; } // 0%rad == 0 would spin the factor loop below
+
+		if(length == 1) // special case
+		{
+			workGroupSize = 64;
+			numTrans = 64;
+			return;
+		}
+
+		size_t baseRadix[] = {5,3,2}; // list only supported primes
+		size_t baseRadixSize = sizeof(baseRadix)/sizeof(baseRadix[0]);
+
+		// primeFactorsExpanded[p] = largest power of p dividing length; l = what is left of length afterwards
+		size_t l = length;
+		std::map<size_t, size_t> primeFactorsExpanded;
+		for(size_t r=0; r<baseRadixSize; r++)
+		{
+			size_t rad = baseRadix[r];
+			size_t e = 1;
+			while(!(l%rad))
+			{
+				l /= rad;
+				e *= rad;
+			}
+
+			primeFactorsExpanded[rad] = e;
+		}
+
+		assert(l == 1); // Makes sure the number is composed of only supported primes
+
+		if		(primeFactorsExpanded[2] == length)	// Length is pure power of 2
+		{
+			if		(length >= 1024)	{ workGroupSize = (MAX_WGS >= 256) ? 256 : MAX_WGS; numTrans = 1; }
+			else if (length == 512)		{ workGroupSize = 64; numTrans = 1; }
+			else if	(length >= 16)		{ workGroupSize = 64;  numTrans = 256/length; }
+			else						{ workGroupSize = 64;  numTrans = 128/length; }
+		}
+		else if	(primeFactorsExpanded[3] == length) // Length is pure power of 3
+		{
+			workGroupSize = (MAX_WGS >= 256) ? 243 : 27;
+			if(length >= 3*workGroupSize)	numTrans = 1;
+			else							numTrans = (3*workGroupSize)/length;
+		}
+		else if	(primeFactorsExpanded[5] == length) // Length is pure power of 5
+		{
+			workGroupSize = (MAX_WGS >= 128) ? 125 : 25;
+			if(length >= 5*workGroupSize)	numTrans = 1;
+			else							numTrans = (5*workGroupSize)/length;
+		}
+		else
+		{
+			size_t leastNumPerWI; // least number of elements in one work item
+			size_t maxWorkGroupSize; // maximum work group size desired
+
+			if		(primeFactorsExpanded[2] * primeFactorsExpanded[3] == length) // Length is mix of 2&3 only
+			{
+				if(!(length%12))	{ leastNumPerWI = 12; maxWorkGroupSize = (MAX_WGS >= 128) ? 128 : MAX_WGS; }
+				else				{ leastNumPerWI = 6;  maxWorkGroupSize = (MAX_WGS >= 256) ? 256 : MAX_WGS; }
+			}
+			else if	(primeFactorsExpanded[2] * primeFactorsExpanded[5] == length) // Length is mix of 2&5 only
+			{
+				if(!(length%20))	{ leastNumPerWI = 20; maxWorkGroupSize = 64; }
+				else				{ leastNumPerWI = 10; maxWorkGroupSize = (MAX_WGS >= 128) ? 128 : MAX_WGS; }
+			}
+			else if (primeFactorsExpanded[3] * primeFactorsExpanded[5] == length) // Length is mix of 3&5 only
+			{
+				leastNumPerWI = 15;
+				maxWorkGroupSize = 64;
+			}
+			else
+			{
+				leastNumPerWI = 30;
+				maxWorkGroupSize = 64;
+			}
+
+
+			// Make sure the work group size does not exceed MAX_WGS
+			// for large problems sizes, this means doing more work per work-item
+			size_t lnpi;
+			size_t ft = 1;
+			while(1)
+			{
+				lnpi = leastNumPerWI * ft++;
+				if(lnpi > length) { workGroupSize = 0; numTrans = 0; return; } // no larger multiple can divide length: unsupported length
+				if(length%lnpi) continue;
+
+				if( (length/lnpi) <= MAX_WGS )
+				{
+					leastNumPerWI = lnpi;
+					break;
+				}
+			}
+
+			numTrans = 1;
+			size_t n=1;
+			while( ((n*length)/leastNumPerWI) <= maxWorkGroupSize )
+			{
+				numTrans = n;
+				n++;
+			}
+
+			workGroupSize = (numTrans*length)/leastNumPerWI;
+			assert(workGroupSize <= MAX_WGS);
+		}
+	}
+
+	// Twiddle factors table
+    class TwiddleTable
+    {
+        size_t N; // length
+		std::vector<double> wc, ws; // cosine, sine values; vectors so that copying a table is safe
+
+	public:
+		TwiddleTable(size_t length) : N(length)
+		{
+			// Reserve room for the tables up front
+			// We compute twiddle factors in double precision for both P_SINGLE and P_DOUBLE
+			wc.reserve(N);
+			ws.reserve(N);
+		}
+
+		// No user-written destructor: the vectors free their storage, and the implicit copy
+		// operations duplicate the values rather than sharing one delete[]-owned array.
+
+		// Appends one "(" RegBaseType<PR>(2) ")(" cos sfx ", " sin sfx "),\n" line per twiddle factor
+		// to twStr. radices lists the per-pass radices in order; their product must equal N
+		// (asserted), which makes the table N-1 entries long.
+		template <Precision PR>
+		void GenerateTwiddleTable(const std::vector<size_t> &radices, std::string &twStr)
+		{
+			const double TWO_PI = -6.283185307179586476925286766559; // note the sign: the value is -2*pi
+
+			// Make sure the product of the radices equals N
+			size_t sz = 1;
+			for(std::vector<size_t>::const_iterator i = radices.begin();
+				i != radices.end(); i++)
+			{
+				sz *= (*i);
+			}
+			assert(sz == N);
+
+			// Generate the table (rebuilt from scratch on every call)
+			wc.clear();
+			ws.clear();
+			size_t L = 1;
+			for(std::vector<size_t>::const_iterator i = radices.begin();
+				i != radices.end(); i++)
+			{
+				size_t radix = *i;
+
+				L *= radix;
+
+				// Twiddle factors
+				for(size_t k=0; k<(L/radix); k++)
+				{
+					double theta = TWO_PI * ((double)k)/((double)L);
+
+					for(size_t j=1; j<radix; j++)
+					{
+						double c = cos(((double)j) * theta);
+						double s = sin(((double)j) * theta);
+
+						//if (fabs(c) < 1.0E-12)	c = 0.0;
+						//if (fabs(s) < 1.0E-12)	s = 0.0;
+
+						wc.push_back(c);
+						ws.push_back(s);
+					}
+				}
+			}
+
+			std::string sfx = FloatSuffix<PR>();
+
+			// Stringize the table; bounded by what was generated so N == 0 cannot wrap N-1
+			std::stringstream ss;
+			for(size_t i = 0; i < wc.size(); i++)
+			{
+				ss << "("; ss << RegBaseType<PR>(2); ss << ")(";
+
+				char cv[64], sv[64];
+				sprintf(cv, "%036.34lf", wc[i]);
+				sprintf(sv, "%036.34lf", ws[i]);
+				ss << cv; ss << sfx; ss << ", ";
+				ss << sv; ss << sfx; ss << "),\n";
+			}
+			twStr += ss.str();
+		}
+    };
+
+
+	// Twiddle factors table for large N
+	// used in 3-step algorithm
+    class TwiddleTableLarge
+    {
+        size_t N; // length
+		size_t X, Y;
+		size_t tableSize;
+		std::vector<double> wc, ws; // cosine, sine values, Y rows of X entries each
+
+	public:
+		TwiddleTableLarge(size_t length) : N(length)
+		{
+			X = size_t(1) << ARBITRARY::TWIDDLE_DEE;
+			Y = DivRoundingUp<size_t> (CeilPo2(N), ARBITRARY::TWIDDLE_DEE);
+			tableSize = X * Y;
+
+			// Allocate memory for the tables
+			wc.resize(tableSize);
+			ws.resize(tableSize);
+		}
+
+		// No user-written destructor: the std::vector members release the tables, and
+		// the compiler-generated copy operations duplicate them instead of sharing raw
+		// pointers (which a delete[]-based destructor would free twice).
+		//
+		// GenerateTwiddleTable appends to twStr a __constant table named TwTableLargeName()
+		// and an always_inline lookup function named TwTableLargeFunc().
+
+		template <Precision PR>
+		void GenerateTwiddleTable(std::string &twStr)
+		{
+			const double TWO_PI = -6.283185307179586476925286766559;
+
+			// Generate the table
+			size_t nt = 0;
+			double phi = TWO_PI / double (N);
+			for (size_t iY = 0; iY < Y; ++iY)
+			{
+				size_t i = size_t(1) << (iY * ARBITRARY::TWIDDLE_DEE);
+				for (size_t iX = 0; iX < X; ++iX)
+				{
+					size_t j = i * iX;
+
+					double c = cos(phi * (double)j);
+					double s = sin(phi * (double)j);
+
+					//if (fabs(c) < 1.0E-12)	c = 0.0;
+					//if (fabs(s) < 1.0E-12)	s = 0.0;
+
+					wc[nt]   = c;
+					ws[nt++] = s;
+				}
+			}
+
+			std::string sfx = FloatSuffix<PR>();
+
+			// Stringize the table
+			std::stringstream ss;
+			nt = 0;
+
+			ss << "\n __constant ";
+			ss << RegBaseType<PR>(2);
+			ss << " " << TwTableLargeName();
+			ss << "[" << Y << "][" << X << "] = {\n";
+			for (size_t iY = 0; iY < Y; ++iY)
+			{
+				ss << "{ ";
+				for (size_t iX = 0; iX < X; ++iX)
+				{
+					char cv[64], sv[64];
+					sprintf(cv, "%036.34lf", wc[nt]);
+					sprintf(sv, "%036.34lf", ws[nt++]);
+					ss << "("; ss << RegBaseType<PR>(2); ss << ")(";
+					ss << cv; ss << sfx; ss << ", ";
+					ss << sv; ss << sfx; ss << ")";
+					ss << ", ";
+				}
+				ss << " },\n";
+			}
+			ss << "};\n\n";
+
+
+			// Twiddle calc function
+			ss << "__attribute__((always_inline)) ";
+			ss << RegBaseType<PR>(2);
+			ss << "\n" << TwTableLargeFunc() << "(uint u)\n{\n";
+
+			ss << "\t" "uint j = u & " << unsigned(X-1) << ";\n";
+			ss << "\t" ; ss << RegBaseType<PR>(2); ss << " result = ";
+			ss << TwTableLargeName();
+			ss << "[0][j];\n";
+
+			for (size_t iY = 1; iY < Y; ++iY)
+			{
+				std::string phasor = TwTableLargeName();
+				phasor += "[";
+				phasor += SztToStr(iY);
+				phasor += "][j]";
+
+				stringpair product = ComplexMul((RegBaseType<PR>(2)).c_str(), "result", phasor.c_str());
+
+				ss << "\t" "u >>= " << unsigned (ARBITRARY::TWIDDLE_DEE) << ";\n";
+				ss << "\t" "j = u & " << unsigned(X-1) << ";\n";
+				ss << "\t" "result = " << product.first << "\n";
+				ss << "\t" "\t" << product.second <<";\n";
+			}
+			ss << "\t" "return result;\n}\n\n";
+
+			twStr += ss.str();
+		}
+    };
+
+    // A pass inside an FFT kernel
+    template <Precision PR>
+    class Pass
+    {
+		size_t position;					// Position in the kernel
+
+		size_t algL;						// 'L' value from fft algorithm
+		size_t algLS;						// 'LS' value
+		size_t algR;						// 'R' value
+
+		size_t length;						// Length of FFT
+        size_t radix;						// Base radix
+		size_t cnPerWI;						// Complex numbers per work-item
+
+		size_t workGroupSize;				// size of the workgroup = (length / cnPerWI)
+											// this number is essentially number of work-items needed to compute 1 transform
+											// this number will be different from the kernel class workGroupSize if there
+											// are multiple transforms per workgroup
+
+		size_t numButterfly;				// Number of basic FFT butterflies = (cnPerWI / radix)
+		size_t numB1, numB2, numB4;			// number of different types of butterflies
+
+		bool r2c;							// real to complex transform
+		bool c2r;							// complex to real transform
+		bool rcFull;
+		bool rcSimple;
+
+		bool enableGrouping;
+		bool linearRegs;
+		Pass<PR> *nextPass;
+
+		inline void RegBase(size_t regC, std::string &str) const
+		{
+			// Appends the tag "B" followed by regC
+			str.append("B").append(SztToStr(regC));
+		}
+
+		inline void RegBaseAndCount(size_t num, std::string &str) const
+		{
+			// Appends the tag "C" followed by num
+			str.append("C").append(SztToStr(num));
+		}
+
+		inline void RegBaseAndCountAndPos(const std::string &RealImag, size_t radPos, std::string &str) const
+		{
+			// Appends the RealImag tag followed by radPos
+			str.append(RealImag).append(SztToStr(radPos));
+		}
+
+		void RegIndex(size_t regC, size_t num, const std::string &RealImag, size_t radPos, std::string &str) const
+		{
+			// Appends "B<regC>" "C<num>" "<RealImag><radPos>" to str, in that order
+			RegBase(regC, str); RegBaseAndCount(num, str);
+			RegBaseAndCountAndPos(RealImag, radPos, str);
+		}
+
+		// Appends to passStr one declaration statement of type regType per butterfly (numB of them),
+		// each naming 'radix' registers: R<i*radix+r> when linearRegs, else B<regC>C<i>R<r>, B<regC>C<i>I<r> pairs
+		void DeclareRegs(const std::string &regType, size_t regC, size_t numB, std::string &passStr) const
+		{
+			if(linearRegs)
+			{
+				assert(regC == 1);
+				assert(numB == numButterfly);
+			}
+
+			std::string prefix;
+			RegBase(regC, prefix);
+
+			for(size_t b=0; b<numB; b++)
+			{
+				passStr += "\n\t";
+				passStr += regType;
+				passStr += " ";
+
+				std::string perButterfly = prefix;
+				RegBaseAndCount(b, perButterfly);
+
+				// walk the radix positions; the exit test sits after the name has been emitted
+				size_t r = 0;
+				while(true)
+				{
+					if(!linearRegs)
+					{
+						std::string re(perButterfly), im(perButterfly);
+						RegBaseAndCountAndPos("R", r, re); // real
+						RegBaseAndCountAndPos("I", r, im); // imaginary
+						passStr += re + ", " + im;
+					}
+					else
+					{
+						std::string linearName = "R";
+						RegBaseAndCountAndPos("", b*radix + r, linearName);
+						passStr += linearName;
+					}
+
+					// the last register closes the statement, every other one is followed by a comma
+					if(r == radix-1)
+					{
+						passStr += ";";
+						break;
+					}
+
+					passStr += ", ";
+					r++;
+				}
+			}
+		}
+
+		// Returns "<type> *R0, <type> *R1, ..." covering cnPerWI registers when linearRegs is set, "" otherwise
+		inline std::string IterRegArgs() const
+		{
+			std::string args;
+			if(!linearRegs)
+				return args;
+
+			const std::string regType = RegBaseType<PR>(2);
+			for(size_t idx=0; idx<cnPerWI; idx++)
+			{
+				if(idx > 0)
+					args += ", ";
+				args += regType;
+				args += " *R";
+				args += SztToStr(idx);
+			}
+			return args;
+		}
+
+#define SR_READ			1
+#define SR_TWMUL		2
+#define SR_TWMUL_3STEP	3
+#define SR_WRITE		4
+
+#define SR_COMP_REAL 0 // real
+#define SR_COMP_IMAG 1 // imag
+#define SR_COMP_BOTH 2 // real & imag
+
+		// SweepRegs is to iterate through the registers to do the three basic operations:
+		// reading, twiddle multiplication, writing
+		void SweepRegs(	size_t flag, bool fwd, bool interleaved, size_t stride, size_t component,
+						double scale,
+						const std::string &bufferRe, const std::string &bufferIm, const std::string &offset,
+						size_t regC, size_t numB, size_t numPrev, std::string &passStr) const
+		{
+			assert( (flag == SR_READ )			||
+					(flag == SR_TWMUL)			||
+					(flag == SR_TWMUL_3STEP)	||
+					(flag == SR_WRITE) );
+
+			const std::string twTable = TwTableName();
+			const std::string tw3StepFunc = TwTableLargeFunc();
+
+			// component: 0 - real, 1 - imaginary, 2 - both
+			size_t cStart, cEnd;
+			switch(component)
+			{
+			case SR_COMP_REAL:	cStart = 0; cEnd = 1; break;
+			case SR_COMP_IMAG:	cStart = 1; cEnd = 2; break;
+			case SR_COMP_BOTH:	cStart = 0; cEnd = 2; break;
+			default:	assert(false);	// NOTE(review): with NDEBUG, cStart/cEnd are left uninitialized on this path
+			}
+
+			// Read/Write logic:
+			// The double loop inside pass loop of FFT algorithm is mapped into the
+			// workGroupSize work items with each work item handling cnPerWI numbers
+
+			// Read logic:
+			// Reads for any pass appear the same with the stockham algorithm when mapped to
+			// the work items. The buffer is divided into (L/radix) sized blocks and the
+			// values are read in linear order inside each block.
+
+			// Vector reads are possible if we have unit strides
+			// since read pattern remains the same for all passes and they are contiguous
+			// Writes are not contiguous
+
+			// TODO : twiddle multiplies can be combined with read
+			// TODO : twiddle factors can be reordered in the table to do vector reads of them
+
+			// Write logic:
+			// outer loop index k and the inner loop index j map to 'me' as follows:
+			// In one work-item (1 'me'), there are 'numButterfly' fft butterflies. They
+			// are indexed as numButterfly*me + butterflyIndex, where butterflyIndex's range is
+			// 0 ... numButterfly-1. The total number of butterflies needed is covered over all
+			// the work-items. So essentially the double loop k,j is flattened to fit this linearly
+			// increasing 'me'.
+			// j = (numButterfly*me + butterflyIndex)%LS
+			// k = (numButterfly*me + butterflyIndex)/LS
+			// All generated text is appended to passStr. numPrev seeds butterflyIndex; numB butterflies of
+			// regC vector lanes each are swept, naming registers R<i*radix+r> (linearRegs) or B<regC>C<i>{R,I}<r>.
+
+			std::string twType = RegBaseType<PR>(2);
+			std::string rType  = RegBaseType<PR>(1);
+
+			size_t butterflyIndex = numPrev;
+
+			std::string regBase;
+			RegBase(regC, regBase);
+
+			// special write back to global memory with float4 grouping, writing 2 complex numbers at once
+			if( numB && (numB%2 == 0) && (regC == 1) && (stride == 1) && (numButterfly%2 == 0) && (algLS%2 == 0) && (flag == SR_WRITE) &&
+				(nextPass == NULL) && interleaved && (component == SR_COMP_BOTH) && linearRegs && enableGrouping )
+			{
+				assert((numButterfly * workGroupSize) == algLS);
+				assert(bufferRe.compare(bufferIm) == 0); // Make sure Real & Imag buffer strings are same for interleaved data
+
+				passStr += "\n\t";
+				passStr += "__global "; passStr += RegBaseType<PR>(4);
+				passStr += " *buff4g = "; passStr += bufferRe; passStr += ";\n\t"; // Assuming 'outOffset' is 0, so not adding it here
+
+				for(size_t r=0; r<radix; r++) // setting the radix loop outside to facilitate grouped writing
+				{
+					butterflyIndex = numPrev;
+
+					for(size_t i=0; i<(numB/2); i++)
+					{
+						std::string regIndexA = "(*R";
+						std::string regIndexB = "(*R";
+
+						RegBaseAndCountAndPos("", (2*i + 0)*radix + r, regIndexA); regIndexA += ")";
+						RegBaseAndCountAndPos("", (2*i + 1)*radix + r, regIndexB); regIndexB += ")";
+
+						passStr += "\n\t";
+						passStr += "buff4g"; passStr += "[ ";
+						passStr += SztToStr(numButterfly/2); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+						passStr += " + ";
+						passStr += SztToStr(r*(algLS/2)); passStr += " ]";
+						passStr += " = "; passStr += "("; passStr += RegBaseType<PR>(4); passStr += ")(";
+						passStr += regIndexA; passStr += ".x, ";
+						passStr += regIndexA; passStr += ".y, ";
+						passStr += regIndexB; passStr += ".x, ";
+						passStr += regIndexB; passStr += ".y) ";
+						if(scale != 1.0f) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); }
+						passStr += ";";
+
+						butterflyIndex++;
+					}
+				}
+
+				return;	// the grouped path emitted every write; the butterflyIndex assert at the end is not reached
+			}
+
+			for(size_t i=0; i<numB; i++)
+			{
+				std::string regBaseCount = regBase;
+				RegBaseAndCount(i, regBaseCount);
+
+				if(flag == SR_READ) // read operation
+				{
+					// the 'r' (radix index) loop is placed outer to the
+					// 'v' (vector index) loop to make possible vectorized reads
+
+					for(size_t r=0; r<radix; r++)
+					{
+						for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
+						{
+							std::string tail;
+							std::string regIndex;
+							regIndex = linearRegs ? "(*R" : regBaseCount;
+							std::string buffer;
+
+							// Read real & imag at once
+							if(interleaved && (component == SR_COMP_BOTH) && linearRegs)
+							{
+								assert(bufferRe.compare(bufferIm) == 0); // Make sure Real & Imag buffer strings are same for interleaved data
+								buffer = bufferRe;
+								RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ")";
+								tail = ";";
+							}
+							else
+							{
+								if(c == 0)
+								{
+									if(linearRegs) { RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").x"; }
+									else		   { RegBaseAndCountAndPos("R", r, regIndex); }
+									buffer = bufferRe;
+									tail = interleaved ? ".x;" : ";";
+								}
+								else
+								{
+									if(linearRegs) { RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").y"; }
+									else		   { RegBaseAndCountAndPos("I", r, regIndex); }
+									buffer = bufferIm;
+									tail = interleaved ? ".y;" : ";";
+								}
+							}
+
+							for(size_t v=0; v<regC; v++) // TODO: vectorize the reads; instead of reading individually for consecutive reads of vector elements
+							{
+								std::string regIndexSub(regIndex);
+								if(regC != 1)
+								{
+									regIndexSub += ".s";
+									regIndexSub += SztToStr(v);
+								}
+
+								passStr += "\n\t";
+								passStr += regIndexSub;
+								passStr += " = "; passStr += buffer;
+								passStr += "["; passStr += offset; passStr += " + ( "; passStr += SztToStr(numPrev); passStr += " + ";
+								passStr += "me*"; passStr += SztToStr(numButterfly); passStr += " + ";
+								passStr += SztToStr(i*regC + v); passStr += " + ";
+								passStr += SztToStr(r*length/radix); passStr += " )*";
+								passStr += SztToStr(stride); passStr += "]"; passStr += tail;
+							}
+
+							// Since we read real & imag at once, we break the loop
+							if(interleaved && (component == SR_COMP_BOTH) && linearRegs)
+								break;
+						}
+					}
+				}
+				else if( (flag == SR_TWMUL) || (flag == SR_TWMUL_3STEP) ) // twiddle multiplies and writes require that 'r' loop be innermost
+				{
+					for(size_t v=0; v<regC; v++)
+					{
+						for(size_t r=0; r<radix; r++)
+						{
+
+							std::string regRealIndex, regImagIndex;
+							regRealIndex = linearRegs ? "(*R" : regBaseCount;
+							regImagIndex = linearRegs ? "(*R" : regBaseCount;
+
+							if(linearRegs)
+							{
+								RegBaseAndCountAndPos("", i*radix + r, regRealIndex); regRealIndex += ").x";
+								RegBaseAndCountAndPos("", i*radix + r, regImagIndex); regImagIndex += ").y";
+							}
+							else
+							{
+								RegBaseAndCountAndPos("R", r, regRealIndex);
+								RegBaseAndCountAndPos("I", r, regImagIndex);
+							}
+
+							if(regC != 1)
+							{
+								regRealIndex += ".s"; regRealIndex += SztToStr(v);
+								regImagIndex += ".s"; regImagIndex += SztToStr(v);
+							}
+
+
+							if(flag == SR_TWMUL) // twiddle multiply operation
+							{
+								if(r == 0) // no twiddle muls needed
+									continue;
+
+								passStr += "\n\t{\n\t\t"; passStr += twType; passStr += " W = ";
+								passStr += twTable; passStr += "["; passStr += SztToStr(algLS-1); passStr += " + ";
+								passStr += SztToStr(radix-1); passStr += "*(("; passStr += SztToStr(numButterfly);
+								passStr += "*me + "; passStr += SztToStr(butterflyIndex); passStr += ")%";
+								passStr += SztToStr(algLS); passStr += ") + "; passStr += SztToStr(r-1);
+								passStr += "];\n\t\t";
+							}
+							else	// 3-step twiddle
+							{
+								passStr += "\n\t{\n\t\t"; passStr += twType; passStr += " W = ";
+								passStr += tw3StepFunc; passStr += "( ";
+								passStr += "(("; passStr += SztToStr(numButterfly); passStr += "*me + ";
+								passStr += SztToStr(butterflyIndex);
+								passStr += ")%"; passStr += SztToStr(algLS); passStr += " + ";
+								passStr += SztToStr(r*algLS); passStr += ") * b "; passStr += ");\n\t\t";
+							}
+
+							passStr += rType; passStr += " TR, TI;\n\t\t";
+							if(fwd)
+							{
+								passStr += "TR = (W.x * "; passStr += regRealIndex; passStr += ") - (W.y * ";
+								passStr += regImagIndex; passStr += ");\n\t\t";
+								passStr += "TI = (W.y * "; passStr += regRealIndex; passStr += ") + (W.x * ";
+								passStr += regImagIndex; passStr += ");\n\t\t";
+							}
+							else
+							{
+								passStr += "TR =  (W.x * "; passStr += regRealIndex; passStr += ") + (W.y * ";
+								passStr += regImagIndex; passStr += ");\n\t\t";
+								passStr += "TI = -(W.y * "; passStr += regRealIndex; passStr += ") + (W.x * ";
+								passStr += regImagIndex; passStr += ");\n\t\t";
+							}
+
+							passStr += regRealIndex; passStr += " = TR;\n\t\t";
+							passStr += regImagIndex; passStr += " = TI;\n\t}\n";
+
+						}
+
+						butterflyIndex++;	// advances once per (i, v) pair, after all radix positions are emitted
+					}
+				}
+				else // write operation
+				{
+					for(size_t v=0; v<regC; v++)
+					{
+						for(size_t r=0; r<radix; r++)
+						{
+							for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
+							{
+								std::string tail;
+								std::string regIndex;
+								regIndex = linearRegs ? "(*R" : regBaseCount;
+								std::string buffer;
+
+								// Write real & imag at once
+								if(interleaved && (component == SR_COMP_BOTH) && linearRegs)
+								{
+									assert(bufferRe.compare(bufferIm) == 0); // Make sure Real & Imag buffer strings are same for interleaved data
+									buffer = bufferRe;
+									RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ")";
+									tail = "";
+								}
+								else
+								{
+									if(c == 0)
+									{
+										if(linearRegs) { RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").x"; }
+										else		   { RegBaseAndCountAndPos("R", r, regIndex); }
+										buffer = bufferRe;
+										tail = interleaved ? ".x" : "";
+									}
+									else
+									{
+										if(linearRegs) { RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").y"; }
+										else		   { RegBaseAndCountAndPos("I", r, regIndex); }
+										buffer = bufferIm;
+										tail = interleaved ? ".y" : "";
+									}
+								}
+
+								if(regC != 1)
+								{
+									regIndex += ".s";
+									regIndex += SztToStr(v);
+								}
+
+								passStr += "\n\t";
+								passStr += buffer; passStr += "["; passStr += offset; passStr += " + ( ";
+
+								if( (numButterfly * workGroupSize) > algLS )
+								{
+									passStr += "(("; passStr += SztToStr(numButterfly);
+									passStr += "*me + "; passStr += SztToStr(butterflyIndex); passStr += ")/";
+									passStr += SztToStr(algLS); passStr += ")*"; passStr += SztToStr(algL); passStr += " + (";
+									passStr += SztToStr(numButterfly); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+									passStr += ")%"; passStr += SztToStr(algLS); passStr += " + ";
+								}
+								else
+								{
+									passStr += SztToStr(numButterfly); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+									passStr += " + ";
+								}
+
+								passStr += SztToStr(r*algLS); passStr += " )*"; passStr += SztToStr(stride); passStr += "]";
+								passStr += tail; passStr += " = "; passStr += regIndex;
+								if(scale != 1.0f) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); }
+								passStr += ";";
+
+								// Since we write real & imag at once, we break the loop
+								if(interleaved && (component == SR_COMP_BOTH) && linearRegs)
+									break;
+							}
+						}
+
+						butterflyIndex++;
+					}
+
+				}
+			}
+
+			assert(butterflyIndex <= numButterfly);
+		}
+
+
+		// Special SweepRegs function to carry out some R-C/C-R specific operations
+		void SweepRegsRC(	size_t flag, bool fwd, bool interleaved, size_t stride, size_t component,
+							double scale, bool setZero, bool batch2, bool oddt,
+							const std::string &bufferRe, const std::string &bufferIm, const std::string &offset,
+							std::string &passStr) const
+		{
+			assert( (flag == SR_READ ) ||
+					(flag == SR_WRITE) );
+
+
+			// component: 0 - real, 1 - imaginary, 2 - both
+			size_t cStart, cEnd;
+			switch(component)
+			{
+			case SR_COMP_REAL:	cStart = 0; cEnd = 1; break;
+			case SR_COMP_IMAG:	cStart = 1; cEnd = 2; break;
+			case SR_COMP_BOTH:	cStart = 0; cEnd = 2; break;
+			default:	assert(false);
+			}
+
+			std::string rType  = RegBaseType<PR>(1);
+
+			assert(r2c || c2r);
+			assert(linearRegs);
+			bool singlePass = ((position == 0) && (nextPass == NULL));
+
+			size_t numCR = numButterfly * radix;
+			if(!(numCR%2)) assert(!oddt);
+
+			size_t rStart = 0;
+			size_t rEnd = numCR;
+
+			bool oddp = ((numCR%2) && (numCR > 1) && !setZero);
+			if(oddp)
+			{
+				if(oddt)	{ rStart = numCR-1; rEnd = numCR+1; }
+				else		{ rStart = 0;		rEnd = numCR-1; }
+			}
+
+			if(!oddp) assert(!oddt);
+
+			for(size_t r=rStart; r<rEnd; r++)
+			{
+				for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
+				{
+					if(flag == SR_READ) // read operation
+					{
+						std::string tail, tail2;
+						std::string regIndex = "(*R";
+						std::string buffer;
+
+						if(c == 0)
+						{
+							RegBaseAndCountAndPos("", r, regIndex); regIndex += ").x";
+							buffer = bufferRe;
+							tail  = interleaved ? ".x;" : ";";
+							tail2 = interleaved ? ".y;" : ";";
+						}
+						else
+						{
+							RegBaseAndCountAndPos("", r, regIndex); regIndex += ").y";
+							buffer = bufferIm;
+							tail  = interleaved ? ".y;" : ";";
+							tail2 = interleaved ? ".x;" : ";";
+						}
+
+
+						size_t bid = numCR/2;
+						bid = bid ? bid : 1;
+						size_t cid, lid;
+
+						if(oddt)
+						{
+							cid = r%2;
+							lid = 1 + (numCR/2);
+						}
+						else
+						{
+							cid = r/bid;
+							lid = 1 + r%bid;
+						}
+
+						std::string oddpadd = oddp ? " (me/2) + " : " ";
+
+						std::string idxStr, idxStrRev;
+						idxStr += SztToStr(bid); idxStr += "*me +"; idxStr += oddpadd; idxStr += SztToStr(lid);
+						idxStrRev += SztToStr(length); idxStrRev += " - ("; idxStrRev += idxStr; idxStrRev += " )";
+
+						bool act = ( fwd || ((cid == 0) && (!batch2)) || ((cid != 0) && batch2) );
+						if(act)
+						{
+							passStr += "\n\t";
+							passStr += regIndex;
+							passStr += " = ";
+						}
+
+						if(setZero)
+						{
+							if(act) passStr += "0;";
+						}
+						else
+						{
+							if(act)
+							{
+								passStr += buffer;
+								passStr += "["; passStr += offset; passStr += " + ( ";
+							}
+
+							if(fwd)
+							{
+								if(cid == 0)	passStr += idxStr;
+								else			passStr += idxStrRev;
+							}
+							else
+							{
+								if(cid == 0)	{ if(!batch2) passStr += idxStr; }
+								else			{ if(batch2)  passStr += idxStr; }
+							}
+
+							if(act)
+							{
+								passStr += " )*"; passStr += SztToStr(stride); passStr += "]";
+
+								if(fwd) { passStr += tail; }
+								else	{ if(!batch2) passStr += tail; else passStr += tail2; }
+							}
+						}
+					}
+					else // write operation
+					{
+						std::string tail;
+						std::string regIndex = "(*R";
+						std::string regIndexPair = "(*R";
+						std::string buffer;
+
+						// Write real & imag at once
+						if(interleaved && (component == SR_COMP_BOTH))
+						{
+							assert(bufferRe.compare(bufferIm) == 0); // Make sure Real & Imag buffer strings are same for interleaved data
+							buffer = bufferRe;
+						}
+						else
+						{
+							if(c == 0)
+							{
+								buffer = bufferRe;
+								tail = interleaved ? ".x" : "";
+							}
+							else
+							{
+								buffer = bufferIm;
+								tail = interleaved ? ".y" : "";
+							}
+						}
+
+
+						size_t bid, cid, lid;
+						if(singlePass && fwd)
+						{
+							bid = 1 + radix/2;
+							lid = r;
+							cid = r/bid;
+
+							RegBaseAndCountAndPos("", r, regIndex); regIndex += ")";
+							RegBaseAndCountAndPos("", (radix - r)%radix , regIndexPair); regIndexPair += ")";
+						}
+						else
+						{
+							bid = numCR/2;
+
+							if(oddt)
+							{
+								cid = r%2;
+								lid = 1 + (numCR/2);
+
+								RegBaseAndCountAndPos("", r, regIndex); regIndex += ")";
+								RegBaseAndCountAndPos("", r + 1, regIndexPair); regIndexPair += ")";
+							}
+							else
+							{
+								cid = r/bid;
+								lid = 1 + r%bid;
+
+								RegBaseAndCountAndPos("", r, regIndex); regIndex += ")";
+								RegBaseAndCountAndPos("", r + bid, regIndexPair); regIndexPair += ")";
+							}
+						}
+
+
+						if(!cid)
+						{
+							std::string oddpadd = oddp ? " (me/2) + " : " ";
+
+							std::string sclStr = "";
+							if(scale != 1.0f) { sclStr += " * "; sclStr += FloatToStr(scale); sclStr += FloatSuffix<PR>(); }
+
+							if(fwd)
+							{
+								std::string idxStr, idxStrRev;
+								idxStr += SztToStr(length/(2*workGroupSize)); idxStr += "*me +"; idxStr += oddpadd; idxStr += SztToStr(lid);
+								idxStrRev += SztToStr(length); idxStrRev += " - ("; idxStrRev += idxStr; idxStrRev += " )";
+
+								std::string val1Str, val2Str;
+
+								val1Str += "\n\t";
+								val1Str += buffer; val1Str += "["; val1Str += offset; val1Str += " + ( ";
+								val1Str += idxStr; val1Str += " )*"; val1Str += SztToStr(stride); val1Str += "]";
+								val1Str += tail; val1Str += " = ";
+
+								val2Str += "\n\t";
+								val2Str += buffer; val2Str += "["; val2Str += offset; val2Str += " + ( ";
+								val2Str += idxStrRev; val2Str += " )*"; val2Str += SztToStr(stride); val2Str += "]";
+								val2Str += tail; val2Str += " = ";
+
+								std::string real1, imag1, real2, imag2;
+
+								real1 +=  "("; real1 += regIndex; real1 += ".x + "; real1 += regIndexPair; real1 += ".x)*0.5";
+								imag1 +=  "("; imag1 += regIndex; imag1 += ".y - "; imag1 += regIndexPair; imag1 += ".y)*0.5";
+								real2 +=  "("; real2 += regIndex; real2 += ".y + "; real2 += regIndexPair; real2 += ".y)*0.5";
+								imag2 += "(-"; imag2 += regIndex; imag2 += ".x + "; imag2 += regIndexPair; imag2 += ".x)*0.5";
+
+								if(interleaved && (component == SR_COMP_BOTH))
+								{
+									val1Str += "("; val1Str += RegBaseType<PR>(2); val1Str += ")( ";
+									val2Str += "("; val2Str += RegBaseType<PR>(2); val2Str += ")( ";
+
+									if(!batch2) { val1Str += real1; val1Str += ", "; val1Str += "+"; val1Str += imag1;
+												  val2Str += real1; val2Str += ", "; val2Str += "-"; val2Str += imag1; }
+									else		{ val1Str += real2; val1Str += ", "; val1Str += "+"; val1Str += imag2;
+												  val2Str += real2; val2Str += ", "; val2Str += "-"; val2Str += imag2; }
+
+									val1Str += " )";
+									val2Str += " )";
+								}
+								else
+								{
+									val1Str += " (";
+									val2Str += " (";
+									if(c == 0)
+									{
+										if(!batch2) { val1Str += real1;
+													  val2Str += real1; }
+										else		{ val1Str += real2;
+													  val2Str += real2; }
+									}
+									else
+									{
+										if(!batch2) { val1Str += "+"; val1Str += imag1;
+													  val2Str += "-"; val2Str += imag1; }
+										else		{ val1Str += "+"; val1Str += imag2;
+													  val2Str += "-"; val2Str += imag2; }
+									}
+									val1Str += " )";
+									val2Str += " )";
+								}
+
+								val1Str += sclStr;
+								val2Str += sclStr;
+
+												passStr += val1Str; passStr += ";";
+								if(rcFull)	{	passStr += val2Str; passStr += ";"; }
+							}
+							else
+							{
+								std::string idxStr, idxStrRev;
+								idxStr += SztToStr(bid); idxStr += "*me +"; idxStr += oddpadd; idxStr += SztToStr(lid);
+								idxStrRev += SztToStr(length); idxStrRev += " - ("; idxStrRev += idxStr; idxStrRev += " )";
+
+								passStr += "\n\t";
+								passStr += buffer; passStr += "["; passStr += offset; passStr += " + ( ";
+
+								if(!batch2)	passStr += idxStr;
+								else		passStr += idxStrRev;
+
+								passStr += " )*"; passStr += SztToStr(stride); passStr += "]";
+								passStr += tail; passStr += " = ";
+
+								passStr += "( ";
+								if(c == 0)
+								{
+									regIndex += ".x"; regIndexPair += ".x";
+
+									if(!batch2)	{ passStr += regIndex; passStr += " - "; passStr += regIndexPair; }
+									else		{ passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
+								}
+								else
+								{
+									regIndex += ".y"; regIndexPair += ".y";
+
+									if(!batch2)	{					passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
+									else		{ passStr += " - "; passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
+								}
+								passStr += " )";
+								passStr += sclStr;
+								passStr += ";";
+							}
+
+
+
+							// Since we write real & imag at once, we break the loop
+							if(interleaved && (component == SR_COMP_BOTH))
+								break;
+						}
+					}
+				}
+			}
+
+		}
+
+
+		// Appends to passStr one butterfly call statement per butterfly, numB in total.
+		// Each statement has the form "\n\t<bflyName>(<arg>, <arg>, ...);" with one argument group
+		// per radix position:
+		//   - linearRegs:  a single register name built by RegBaseAndCountAndPos from the prefix "R"
+		//                  and the linear index (butterfly*radix + position);
+		//   - otherwise:   "&<real>, &<imag>", both names built from the RegBase/RegBaseAndCount
+		//                  prefix for this butterfly with the "R" and "I" type tags.
+		void CallButterfly(const std::string &bflyName, size_t regC, size_t numB, std::string &passStr) const
+		{
+			std::string basePrefix;
+			RegBase(regC, basePrefix);
+
+			// Index of the final argument group; it is followed by ");" instead of ", "
+			const size_t lastPos = radix - 1;
+
+			for(size_t bfly = 0; bfly < numB; ++bfly)
+			{
+				std::string countedPrefix(basePrefix);
+				RegBaseAndCount(bfly, countedPrefix);
+
+				passStr.append("\n\t").append(bflyName).append("(");
+
+				size_t pos = 0;
+				while(true)
+				{
+					if(!linearRegs)
+					{
+						std::string realReg(countedPrefix);
+						std::string imagReg(countedPrefix);
+						RegBaseAndCountAndPos("R", pos, realReg);
+						RegBaseAndCountAndPos("I", pos, imagReg);
+
+						passStr += "&" + realReg + ", &" + imagReg;
+					}
+					else
+					{
+						std::string linearReg("R");
+						RegBaseAndCountAndPos("", bfly*radix + pos, linearReg);
+
+						passStr += linearReg;
+					}
+
+					if(pos == lastPos)
+						break;
+
+					passStr += ", ";
+					++pos;
+				}
+
+				passStr += ");";
+			}
+		}
+
+    public:
+		// Constructs one pass of the generated FFT.
+		//
+		// positionVal  - stored as 'position' (GeneratePass treats position == 0 as the first pass)
+		// lengthVal    - stored as 'length'; must be a non-zero multiple of radixVal
+		// radixVal     - stored as 'radix'; must be non-zero and <= lengthVal
+		// cnPerWIVal   - stored as 'cnPerWI'; must be non-zero and <= lengthVal
+		// L, LS, R     - stored as algL, algLS, algR (their meaning is not visible in this block);
+		//                LS must be non-zero and divide numButterfly*workGroupSize
+		// linearRegsVal, r2cVal, c2rVal, rcFullVal, rcSimpleVal - stored as the flags of the same name
+		//
+		// Derived values: numButterfly = cnPerWI/radix, workGroupSize = length/cnPerWI, and the
+		// split of numButterfly into numB4/numB2/numB1 groups.
+		Pass(	size_t positionVal, size_t lengthVal, size_t radixVal, size_t cnPerWIVal,
+				size_t L, size_t LS, size_t R, bool linearRegsVal, bool r2cVal, bool c2rVal, bool rcFullVal, bool rcSimpleVal) :
+			position(positionVal), length(lengthVal), radix(radixVal), cnPerWI(cnPerWIVal),
+			algL(L), algLS(LS), algR(R), linearRegs(linearRegsVal),
+			r2c(r2cVal), c2r(c2rVal), rcFull(rcFullVal), rcSimple(rcSimpleVal),
+			enableGrouping(true),
+			numB1(0), numB2(0), numB4(0),
+			nextPass(NULL)
+		{
+			// radix, cnPerWI and algLS are used as divisors below; reject zero before dividing
+			// so a bad argument produces a diagnosable assertion instead of a division trap.
+			assert(radix > 0);
+			assert(cnPerWI > 0);
+			assert(algLS > 0);
+
+			assert(radix <= length);
+			assert(length%radix == 0);
+
+			numButterfly = cnPerWI/radix;
+			workGroupSize = length/cnPerWI;
+
+			// Total number of butterflies (over all work-items) must be divisible by LS
+			assert( ((numButterfly*workGroupSize)%algLS) == 0 );
+
+			// All butterflies in one work-item should always be part of no more than 1 FFT transform.
+			// In other words, there should not be more than 1 FFT transform per work-item.
+			assert(cnPerWI <= length);
+
+			// Calculate the different types of Butterflies needed
+			if(linearRegs || r2c || c2r)
+			{
+				// Linear-register and real transforms use B1 butterflies only
+				numB1 = numButterfly;
+			}
+			else
+			{
+				numB4 = numButterfly/4;
+				numB2 = (numButterfly%4)/2; // can be 0 or 1
+				numB1 = (numButterfly%2); // can be 0 or 1
+
+				assert(numButterfly == (numB4*4 + numB2*2 + numB1));
+			}
+		}
+
+		// Butterfly counts per grouping (B1 / B2 / B4) as computed by the constructor.
+		size_t GetNumB1() const
+		{
+			return numB1;
+		}
+
+		size_t GetNumB2() const
+		{
+			return numB2;
+		}
+
+		size_t GetNumB4() const
+		{
+			return numB4;
+		}
+
+		// Position of this pass and the radix it was constructed with.
+		size_t GetPosition() const
+		{
+			return position;
+		}
+
+		size_t GetRadix() const
+		{
+			return radix;
+		}
+
+		// Links the pass that follows this one; a NULL nextPass marks this pass as the last one.
+		void SetNextPass(Pass<PR> *np)
+		{
+			nextPass = np;
+		}
+
+		// Overrides the enableGrouping flag (set to true by the constructor).
+		void SetGrouping(bool grp)
+		{
+			enableGrouping = grp;
+		}
+
+		// Appends to passStr the OpenCL source text of this pass, emitted as an always_inline function
+		// named PassName(position, fwd).  The generated function takes
+		// (rw, b, me, inOffset, outOffset), the input/output buffer pointers and, when linearRegs is
+		// set, the register arguments produced by IterRegArgs().
+		//
+		// The body is emitted in this order: register declarations, read into registers,
+		// twiddle multiply (position > 0), butterfly calls, optional 3-step twiddle multiply,
+		// and write back from registers.
+		//
+		// fwd                - direction flag forwarded to PassName, ButterflyName and the sweep helpers
+		// passStr            - output; generated text is appended to it
+		// fft_3StepTwiddle   - emit the SR_TWMUL_3STEP sweep; asserted to be used only on the last pass
+		// inInterleaved,
+		// outInterleaved     - the buffer is a single pointer of RegBaseType<PR>(2) elements; asserted
+		//                      to be used only together with gIn / gOut respectively
+		// inReal, outReal    - the buffer is a single pointer ("bufIn" / "bufOut") instead of a Re/Im pair
+		// inStride, outStride- strides forwarded to the read / write sweeps
+		// scale              - scale factor forwarded to the write-back code
+		// gIn, gOut          - declare the input / output pointers as __global (otherwise __local)
+		void GeneratePass(	bool fwd, std::string &passStr, bool fft_3StepTwiddle,
+							bool inInterleaved, bool outInterleaved,
+							bool inReal, bool outReal,
+							size_t inStride, size_t outStride, double scale,
+							bool gIn = false, bool gOut = false) const
+		{
+			// Parameter names of the buffers in the generated function; real and interleaved
+			// layouts share one name for the Re and Im roles.
+			const std::string bufferInRe  = (inReal || inInterleaved) ?   "bufIn"  : "bufInRe";
+			const std::string bufferInIm  = (inReal || inInterleaved) ?   "bufIn"  : "bufInIm";
+			const std::string bufferOutRe = (outReal || outInterleaved) ? "bufOut" : "bufOutRe";
+			const std::string bufferOutIm = (outReal || outInterleaved) ? "bufOut" : "bufOutIm";
+
+			// Names of the second set of buffers, declared only for real transforms when !rcSimple
+			const std::string bufferInRe2  = (inReal || inInterleaved) ?   "bufIn2"  : "bufInRe2";
+			const std::string bufferInIm2  = (inReal || inInterleaved) ?   "bufIn2"  : "bufInIm2";
+			const std::string bufferOutRe2 = (outReal || outInterleaved) ? "bufOut2" : "bufOutRe2";
+			const std::string bufferOutIm2 = (outReal || outInterleaved) ? "bufOut2" : "bufOutIm2";
+
+			// for real transforms we use only B1 butterflies (regC = 1)
+			if(r2c || c2r)
+			{
+				assert(numB1 == numButterfly);
+				assert(linearRegs);
+			}
+
+			// Check if it is single pass transform
+			bool singlePass = ((position == 0) && (nextPass == NULL));
+			if(singlePass) assert(numButterfly == 1); // for single pass transforms, there can be only 1 butterfly per transform
+			if(singlePass) assert(workGroupSize == 1);
+
+			// Register types
+			std::string regB1Type = RegBaseType<PR>(1);
+			std::string regB2Type = RegBaseType<PR>(2);
+			std::string regB4Type = RegBaseType<PR>(4);
+
+			//Function attribute
+			passStr += "__attribute__((always_inline)) void\n";
+
+			//Function name
+			passStr += PassName(position, fwd);
+
+			// Function arguments
+			passStr += "(";
+			passStr += "uint rw, uint b, uint me, uint inOffset, uint outOffset, ";
+
+			// For now, interleaved support is there for only global buffers
+			// TODO : add support for LDS interleaved
+			if(inInterleaved)  assert(gIn);
+			if(outInterleaved) assert(gOut);
+
+			// Buffer pointer parameters.  Real transforms additionally declare the "2" buffers
+			// for global memory unless rcSimple is set.
+			if(r2c || c2r)
+			{
+				if(gIn)
+				{
+					if(inInterleaved)
+					{
+										passStr += "__global "; passStr += regB2Type; passStr += " *"; passStr += bufferInRe;  passStr += ", ";
+						if(!rcSimple) {	passStr += "__global "; passStr += regB2Type; passStr += " *"; passStr += bufferInRe2; passStr += ", "; }
+					}
+					else if(inReal)
+					{
+										passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe;  passStr += ", ";
+						if(!rcSimple) {	passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe2; passStr += ", "; }
+					}
+					else
+					{
+										passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe;  passStr += ", ";
+						if(!rcSimple) {	passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe2; passStr += ", "; }
+										passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInIm;  passStr += ", ";
+						if(!rcSimple) {	passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInIm2; passStr += ", "; }
+					}
+				}
+				else
+				{
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe; passStr += ", ";
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferInIm; passStr += ", ";
+				}
+
+				if(gOut)
+				{
+					if(outInterleaved)
+					{
+															passStr += "__global "; passStr += regB2Type; passStr += " *"; passStr += bufferOutRe;
+						if(!rcSimple) { passStr += ", ";	passStr += "__global "; passStr += regB2Type; passStr += " *"; passStr += bufferOutRe2; }
+					}
+					else if(outReal)
+					{
+															passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe;
+						if(!rcSimple) { passStr += ", ";	passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe2; }
+					}
+					else
+					{
+															passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe;  passStr += ", ";
+						if(!rcSimple) {						passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe2; passStr += ", "; }
+															passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutIm;
+						if(!rcSimple) { passStr += ", ";	passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutIm2; }
+					}
+				}
+				else
+				{
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe; passStr += ", ";
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferOutIm;
+				}
+			}
+			else
+			{
+				if(gIn)
+				{
+					if(inInterleaved)
+					{
+						passStr += "__global "; passStr += regB2Type; passStr += " *"; passStr += bufferInRe;  passStr += ", ";
+					}
+					else
+					{
+						passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe;  passStr += ", ";
+						passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferInIm;  passStr += ", ";
+					}
+				}
+				else
+				{
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferInRe; passStr += ", ";
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferInIm; passStr += ", ";
+				}
+
+
+				if(gOut)
+				{
+					if(outInterleaved)
+					{
+						passStr += "__global "; passStr += regB2Type; passStr += " *"; passStr += bufferOutRe;
+					}
+					else
+					{
+						passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe;  passStr += ", ";
+						passStr += "__global "; passStr += regB1Type; passStr += " *"; passStr += bufferOutIm;
+					}
+				}
+				else
+				{
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferOutRe; passStr += ", ";
+					passStr += "__local "; passStr += regB1Type; passStr += " *"; passStr += bufferOutIm;
+				}
+			}
+
+			// Register arguments
+			if(linearRegs)
+			{
+				passStr += ", "; passStr += IterRegArgs();
+			}
+			passStr += ")\n{\n";
+
+			// Register Declarations
+			if(!linearRegs)
+			{
+				DeclareRegs(regB1Type, 1, numB1, passStr);
+				DeclareRegs(regB2Type, 2, numB2, passStr);
+				DeclareRegs(regB4Type, 4, numB4, passStr);
+			}
+
+			// odd cnPerWI processing
+			bool oddp = false;
+			oddp = ((cnPerWI%2) && (length > 1) && (!singlePass));
+
+			// additional register for odd
+			// (emitted only for the last r2c pass or the first c2r pass; declares R<cnPerWI> zeroed, plus 'brv')
+			if( !rcSimple && oddp && ((r2c && (nextPass == NULL)) || (c2r && (position == 0))) )
+			{
+				passStr += "\n\t";
+				passStr += "uint brv = 0;\n\t";
+				passStr += "\n\t";
+				passStr += regB2Type; passStr += " R"; passStr += SztToStr(cnPerWI); passStr += "[1];\n\t";
+				passStr += "(*R"; passStr += SztToStr(cnPerWI); passStr += ").x = 0; ";
+				passStr += "(*R"; passStr += SztToStr(cnPerWI); passStr += ").y = 0;\n";
+			}
+
+			// Special private memory for c-r 1 pass transforms
+			if( !rcSimple && (c2r && (position == 0)) && singlePass )
+			{
+				assert(radix == length);
+
+				passStr += "\n\t";
+				passStr += regB1Type;
+				passStr += " mpvt["; passStr += SztToStr(length); passStr += "];\n";
+			}
+
+			passStr += "\n";
+
+			// Read into registers
+			if(r2c)
+			{
+				// Only the first pass reads the input: the real component comes from the primary
+				// buffer; the imaginary component comes from the "2" buffer when rw > 1, otherwise
+				// (and always for rcSimple) from a SweepRegsRC call with its setZero-style flags set.
+				if(position == 0)
+				{
+					passStr += "\n\tif(rw)\n\t{";
+					SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, bufferInRe, bufferInIm, "inOffset", 1, numB1, 0, passStr);
+					passStr += "\n\t}\n";
+
+					if(rcSimple)
+					{
+						passStr += "\n";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInIm2, "inOffset", passStr);
+						passStr += "\n";
+					}
+					else
+					{
+						passStr += "\n\tif(rw > 1)\n\t{";
+						SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, bufferInRe2, bufferInIm2, "inOffset", 1, numB1, 0, passStr);
+						passStr += "\n\t}\n";
+
+						passStr += "\telse\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInIm2, "inOffset", passStr);
+						passStr += "\n\t}\n";
+					}
+				}
+			}
+			else if(c2r && !rcSimple)
+			{
+				if(position == 0)
+				{
+					// Staging buffer for the rearranged input: the output buffer, or the private
+					// array "mpvt" for single pass transforms.
+					std::string processBufRe = bufferOutRe;
+					std::string processBufIm = bufferOutIm;
+					std::string processBufOffset = "outOffset";
+					size_t processBufStride = outStride;
+
+					if(singlePass)
+					{
+						processBufRe = "mpvt";
+						processBufIm = "mpvt";
+						processBufOffset = "0";
+						processBufStride = 1;
+					}
+
+					// --- real component: work-item 0 stores input element [inOffset] into the staging buffer ---
+					passStr += "\n\tif(rw && !me)\n\t{\n\t";
+					passStr += processBufRe; passStr += "["; passStr += processBufOffset; passStr += "] = ";
+					passStr += bufferInRe; passStr+= "[inOffset]";
+					if(inInterleaved) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
+
+					if(length > 1)
+					{
+						passStr += "\n\n\tif(rw)\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInRe, "inOffset", passStr);
+						passStr += "\n\t}\n";
+
+						passStr += "\n\tif(rw > 1)\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, false, bufferInIm2, bufferInIm2, "inOffset", passStr);
+						passStr += "\n\t}\n\telse\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, true, true, false, bufferInIm2, bufferInIm2, "inOffset", passStr);
+						passStr += "\n\t}\n";
+
+						if(oddp)
+						{
+							passStr += "\n\tif(rw && (me%2))\n\t{";
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, true, bufferInRe, bufferInRe, "inOffset", passStr);
+							passStr += "\n\t}";
+							passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInIm2, bufferInIm2, "inOffset", passStr);
+							passStr += "\n\t}\n";
+						}
+
+
+						SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, true, false, processBufRe, processBufIm, processBufOffset, passStr);
+						if(oddp)
+						{
+							passStr += "\n\tif(me%2)\n\t{";
+							SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, true, true, processBufRe, processBufIm, processBufOffset, passStr);
+							passStr += "\n\t}\n";
+						}
+						SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, false, false, processBufRe, processBufIm, processBufOffset, passStr);
+						if(oddp)
+						{
+							passStr += "\n\tif(me%2)\n\t{";
+							SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, false, true, processBufRe, processBufIm, processBufOffset, passStr);
+							passStr += "\n\t}\n";
+						}
+					}
+
+					// Reload the real component from the staging buffer, fenced on both sides
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+					SweepRegs(SR_READ, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, processBufRe, processBufIm, processBufOffset, 1, numB1, 0, passStr);
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+
+
+					// --- imaginary component: element [inOffset] comes from the "2" buffer when rw > 1, else 0 ---
+					passStr += "\n\tif((rw > 1) && !me)\n\t{\n\t";
+					passStr += processBufIm; passStr += "["; passStr += processBufOffset; passStr += "] = ";
+					passStr += bufferInRe2; passStr+= "[inOffset]";
+					if(inInterleaved) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
+					passStr += "\n\tif((rw == 1) && !me)\n\t{\n\t"; passStr += processBufIm; passStr += "["; passStr += processBufOffset; passStr += "] = 0;\n\t}";
+
+
+					if(length > 1)
+					{
+						passStr += "\n\n\tif(rw)\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, false, bufferInIm, bufferInIm, "inOffset", passStr);
+						passStr += "\n\t}\n";
+
+						passStr += "\n\tif(rw > 1)\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
+						passStr += "\n\t}\n\telse\n\t{";
+						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
+						passStr += "\n\t}\n";
+
+						if(oddp)
+						{
+							passStr += "\n\tif(rw && (me%2))\n\t{";
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, true, bufferInIm, bufferInIm, "inOffset", passStr);
+							passStr += "\n\t}";
+							passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, true, bufferInRe2, bufferInRe2, "inOffset", passStr);
+							passStr += "\n\t}\n";
+						}
+
+						SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, false, true, false, processBufRe, processBufIm, processBufOffset, passStr);
+						if(oddp)
+						{
+							passStr += "\n\tif(me%2)\n\t{";
+							SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, false, true, true, processBufRe, processBufIm, processBufOffset, passStr);
+							passStr += "\n\t}\n";
+						}
+						SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, false, false, false, processBufRe, processBufIm, processBufOffset, passStr);
+						if(oddp)
+						{
+							passStr += "\n\tif(me%2)\n\t{";
+							SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, false, false, true, processBufRe, processBufIm, processBufOffset, passStr);
+							passStr += "\n\t}\n";
+						}
+					}
+
+					// Reload the imaginary component from the staging buffer, fenced on both sides
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+					SweepRegs(SR_READ, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, processBufRe, processBufIm, processBufOffset, 1, numB1, 0, passStr);
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+				}
+			}
+			else
+			{
+				// Complex transforms (and c2r with rcSimple): with linear registers only the first
+				// pass reads here; later passes get their registers from the previous pass's write-back code.
+				if( (!linearRegs) || (linearRegs && (position == 0)) )
+				{
+					passStr += "\n\tif(rw)\n\t{";
+					SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "inOffset", 1, numB1, 0, passStr);
+					SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "inOffset", 2, numB2, numB1, passStr);
+					SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "inOffset", 4, numB4, 2*numB2 + numB1, passStr);
+					passStr += "\n\t}\n";
+				}
+			}
+
+
+			passStr += "\n";
+
+			// Twiddle multiply
+			if( (position > 0) && (radix > 1) )
+			{
+				SweepRegs(SR_TWMUL, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 1, numB1, 0, passStr);
+				SweepRegs(SR_TWMUL, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 2, numB2, numB1, passStr);
+				SweepRegs(SR_TWMUL, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 4, numB4, 2*numB2 + numB1, passStr);
+			}
+
+			// Butterfly calls
+			if(radix > 1)
+			{
+				if(numB1) CallButterfly(ButterflyName(radix, 1, fwd), 1, numB1, passStr);
+				if(numB2) CallButterfly(ButterflyName(radix, 2, fwd), 2, numB2, passStr);
+				if(numB4) CallButterfly(ButterflyName(radix, 4, fwd), 4, numB4, passStr);
+			}
+
+			passStr += "\n";
+
+			// Middle passes without linear registers get a barrier before the write back
+			if( (position != 0) && (!linearRegs) && (nextPass != NULL) )
+				passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+
+			passStr += "\n";
+
+			// 3-step twiddle multiplies
+			if(fft_3StepTwiddle)
+			{
+				assert(nextPass == NULL);
+				if(linearRegs)
+				{
+					SweepRegs(SR_TWMUL_3STEP, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 1, numB1, 0, passStr);
+				}
+				else
+				{
+					SweepRegs(SR_TWMUL_3STEP, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 1, numB1, 0, passStr);
+					SweepRegs(SR_TWMUL_3STEP, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 2, numB2, numB1, passStr);
+					SweepRegs(SR_TWMUL_3STEP, fwd, false, 1, SR_COMP_BOTH, 1.0f, bufferInRe, bufferInIm, "", 4, numB4, 2*numB2 + numB1, passStr);
+				}
+			}
+
+			// Write back from registers
+			if(linearRegs)
+			{
+				// In this case, we have to write & again read back for the next pass since we are
+				// using only half the lds. Number of barriers will increase at the cost of halving the lds.
+
+				if(nextPass == NULL) // last pass
+				{
+					if(r2c && !rcSimple)
+					{
+						if(!singlePass)
+						{
+							// Real component: write the registers to the input buffer, re-read them with
+							// SweepRegsRC, and let work-item 0 store output element [outOffset] (imaginary part 0).
+							SweepRegs(SR_WRITE, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, bufferInRe, bufferInIm, "inOffset", 1, numB1, 0, passStr);
+							passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInIm, "inOffset", passStr);
+							if(oddp)
+							{
+								passStr += "\n\tif(me%2)\n\t{";
+								SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, true, bufferInRe, bufferInIm, "inOffset", passStr);
+								passStr += "\n\t}\n";
+							}
+
+							passStr += "\n\tif(rw && !me)\n\t{\n\t";
+							if(outInterleaved)
+							{
+								passStr += bufferOutRe; passStr+= "[outOffset].x = "; passStr += bufferInRe; passStr += "[inOffset]";
+								if(scale != 1.0) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); } passStr += ";\n\t";
+								passStr += bufferOutIm; passStr+= "[outOffset].y = "; passStr += "0;\n\t}";
+							}
+							else
+							{
+								passStr += bufferOutRe; passStr+= "[outOffset] = ";   passStr += bufferInRe; passStr += "[inOffset]";
+								if(scale != 1.0) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); } passStr += ";\n\t";
+								passStr += bufferOutIm; passStr+= "[outOffset] = ";   passStr += "0;\n\t}";
+							}
+							passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+
+
+							// Imaginary component: same sequence, with element [outOffset] going to the "2" output buffers when rw > 1
+							SweepRegs(SR_WRITE, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, bufferInRe, bufferInIm, "inOffset", 1, numB1, 0, passStr);
+							passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, false, bufferInRe, bufferInIm, "inOffset", passStr);
+							if(oddp)
+							{
+								passStr += "\n\tif(me%2)\n\t{";
+								SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, true, bufferInRe, bufferInIm, "inOffset", passStr);
+								passStr += "\n\t}\n";
+							}
+
+							passStr += "\n\tif((rw > 1) && !me)\n\t{\n\t";
+							if(outInterleaved)
+							{
+								passStr += bufferOutRe2; passStr+= "[outOffset].x = "; passStr += bufferInIm; passStr += "[inOffset]";
+								if(scale != 1.0) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); } passStr += ";\n\t";
+								passStr += bufferOutIm2; passStr+= "[outOffset].y = "; passStr += "0;\n\t}";
+							}
+							else
+							{
+								passStr += bufferOutRe2; passStr+= "[outOffset] = ";   passStr += bufferInIm; passStr += "[inOffset]";
+								if(scale != 1.0) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); } passStr += ";\n\t";
+								passStr += bufferOutIm2; passStr+= "[outOffset] = ";   passStr += "0;\n\t}";
+							}
+							passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+						}
+
+
+						// Final scaled writes: primary output buffers under if(rw), "2" buffers under if(rw > 1)
+						passStr += "\n\n\tif(rw)\n\t{";
+						SweepRegsRC(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, false, false, false, bufferOutRe, bufferOutIm, "outOffset", passStr);
+						passStr += "\n\t}\n";
+						if(oddp)
+						{
+							passStr += "\n\n\tbrv = ((rw != 0) & (me%2 == 1));\n\t";
+							passStr += "if(brv)\n\t{";
+							SweepRegsRC(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, false, false, true, bufferOutRe, bufferOutIm, "outOffset", passStr);
+							passStr += "\n\t}\n";
+						}
+
+						passStr += "\n\n\tif(rw > 1)\n\t{";
+						SweepRegsRC(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, false, true, false, bufferOutRe2, bufferOutIm2, "outOffset", passStr);
+						passStr += "\n\t}\n";
+						if(oddp)
+						{
+							passStr += "\n\n\tbrv = ((rw > 1) & (me%2 == 1));\n\t";
+							passStr += "if(brv)\n\t{";
+							SweepRegsRC(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, false, true, true, bufferOutRe2, bufferOutIm2, "outOffset", passStr);
+							passStr += "\n\t}\n";
+						}
+
+					}
+					else if(c2r)
+					{
+						// Real component goes to the primary output; the imaginary component goes to
+						// the "2" output buffers when rw > 1 (not emitted for rcSimple).
+						passStr += "\n\tif(rw)\n\t{";
+						SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_REAL, scale, bufferOutRe, bufferOutIm, "outOffset", 1, numB1, 0, passStr);
+						passStr += "\n\t}\n";
+
+						if(!rcSimple)
+						{
+							passStr += "\n\tif(rw > 1)\n\t{";
+							SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_IMAG, scale, bufferOutRe2, bufferOutIm2, "outOffset", 1, numB1, 0, passStr);
+							passStr += "\n\t}\n";
+						}
+					}
+					else
+					{
+						passStr += "\n\tif(rw)\n\t{";
+						SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, bufferOutRe, bufferOutIm, "outOffset", 1, numB1, 0, passStr);
+						passStr += "\n\t}\n";
+					}
+				}
+				else
+				{
+					// Not the last pass: exchange one component at a time through the output buffer.
+					// This pass writes it, then the next pass's read sweep is emitted here, with a
+					// barrier after each step.
+					passStr += "\n\tif(rw)\n\t{";
+					SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_REAL, scale, bufferOutRe, bufferOutIm, "outOffset", 1, numB1, 0, passStr);
+					passStr += "\n\t}\n";
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+					passStr += "\n\tif(rw)\n\t{";
+					nextPass->SweepRegs(SR_READ, fwd, outInterleaved, outStride, SR_COMP_REAL, scale, bufferOutRe, bufferOutIm, "outOffset", 1, nextPass->GetNumB1(), 0, passStr);
+					passStr += "\n\t}\n";
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+					passStr += "\n\tif(rw)\n\t{";
+					SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_IMAG, scale, bufferOutRe, bufferOutIm, "outOffset", 1, numB1, 0, passStr);
+					passStr += "\n\t}\n";
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+					passStr += "\n\tif(rw)\n\t{";
+					nextPass->SweepRegs(SR_READ, fwd, outInterleaved, outStride, SR_COMP_IMAG, scale, bufferOutRe, bufferOutIm, "outOffset", 1, nextPass->GetNumB1(), 0, passStr);
+					passStr += "\n\t}\n";
+					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+				}
+			}
+			else
+			{
+				// Non-linear registers: write both components of every butterfly group at once
+				passStr += "\n\tif(rw)\n\t{";
+				SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, bufferOutRe, bufferOutIm, "outOffset", 1, numB1, 0, passStr);
+				SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, bufferOutRe, bufferOutIm, "outOffset", 2, numB2, numB1, passStr);
+				SweepRegs(SR_WRITE, fwd, outInterleaved, outStride, SR_COMP_BOTH, scale, bufferOutRe, bufferOutIm, "outOffset", 4, numB4, 2*numB2 + numB1, passStr);
+				passStr += "\n\t}\n";
+			}
+
+
+			// Close the generated function body
+			passStr += "\n}\n\n";
+		}
+    };
+
+    // FFT kernel
+    template <Precision PR>
+    class Kernel
+    {
+        size_t length;							// Length of FFT (taken from params.fft_N[0])
+        size_t workGroupSize;					// Work group size (taken from params.fft_SIMD)
+		size_t cnPerWI;							// complex numbers per work-item
+
+		size_t numTrans;						// Number of transforms per work-group
+		size_t workGroupSizePerTrans;			// Work group subdivision per transform
+		size_t numPasses;						// Number of FFT passes
+        std::vector<size_t> radices;			// Base radix at each pass
+        std::vector<Pass<PR> > passes;			// Array of pass objects
+
+		bool halfLds;							// LDS used to store one component (either real or imaginary) at a time
+												// for passing intermediate data between the passes, if this is set
+												// then each pass-function should accept same set of registers
+
+		// Future optimization ideas
+		// bool limitRegs;							// TODO: Incrementally write to LDS, thereby using same set of registers for more than 1 butterflies
+		// bool combineReadTwMul;					// TODO: Combine reading into registers and Twiddle multiply
+
+		bool r2c2r;								// real to complex or complex to real transform
+		bool r2c, c2r;							// r2c: input layout is CLFFT_REAL; c2r: output layout is CLFFT_REAL
+		bool rcFull;							// real transform whose complex side uses CLFFT_COMPLEX_INTERLEAVED or CLFFT_COMPLEX_PLANAR
+		bool rcSimple;							// copy of params.fft_RCsimple
+
+		const FFTKernelGenKeyParams params;		// key params
+
+
+		// Build the comma separated register list "<pfx>R0, <pfx>R1, ..." with one
+		// entry per complex number held by a work-item (cnPerWI entries).
+		// The list is only produced when halfLds is set; otherwise an empty string
+		// is returned. When initComma is true the list starts with ", " so that it
+		// can be appended directly to an existing argument list.
+		inline std::string IterRegs(const std::string &pfx, bool initComma = true)
+		{
+			std::string regList;
+
+			// Registers are passed between passes only in half-LDS mode
+			if(!halfLds)
+				return regList;
+
+			if(initComma)
+				regList += ", ";
+
+			for(size_t reg = 0; reg < cnPerWI; ++reg)
+			{
+				if(reg > 0)
+					regList += ", ";
+
+				regList += pfx;
+				regList += "R";
+				regList += SztToStr(reg);
+			}
+
+			return regList;
+		}
+
+		// Returns true when grouped reads/writes can be used: never for real
+		// transforms, and otherwise only if every input and output stride at
+		// index 1 .. fft_DataDim-1 is even. In-place transforms use the input
+		// strides for both sides.
+		inline bool IsGroupedReadWritePossible()
+		{
+			if(r2c2r)
+				return false;
+
+			const size_t *inS  = params.fft_inStride;
+			const size_t *outS = (params.fft_placeness == CLFFT_INPLACE) ? params.fft_inStride
+																		 : params.fft_outStride;
+
+			for(size_t dim = 1; dim < params.fft_DataDim; ++dim)
+			{
+				// An odd stride on either side rules grouping out
+				if((inS[dim] % 2) || (outS[dim] % 2))
+					return false;
+			}
+
+			return true;
+		}
+
+		// Generate the kernel source statement(s) that assign the buffer offset of
+		// the current transform to the variable named by 'off'.
+		//   off             - name of the offset variable in the generated kernel
+		//   input           - true: use params.fft_inStride, false: use params.fft_outStride
+		//   rc_second_index - for real transforms without rcSimple, selects the
+		//                     "+ 1" transform of the pair instead of the "+ 0" one
+		// The shape of the emitted expression depends on params.fft_DataDim (2..5).
+		inline std::string OffsetCalc(const std::string &off, bool input = true, bool rc_second_index = false)
+		{
+			const size_t *strides = input ? params.fft_inStride : params.fft_outStride;
+
+			// Expression giving the global transform index handled by this work-item
+			std::string batchExpr;
+			if(r2c2r && !rcSimple)
+			{
+				batchExpr.append("(batch*").append(SztToStr(2*numTrans));
+				batchExpr.append(rc_second_index ? " + 1" : " + 0");
+
+				if(numTrans != 1)
+					batchExpr.append(" + 2*(me/").append(SztToStr(workGroupSizePerTrans)).append("))");
+				else
+					batchExpr.append(")");
+			}
+			else if(numTrans == 1)
+			{
+				batchExpr.append("batch");
+			}
+			else
+			{
+				batchExpr.append("(batch*").append(SztToStr(numTrans));
+				batchExpr.append(" + (me/").append(SztToStr(workGroupSizePerTrans)).append("))");
+			}
+
+			std::string code;
+
+			if(params.fft_DataDim == 5)
+			{
+				const std::string n1   = SztToStr(params.fft_N[1]);
+				const std::string n12  = SztToStr(params.fft_N[1] * params.fft_N[2]);
+				const std::string n123 = SztToStr(params.fft_N[1] * params.fft_N[2] * params.fft_N[3]);
+
+				code.append("\t{\n\tuint ocalc1 = ").append(batchExpr).append("%").append(n123).append(";\n");
+				code.append("\tuint ocalc0 = ocalc1%").append(n12).append(";\n");
+
+				code.append("\t").append(off).append(" = (").append(batchExpr).append("/").append(n123);
+				code.append(")*").append(SztToStr(strides[4])).append(" + ");
+
+				code.append("(ocalc1/").append(n12).append(")*").append(SztToStr(strides[3])).append(" + ");
+				code.append("(ocalc0/").append(n1).append(")*").append(SztToStr(strides[2])).append(" + ");
+				code.append("(ocalc0%").append(n1).append(")*").append(SztToStr(strides[1])).append(";\n");
+
+				code.append("\t}\n");
+			}
+			else if(params.fft_DataDim == 4)
+			{
+				const std::string n1  = SztToStr(params.fft_N[1]);
+				const std::string n12 = SztToStr(params.fft_N[1] * params.fft_N[2]);
+
+				code.append("\t{\n\tuint ocalc0 = ").append(batchExpr).append("%").append(n12).append(";\n");
+
+				code.append("\t").append(off).append(" = (").append(batchExpr).append("/").append(n12);
+				code.append(")*").append(SztToStr(strides[3])).append(" + ");
+
+				code.append("(ocalc0/").append(n1).append(")*").append(SztToStr(strides[2])).append(" + ");
+				code.append("(ocalc0%").append(n1).append(")*").append(SztToStr(strides[1])).append(";\n");
+
+				code.append("\t}\n");
+			}
+			else if(params.fft_DataDim == 3)
+			{
+				const std::string n1 = SztToStr(params.fft_N[1]);
+
+				code.append("\t").append(off).append(" = (").append(batchExpr).append("/").append(n1);
+				code.append(")*").append(SztToStr(strides[2])).append(" + ");
+				code.append("(").append(batchExpr).append("%").append(n1);
+				code.append(")*").append(SztToStr(strides[1])).append(";\n");
+			}
+			else if(params.fft_DataDim == 2)
+			{
+				code.append("\t").append(off).append(" = ").append(batchExpr);
+				code.append("*").append(SztToStr(strides[1])).append(";\n");
+			}
+			else
+			{
+				// Any other dimensionality is not expected here
+				assert(false);
+			}
+
+			return code;
+		}
+
+    public:
+        // Construct the kernel generator from the key parameters.
+        //
+        // Derives the per-work-group layout (numTrans, cnPerWI,
+        // workGroupSizePerTrans), decides whether half-LDS mode is used, and
+        // decomposes 'length' into a sequence of radix passes. The radices come
+        // from the KernelCoreSpecs lookup table when it has an entry and
+        // params.fft_MaxWorkGroupSize >= 256; otherwise they are chosen greedily
+        // from a fixed candidate list.
+        //
+        // All boolean flags are value-initialised (in declaration order) so that
+        // rcFull is well defined for complex-to-complex transforms as well; it is
+        // handed to every Pass object regardless of the transform type.
+        Kernel( const FFTKernelGenKeyParams &paramsVal) :
+					r2c2r(false), r2c(false), c2r(false), rcFull(false), rcSimple(false),
+					params(paramsVal)
+
+        {
+			length = params.fft_N[0];
+			workGroupSize = params.fft_SIMD;
+			numTrans = (workGroupSize * params.fft_R) / length;
+
+			// Check if it is R2C or C2R transform
+			r2c = (params.fft_inputLayout == CLFFT_REAL);
+			c2r = (params.fft_outputLayout == CLFFT_REAL);
+			r2c2r = (r2c || c2r);
+
+			// rcFull: the complex side of a real transform uses a full complex layout
+			if(r2c)
+			{
+				rcFull = (	(params.fft_outputLayout == CLFFT_COMPLEX_INTERLEAVED) ||
+							(params.fft_outputLayout == CLFFT_COMPLEX_PLANAR) );
+			}
+			if(c2r)
+			{
+				rcFull = (	(params.fft_inputLayout  == CLFFT_COMPLEX_INTERLEAVED) ||
+							(params.fft_inputLayout  == CLFFT_COMPLEX_PLANAR) );
+			}
+
+			rcSimple = params.fft_RCsimple;
+
+			// Set half lds only for power-of-2 problem sizes & interleaved data,
+			// and always for real transforms
+			const bool bothInterleaved = (	(params.fft_inputLayout == CLFFT_COMPLEX_INTERLEAVED) &&
+											(params.fft_outputLayout == CLFFT_COMPLEX_INTERLEAVED) );
+			const bool powerOfTwo = ((length & (length-1)) == 0);
+			halfLds = r2c2r || (bothInterleaved && powerOfTwo);
+
+			const bool linearRegs = halfLds;
+
+			assert( ((length*numTrans)%workGroupSize) == 0 );
+			cnPerWI = (numTrans * length) / workGroupSize;
+			workGroupSizePerTrans = workGroupSize/numTrans;
+
+			// !!!! IMPORTANT !!!! Keep these assertions unchanged, algorithm depend on these to be true
+			assert( (cnPerWI * workGroupSize) == (numTrans * length) );
+			assert( cnPerWI <= length ); // Don't do more than 1 fft per work-item
+
+			// Breakdown into passes
+
+			size_t LS = 1;
+			size_t L;
+			size_t R = length;
+			size_t pid = 0;
+
+			// See if we can get radices from the lookup table
+			const size_t *pRadices = NULL;
+			size_t nPasses;
+			KernelCoreSpecs<PR> kcs;
+			kcs.GetRadices(length, nPasses, pRadices);
+			if((params.fft_MaxWorkGroupSize >= 256) && (pRadices != NULL))
+			{
+				for(size_t i=0; i<nPasses; i++)
+				{
+					size_t rad = pRadices[i];
+					L = LS * rad;
+					R /= rad;
+
+					radices.push_back(rad);
+					passes.push_back(Pass<PR>(i, length, rad, cnPerWI, L, LS, R, linearRegs, r2c, c2r, rcFull, rcSimple));
+
+					LS *= rad;
+				}
+				assert(R == 1); // this has to be true for correct radix composition of the length
+				numPasses = nPasses;
+			}
+			else
+			{
+				// Possible radices
+				size_t cRad[] = {10,8,6,5,4,3,2,1}; // Must be in descending order
+				size_t cRadSize = (sizeof(cRad)/sizeof(cRad[0]));
+
+				while(true)
+				{
+					size_t rad = 1;
+
+					assert(cRadSize >= 1);
+					for(size_t r=0; r<cRadSize; r++)
+					{
+						rad = cRad[r];
+
+						if( (rad == 16) && !linearRegs ) continue; // temporary - fix this !!!
+
+						if((rad > cnPerWI) || (cnPerWI%rad))
+							continue;
+
+						if(!(R % rad))
+							break;
+					}
+
+					assert((cnPerWI%rad) == 0);
+
+					// A radix of 1 leaves R unchanged; unless R is already 1 the
+					// loop could never terminate, so flag it in debug builds
+					assert((rad > 1) || (R == 1));
+
+					L = LS * rad;
+					R /= rad;
+
+					radices.push_back(rad);
+					passes.push_back(Pass<PR>(pid, length, rad, cnPerWI, L, LS, R, linearRegs, r2c, c2r, rcFull, rcSimple));
+
+					pid++;
+					LS *= rad;
+
+					assert(R >= 1);
+					if(R == 1)
+						break;
+				}
+				numPasses = pid;
+			}
+
+			assert(numPasses == passes.size());
+			assert(numPasses == radices.size());
+
+#ifdef PARMETERS_TO_BE_READ
+
+			ParamRead pr;
+			ReadParameterFile(pr);
+
+			radices.clear();
+			passes.clear();
+
+			radices = pr.radices;
+			numPasses = radices.size();
+
+			LS = 1;
+			R = length;
+			for(size_t i=0; i<numPasses; i++)
+			{
+				size_t rad = radices[i];
+				L = LS * rad;
+				R /= rad;
+
+				passes.push_back(Pass<PR>(i, length, rad, cnPerWI, L, LS, R, linearRegs));
+
+				LS *= rad;
+			}
+			assert(R == 1);
+#endif
+
+			// Grouping read/writes ok?
+			bool grp = IsGroupedReadWritePossible();
+			for(size_t i=0; i < numPasses; i++)
+				passes[i].SetGrouping(grp);
+
+			// Store the next pass-object pointers
+			// (done only after 'passes' is fully populated so the addresses are stable)
+			if(numPasses > 1)
+				for(size_t i=0; i < (numPasses - 1); i++)
+					passes[i].SetNextPass(&passes[i+1]);
+
+		}
+
+        // Append the complete OpenCL source for this FFT to 'str'.
+        //
+        // The emitted program consists of, in order: the precision pragma, the
+        // twiddle table (when length > 1), the large-1D twiddle table (when
+        // params.fft_3StepTwiddle is set), numeric constants, one butterfly
+        // function per unique radix and direction, the pass functions, and the
+        // entry kernels "fft_fwd" / "fft_back". For real transforms only a single
+        // direction is generated (forward when the input is real).
+        //
+        // Output buffer arguments of the generated kernels are never declared
+        // const: the kernel stores through pointers derived from them.
+        void GenerateKernel(std::string &str)
+		{
+			std::string twType = RegBaseType<PR>(2);
+			std::string rType  = RegBaseType<PR>(1);
+			std::string r2Type  = RegBaseType<PR>(2);
+
+			bool inInterleaved;	 // Input is interleaved format
+			bool outInterleaved; // Output is interleaved format
+			inInterleaved  = (	(params.fft_inputLayout == CLFFT_COMPLEX_INTERLEAVED) ||
+								(params.fft_inputLayout == CLFFT_HERMITIAN_INTERLEAVED) ) ? true : false;
+			outInterleaved = (	(params.fft_outputLayout == CLFFT_COMPLEX_INTERLEAVED) ||
+								(params.fft_outputLayout == CLFFT_HERMITIAN_INTERLEAVED) ) ? true : false;
+
+			bool inReal;  // Input is real format
+			bool outReal; // Output is real format
+			inReal  = (params.fft_inputLayout == CLFFT_REAL) ? true : false;
+			outReal = (params.fft_outputLayout == CLFFT_REAL) ? true : false;
+
+			size_t large1D = params.fft_N[0] * params.fft_N[1];
+
+			// Pragma
+			str += ClPragma<PR>();
+
+			// Twiddle table
+			if(length > 1)
+			{
+				TwiddleTable twTable(length);
+
+				str += "\n__constant ";
+				str += twType; str += " ";
+				str += TwTableName();
+				str += "["; str += SztToStr(length-1); str += "] = {\n";
+				twTable.GenerateTwiddleTable<PR>(radices, str);
+				str += "};\n\n";
+			}
+			str += "\n";
+
+			// twiddle factors for 1d-large 3-step algorithm
+			if(params.fft_3StepTwiddle)
+			{
+				TwiddleTableLarge twLarge(large1D);
+				twLarge.GenerateTwiddleTable<PR>(str);
+			}
+
+			std::string sfx = FloatSuffix<PR>();
+
+			// Vector type
+			str += "#define fvect2 "; str += RegBaseType<PR>(2); str += "\n\n";
+
+			//constants
+			str += "#define C8Q  0.70710678118654752440084436210485"; str += sfx; str += "\n";
+
+			str += "#define C5QA 0.30901699437494742410229341718282"; str += sfx; str += "\n";
+			str += "#define C5QB 0.95105651629515357211643933337938"; str += sfx; str += "\n";
+			str += "#define C5QC 0.50000000000000000000000000000000"; str += sfx; str += "\n";
+			str += "#define C5QD 0.58778525229247312916870595463907"; str += sfx; str += "\n";
+			str += "#define C5QE 0.80901699437494742410229341718282"; str += sfx; str += "\n";
+
+			str += "#define C3QA 0.50000000000000000000000000000000"; str += sfx; str += "\n";
+			str += "#define C3QB 0.86602540378443864676372317075294"; str += sfx; str += "\n";
+			str += "\n";
+
+			bool cReg = halfLds ? true : false;
+
+			// Generate butterflies for all unique radices
+			std::list<size_t> uradices;
+			for(std::vector<size_t>::const_iterator r = radices.begin(); r != radices.end(); r++)
+				uradices.push_back(*r);
+
+			uradices.sort();
+			uradices.unique();
+
+			typename std::vector< Pass<PR> >::const_iterator p;
+			if(length > 1)
+			{
+				for(std::list<size_t>::const_iterator r = uradices.begin(); r != uradices.end(); r++)
+				{
+					size_t rad = *r;
+
+					// Find the first pass that uses this radix; every unique radix
+					// originates from 'radices', so such a pass exists
+					p = passes.begin();
+					while(p->GetRadix() != rad) p++;
+
+					for(size_t d=0; d<2; d++)
+					{
+						bool fwd = d ? false : true;
+
+						if(p->GetNumB1()) { Butterfly<PR> bfly(rad, 1, fwd, cReg); bfly.GenerateButterfly(str); str += "\n"; }
+						if(p->GetNumB2()) { Butterfly<PR> bfly(rad, 2, fwd, cReg); bfly.GenerateButterfly(str); str += "\n"; }
+						if(p->GetNumB4()) { Butterfly<PR> bfly(rad, 4, fwd, cReg); bfly.GenerateButterfly(str); str += "\n"; }
+					}
+				}
+			}
+
+			// Generate passes
+			for(size_t d=0; d<2; d++)
+			{
+				bool fwd;
+
+				if(r2c2r)
+				{
+					fwd = r2c;
+				}
+				else
+				{
+					fwd = d ? false : true;
+				}
+
+				double scale = fwd ? params.fft_fwdScale : params.fft_backScale;
+				bool tw3Step = false;
+
+				for(p = passes.begin(); p != passes.end(); p++)
+				{
+					double s = 1.0;
+					size_t ins = 1, outs = 1;
+					bool gIn = false, gOut = false;
+					bool inIlvd = false, outIlvd = false;
+					bool inRl = false, outRl = false;
+
+					// Only the first pass reads global input and only the last pass
+					// writes global output (and applies scale / 3-step twiddles)
+					if(p == passes.begin())		{ inIlvd  = inInterleaved;  inRl  = inReal;  gIn  = true; ins  = params.fft_inStride[0];  }
+					if((p+1) == passes.end())	{ outIlvd = outInterleaved; outRl = outReal; gOut = true; outs = params.fft_outStride[0]; s = scale; tw3Step = params.fft_3StepTwiddle; }
+
+					p->GeneratePass(fwd, str, tw3Step, inIlvd, outIlvd, inRl, outRl, ins, outs, s, gIn, gOut);
+				}
+
+				// if real transform we do only 1 direction
+				if(r2c2r)
+					break;
+			}
+
+			// TODO : address this kludge
+			str += " typedef union  { uint u; int i; } cb_t;\n\n";
+
+			for(size_t d=0; d<2; d++)
+			{
+				bool fwd;
+
+				if(r2c2r)
+				{
+					fwd = inReal ? true : false;
+				}
+				else
+				{
+					fwd = d ? false : true;
+				}
+
+				// FFT kernel begin
+				// Function attribute
+				str += "__kernel __attribute__((reqd_work_group_size (";
+				str += SztToStr(workGroupSize); str += ",1,1)))\nvoid ";
+
+				// Function name
+				if(fwd) str += "fft_fwd";
+				else	str += "fft_back";
+				str += "(";
+
+				// TODO : address this kludge
+				str += "__constant cb_t *cb __attribute__((max_constant_size(32))), ";
+
+				// Function attributes
+				if(params.fft_placeness == CLFFT_INPLACE)
+				{
+					if(r2c2r)
+					{
+						if(outInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " * restrict gb)\n";
+						}
+						else
+						{
+							str += "__global "; str += rType; str += " * restrict gb)\n";
+						}
+					}
+					else
+					{
+						assert(inInterleaved == outInterleaved);
+						assert(params.fft_inStride[1] == params.fft_outStride[1]);
+						assert(params.fft_inStride[0] == params.fft_outStride[0]);
+
+						if(inInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " * restrict gb)\n";
+						}
+						else
+						{
+							str += "__global "; str += rType; str += " * restrict gbRe, ";
+							str += "__global "; str += rType; str += " * restrict gbIm)\n";
+						}
+					}
+				}
+				else
+				{
+					if(r2c2r)
+					{
+						if(inInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " * restrict gbIn, ";
+						}
+						else if(inReal)
+						{
+							str += "__global "; str += rType; str += " * restrict gbIn, ";
+						}
+						else
+						{
+							str += "__global const "; str += rType; str += " * restrict gbInRe, ";
+							str += "__global const "; str += rType; str += " * restrict gbInIm, ";
+						}
+
+						if(outInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " * restrict gbOut)\n";
+						}
+						else if(outReal)
+						{
+							str += "__global "; str += rType; str += " * restrict gbOut)\n";
+						}
+						else
+						{
+							// Planar output is written by the kernel, so it must not be
+							// const-qualified (a const restrict pointer to modified data is invalid)
+							str += "__global "; str += rType; str += " * restrict gbOutRe, ";
+							str += "__global "; str += rType; str += " * restrict gbOutIm)\n";
+						}
+					}
+					else
+					{
+						if(inInterleaved)
+						{
+							str += "__global const "; str += r2Type; str += " * restrict gbIn, ";
+						}
+						else
+						{
+							str += "__global const "; str += rType; str += " * restrict gbInRe, ";
+							str += "__global const "; str += rType; str += " * restrict gbInIm, ";
+						}
+
+						if(outInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " * restrict gbOut)\n";
+						}
+						else
+						{
+							str += "__global "; str += rType; str += " * restrict gbOutRe, ";
+							str += "__global "; str += rType; str += " * restrict gbOutIm)\n";
+						}
+					}
+				}
+
+				str += "{\n";
+
+				// Initialize
+				str += "\t";
+				str += "uint me = get_local_id(0);\n\t";
+				str += "uint batch = get_group_id(0);";
+				str += "\n";
+
+				// Allocate LDS
+				size_t ldsSize = halfLds ? length*numTrans : 2*length*numTrans;
+				if(numPasses > 1)
+				{
+					str += "\n\t";
+					str += "__local "; str += rType; str += " lds[";
+					str += SztToStr(ldsSize); str += "];\n";
+				}
+
+				// Declare memory pointers
+				str += "\n\t";
+				if(r2c2r)
+				{
+					str += "uint iOffset;\n\t";
+					str += "uint oOffset;\n\n\t";
+					if(!rcSimple)
+					{
+						str += "uint iOffset2;\n\t";
+						str += "uint oOffset2;\n\n\t";
+					}
+
+					if(inInterleaved)
+					{
+						if(!rcSimple)	{	str += "__global "; str += r2Type; str += " *lwbIn2;\n\t"; }
+											str += "__global "; str += r2Type; str += " *lwbIn;\n\t";
+					}
+					else if(inReal)
+					{
+						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbIn2;\n\t"; }
+											str += "__global "; str += rType; str += " *lwbIn;\n\t";
+
+					}
+					else
+					{
+						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbInRe2;\n\t"; }
+						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbInIm2;\n\t"; }
+											str += "__global "; str += rType; str += " *lwbInRe;\n\t";
+											str += "__global "; str += rType; str += " *lwbInIm;\n\t";
+					}
+
+					if(outInterleaved)
+					{
+						if(!rcSimple)	{	str += "__global "; str += r2Type; str += " *lwbOut2;\n\t"; }
+											str += "__global "; str += r2Type; str += " *lwbOut;\n\n";
+
+					}
+					else if(outReal)
+					{
+						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbOut2;\n\t"; }
+											str += "__global "; str += rType; str += " *lwbOut;\n\n";
+
+					}
+					else
+					{
+						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbOutRe2;\n\t"; }
+						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbOutIm2;\n\t"; }
+											str += "__global "; str += rType; str += " *lwbOutRe;\n\t";
+											str += "__global "; str += rType; str += " *lwbOutIm;\n\n";
+
+					}
+				}
+				else
+				{
+					if(params.fft_placeness == CLFFT_INPLACE)
+					{
+						str += "uint ioOffset;\n\t";
+
+						if(inInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " *lwb;\n\n";
+						}
+						else
+						{
+							str += "__global "; str += rType; str += " *lwbRe;\n\t";
+							str += "__global "; str += rType; str += " *lwbIm;\n\n";
+						}
+					}
+					else
+					{
+						str += "uint iOffset;\n\t";
+						str += "uint oOffset;\n\t";
+
+						if(inInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " *lwbIn;\n\t";
+						}
+						else
+						{
+							str += "__global "; str += rType; str += " *lwbInRe;\n\t";
+							str += "__global "; str += rType; str += " *lwbInIm;\n\t";
+						}
+
+						if(outInterleaved)
+						{
+							str += "__global "; str += r2Type; str += " *lwbOut;\n\n";
+						}
+						else
+						{
+							str += "__global "; str += rType; str += " *lwbOutRe;\n\t";
+							str += "__global "; str += rType; str += " *lwbOutIm;\n\n";
+						}
+					}
+				}
+
+				// Setup registers if needed
+				if(halfLds)
+				{
+					str += "\t"; str += RegBaseType<PR>(2);
+					str += " "; str += IterRegs("", false);
+					str += ";\n\n";
+				}
+
+				// Calculate total transform count
+				// (product of the higher dimension lengths and a runtime value read from 'cb')
+				std::string totalBatch = "(";
+				size_t i = 0;
+				while(i < (params.fft_DataDim - 2))
+				{
+					totalBatch += SztToStr(params.fft_N[i+1]); totalBatch += " * ";
+					i++;
+				}
+				totalBatch += "cb["; totalBatch += SztToStr(i); totalBatch += "].u)";
+
+				// Conditional read-write ('rw') for arbitrary batch number
+				if(r2c2r && !rcSimple)
+				{
+					str += "\tuint this = "; str += totalBatch; str += " - batch*";
+					str +=  SztToStr(2*numTrans); str += ";\n";
+					str += "\tuint rw = (me < ((this+1)/2)*"; str += SztToStr(workGroupSizePerTrans);
+					str += ") ? (this - 2*(me/"; str += SztToStr(workGroupSizePerTrans); str += ")) : 0;\n\n";
+				}
+				else
+				{
+					if(numTrans > 1)
+					{
+						str += "\tuint rw = (me < ("; str += totalBatch;
+						str += " - batch*"; str += SztToStr(numTrans); str += ")*";
+						str += SztToStr(workGroupSizePerTrans); str += ") ? 1 : 0;\n\n";
+					}
+				}
+
+				// Transform index for 3-step twiddles
+				if(params.fft_3StepTwiddle)
+				{
+					if(numTrans == 1)
+					{
+						str += "\tuint b = batch%";
+					}
+					else
+					{
+						str += "\tuint b = (batch*"; str += SztToStr(numTrans); str += " + (me/";
+						str += SztToStr(workGroupSizePerTrans); str += "))%";
+					}
+
+					str += SztToStr(params.fft_N[1]); str += ";\n\n";
+				}
+				else
+				{
+					str += "\tuint b = 0;\n\n";
+				}
+
+				// Setup memory pointers
+				if(r2c2r)
+				{
+					str += OffsetCalc("iOffset", true);
+					str += OffsetCalc("oOffset", false);
+					if(!rcSimple) { str += OffsetCalc("iOffset2",  true, true); }
+					if(!rcSimple) { str += OffsetCalc("oOffset2", false, true); }
+
+					str += "\n\t";
+					if(params.fft_placeness == CLFFT_INPLACE)
+					{
+						if(inInterleaved)
+						{
+							if(!rcSimple) {	str += "lwbIn2 = (__global "; str += r2Type; str += " *)gb + iOffset2;\n\t"; }
+											str += "lwbIn  = (__global "; str += r2Type; str += " *)gb + iOffset;\n\t";
+						}
+						else
+						{
+							if(!rcSimple) {	str += "lwbIn2 = (__global "; str += rType; str += " *)gb + iOffset2;\n\t"; }
+											str += "lwbIn  = (__global "; str += rType; str += " *)gb + iOffset;\n\t";
+
+						}
+
+						if(!rcSimple) {	str += "lwbOut2 = gb + oOffset2;\n\t"; }
+										str += "lwbOut = gb + oOffset;\n\n";
+
+					}
+					else
+					{
+						if(inInterleaved || inReal)
+						{
+							if(!rcSimple) {	str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
+											str += "lwbIn = gbIn + iOffset;\n\t";
+						}
+						else
+						{
+							if(!rcSimple) {	str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
+							if(!rcSimple) {	str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
+											str += "lwbInRe = gbInRe + iOffset;\n\t";
+											str += "lwbInIm = gbInIm + iOffset;\n\t";
+
+						}
+
+						if(outInterleaved || outReal)
+						{
+							if(!rcSimple) {	str += "lwbOut2 = gbOut + oOffset2;\n\t"; }
+											str += "lwbOut = gbOut + oOffset;\n\n";
+						}
+						else
+						{
+							if(!rcSimple) {	str += "lwbOutRe2 = gbOutRe + oOffset2;\n\t"; }
+							if(!rcSimple) {	str += "lwbOutIm2 = gbOutIm + oOffset2;\n\t"; }
+											str += "lwbOutRe = gbOutRe + oOffset;\n\t";
+											str += "lwbOutIm = gbOutIm + oOffset;\n\n";
+						}
+					}
+				}
+				else
+				{
+					if(params.fft_placeness == CLFFT_INPLACE)
+					{
+						str += OffsetCalc("ioOffset", true);
+
+						str += "\t";
+						if(inInterleaved)
+						{
+							str += "lwb = gb + ioOffset;\n\n";
+						}
+						else
+						{
+							str += "lwbRe = gbRe + ioOffset;\n\t";
+							str += "lwbIm = gbIm + ioOffset;\n\n";
+						}
+					}
+					else
+					{
+						str += OffsetCalc("iOffset", true);
+						str += OffsetCalc("oOffset", false);
+
+						str += "\t";
+						if(inInterleaved)
+						{
+							str += "lwbIn = gbIn + iOffset;\n\t";
+						}
+						else
+						{
+							str += "lwbInRe = gbInRe + iOffset;\n\t";
+							str += "lwbInIm = gbInIm + iOffset;\n\t";
+						}
+
+						if(outInterleaved)
+						{
+							str += "lwbOut = gbOut + oOffset;\n\n";
+						}
+						else
+						{
+							str += "lwbOutRe = gbOutRe + oOffset;\n\t";
+							str += "lwbOutIm = gbOutIm + oOffset;\n\n";
+						}
+					}
+				}
+
+				// Set rw and 'me' per transform
+				// rw string also contains 'b'
+				std::string rw, me;
+
+				if(r2c2r && !rcSimple)	rw = "rw, b, ";
+				else					rw = (numTrans > 1) ? "rw, b, " : "1, b, ";
+
+				if(numTrans > 1)	{ me += "me%"; me += SztToStr(workGroupSizePerTrans); me += ", "; }
+				else				{ me += "me, "; }
+
+				// Buffer strings
+				std::string inBuf, outBuf;
+				if(r2c2r)
+				{
+					if(rcSimple)
+					{
+						if(inInterleaved || inReal)		inBuf  = "lwbIn, ";
+						else							inBuf  = "lwbInRe, lwbInIm, ";
+						if(outInterleaved || outReal)	outBuf = "lwbOut";
+						else							outBuf = "lwbOutRe, lwbOutIm";
+					}
+					else
+					{
+						if(inInterleaved || inReal)		inBuf  = "lwbIn, lwbIn2, ";
+						else							inBuf  = "lwbInRe, lwbInRe2, lwbInIm, lwbInIm2, ";
+						if(outInterleaved || outReal)	outBuf = "lwbOut, lwbOut2";
+						else							outBuf = "lwbOutRe, lwbOutRe2, lwbOutIm, lwbOutIm2";
+					}
+				}
+				else
+				{
+					if(params.fft_placeness == CLFFT_INPLACE)
+					{
+						if(inInterleaved)	{ inBuf = "lwb, "; outBuf = "lwb"; }
+						else				{ inBuf = "lwbRe, lwbIm, "; outBuf = "lwbRe, lwbIm"; }
+					}
+					else
+					{
+						if(inInterleaved)	inBuf  = "lwbIn, ";
+						else				inBuf  = "lwbInRe, lwbInIm, ";
+						if(outInterleaved)	outBuf = "lwbOut";
+						else				outBuf = "lwbOutRe, lwbOutIm";
+					}
+				}
+
+				// Call passes
+				if(numPasses == 1)
+				{
+					str += "\t";
+					str += PassName(0, fwd);
+					str += "("; str += rw; str += me;
+					str += "0, 0, ";
+					str += inBuf; str += outBuf;
+					str += IterRegs("&");
+					str += ");\n";
+				}
+				else
+				{
+					for(typename std::vector<Pass<PR> >::const_iterator p = passes.begin(); p != passes.end(); p++)
+					{
+						str += "\t";
+						str += PassName(p->GetPosition(), fwd);
+						str += "(";
+
+						// LDS offset of the transform this work-item belongs to
+						std::string ldsOff;
+						if(numTrans > 1)
+						{
+							ldsOff += "(me/"; ldsOff += SztToStr(workGroupSizePerTrans);
+							ldsOff += ")*"; ldsOff += SztToStr(length);
+						}
+						else
+						{
+							ldsOff += "0";
+						}
+
+						// In half-LDS mode both components share one LDS region
+						std::string ldsArgs;
+						if(halfLds) { ldsArgs += "lds, lds"; }
+						else		{ ldsArgs += "lds, lds + "; ldsArgs += SztToStr(length*numTrans); }
+
+						str += rw; str += me;
+						if(p == passes.begin()) // beginning pass
+						{
+							str += "0, ";
+							str += ldsOff;
+							str += ", ";
+							str += inBuf;
+							str += ldsArgs; str += IterRegs("&"); str += ");\n";
+							if(!halfLds) str += "\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+						}
+						else if((p+1) == passes.end()) // ending pass
+						{
+							str += ldsOff;
+							str += ", ";
+							str += "0, ";
+							str += ldsArgs; str += ", ";
+							str += outBuf;
+							str += IterRegs("&"); str += ");\n";
+						}
+						else // intermediate pass
+						{
+							str += ldsOff;
+							str += ", ";
+							str += ldsOff;
+							str += ", ";
+							str += ldsArgs; str += ", ";
+							str += ldsArgs; str += IterRegs("&"); str += ");\n";
+							if(!halfLds) str += "\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
+						}
+					}
+				}
+
+				str += "}\n\n";
+
+				// Real transforms have a single direction only
+				if(r2c2r)
+					break;
+			}
+		}
+    };
+};
+
+using namespace StockhamGenerator;
+
+//    Fill 'params' (the kernel generator key) from this plan for the Stockham
+//    generator: precision, placeness, layouts, lengths and strides, the chosen
+//    work group size / transforms-per-work-group, 3-step twiddle settings and
+//    scale factors. Returns CLFFT_SUCCESS unless one of the checking macros
+//    bails out earlier.
+template<>
+clfftStatus FFTPlan::GetKernelGenKeyPvt<Stockham> (FFTKernelGenKeyParams & params) const
+{
+
+    //    Fetch the envelope describing the device limits for this plan
+    //
+    const FFTEnvelope * pEnvelope = NULL;
+    OPENCL_V(const_cast<FFTPlan*>(this)->GetEnvelope (& pEnvelope), _T("GetEnvelope failed"));
+    BUG_CHECK (NULL != pEnvelope);
+
+    //    Start from an all-zero key and copy the basic plan attributes
+    ::memset( &params, 0, sizeof( params ) );
+    params.fft_precision    = this->precision;
+    params.fft_placeness    = this->placeness;
+    params.fft_inputLayout  = this->inputLayout;
+	params.fft_MaxWorkGroupSize = this->envelope.limit_WorkGroupSize;
+
+    ARG_CHECK (this->inStride.size() == this->outStride.size())
+
+	const bool real_transform = ((this->inputLayout == CLFFT_REAL) || (this->outputLayout == CLFFT_REAL));
+
+    if ( (!real_transform) && (CLFFT_INPLACE == this->placeness) ) {
+        //    A complex in-place transform requires identical
+        //    input and output layout, dimensions and strides.
+        //
+        ARG_CHECK (this->inputLayout == this->outputLayout)
+        params.fft_outputLayout = this->inputLayout;
+        for (size_t u = this->inStride.size(); u > 0; --u) {
+            ARG_CHECK (this->inStride[u - 1] == this->outStride[u - 1]);
+        }
+    } else {
+        params.fft_outputLayout = this->outputLayout;
+    }
+
+    //    An N-D array is an (N+1)-D data structure: the N lengths and
+    //    strides are followed by the batch distance as the outermost stride.
+    //    A rank of 4 (5-D data structure) can occur when a large dimension
+    //    is split into two for the "3-step" algorithm.
+    //
+    const size_t rank = this->inStride.size();
+    if ((rank >= 1) && (rank <= 4)) {
+        ARG_CHECK(this->length   .size() > (rank - 1));
+        ARG_CHECK(this->outStride.size() > (rank - 1));
+        params.fft_DataDim = rank + 1;
+        for (size_t d = 0; d < rank; ++d) {
+            params.fft_N[d]         = this->length[d];
+            params.fft_inStride[d]  = this->inStride[d];
+            params.fft_outStride[d] = this->outStride[d];
+        }
+        params.fft_inStride[rank]  = this->iDist;
+        params.fft_outStride[rank] = this->oDist;
+    } else {
+        ARG_CHECK (false);
+    }
+
+    //    TODO:  we could simplify the address calculations in the kernel
+    //    when the input data is contiguous.
+    //    For example, a 3-D data structure with
+    //        lengths: [*, 64, *]
+    //        strides: [*, 1024, 65536]
+    //    could be reduced to a 2-D data structure.
+
+    params.fft_LdsComplex = this->bLdsComplex;
+
+	params.fft_RCsimple = this->RCsimple;
+
+	//    Work group size (wgs) and number of transforms per work group (nt)
+	size_t wgs, nt;
+#ifdef PARMETERS_TO_BE_READ
+	ParamRead pr;
+	ReadParameterFile(pr);
+	wgs = pr.workGroupSize;
+	nt = pr.numTransformsPerWg;
+#else
+	//    Ask the per-precision lookup table first
+	size_t t_wgs, t_nt;
+	if (params.fft_precision == CLFFT_SINGLE)
+	{
+		KernelCoreSpecs<P_SINGLE> kcs;
+		kcs.GetWGSAndNT(params.fft_N[0], t_wgs, t_nt);
+	}
+	else
+	{
+		KernelCoreSpecs<P_DOUBLE> kcs;
+		kcs.GetWGSAndNT(params.fft_N[0], t_wgs, t_nt);
+	}
+
+	//    Table values are used only when present and the device allows
+	//    work groups of at least 256; otherwise sizes are derived
+	const bool useTable = (t_wgs != 0) && (t_nt != 0) && (this->envelope.limit_WorkGroupSize >= 256);
+	if (useTable)
+	{
+		wgs = t_wgs;
+		nt = t_nt;
+	}
+	else
+	{
+		DetermineSizes(this->envelope.limit_WorkGroupSize, params.fft_N[0], wgs, nt);
+	}
+#endif
+
+	assert((nt * params.fft_N[0]) >= wgs);
+	assert((nt * params.fft_N[0])%wgs == 0);
+
+	params.fft_SIMD = wgs;
+	params.fft_R = (nt * params.fft_N[0])/wgs;
+
+
+    params.fft_MaxRadix     = params.fft_R;
+    params.fft_UseFMA       = true;
+
+    //    Large 1-D transforms: enable 3-step twiddles and derive the
+    //    second length from the full 1-D size
+    if (this->large1D != 0) {
+        ARG_CHECK (params.fft_N[0] != 0)
+        ARG_CHECK ((this->large1D % params.fft_N[0]) == 0)
+        params.fft_3StepTwiddle = true;
+        params.fft_N[1] = this->large1D / params.fft_N[0];
+    }
+
+    params.fft_fwdScale  = this->forwardScale;
+    params.fft_backScale = this->backwardScale;
+
+    return CLFFT_SUCCESS;
+}
+
+template<>
+clfftStatus FFTPlan::GetWorkSizesPvt<Stockham> (std::vector<size_t> & globalWS, std::vector<size_t> & localWS) const
+{
+    //    Appends one (1-D) entry to globalWS and one to localWS for this plan.
+    //    Start with the number of elements in the multi-dimensional array
+    //    (a zero length counts as 1), multiplied by the batch size.
+    unsigned long long numItems = 1;
+    for (size_t dim = 0; dim != this->length.size(); ++dim)
+        numItems *= std::max<size_t> (this->length[ dim ], 1);
+    numItems *= this->batchsize;
+
+    //    Translate the user plan into the structure that we use to map plans to clPrograms
+    FFTKernelGenKeyParams keyParams;
+    OPENCL_V( this->GetKernelGenKeyPvt<Stockham>( keyParams ), _T("GetKernelGenKey() failed!") );
+
+    numItems = DivRoundingUp<unsigned long long> (numItems, keyParams.fft_R);      // count of WorkItems
+    numItems = DivRoundingUp<unsigned long long> (numItems, keyParams.fft_SIMD);   // count of WorkGroups
+
+	// for real transforms we only need half the work groups since we do twice the work in 1 work group
+	const bool realLayout = (keyParams.fft_inputLayout == CLFFT_REAL) || (keyParams.fft_outputLayout == CLFFT_REAL);
+	if( realLayout && !(keyParams.fft_RCsimple) )
+		numItems = DivRoundingUp<unsigned long long> (numItems, 2);
+
+    //    At least one work group; convert back to a count of WorkItems,
+    //    which is therefore a multiple of fft_SIMD.
+    numItems = std::max<unsigned long long> (numItems, 1) * keyParams.fft_SIMD;
+
+	// 1 dimension work group size
+	globalWS.push_back( static_cast< size_t >( numItems ) );
+    localWS.push_back( keyParams.fft_SIMD );
+
+    return    CLFFT_SUCCESS;
+}
+
+template<>
+clfftStatus FFTPlan::GetMax1DLengthPvt<Stockham> (size_t * longest) const
+{
+	// TODO  The caller has already acquired the lock on *this
+	//	However, we shouldn't depend on it.
+
+	//	Query the devices in this context for their local memory sizes
+	//	How large a kernel we can generate depends on the *minimum* LDS
+	//	size for all devices.
+	//
+	const FFTEnvelope * envelope = NULL;
+	OPENCL_V(this->GetEnvelope (& envelope), _T("GetEnvelope failed"));
+	BUG_CHECK (NULL != envelope);
+
+	ARG_CHECK (NULL != longest)
+	const size_t sizePerElement = this->ElementSize();
+	const size_t elementsInLds  = envelope->limit_LocalMemSize / sizePerElement;
+
+	//	Report the element count rounded down to a power of 2.
+	*longest = FloorPo2 (elementsInLds);
+	return CLFFT_SUCCESS;
+}
+
+template<>
+clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
+{
+	//	Generate the kernel source for this plan's key and store it in fftRepo under that key.
+    FFTKernelGenKeyParams params;
+    OPENCL_V( this->GetKernelGenKeyPvt<Stockham> (params), _T("GetKernelGenKey() failed!") );
+
+	//	CLFFT_SINGLE selects the single precision generator; every other
+	//	precision value is handled by the double precision generator.
+	std::string programCode;
+	if(params.fft_precision == CLFFT_SINGLE)
+	{
+		Kernel<P_SINGLE> kernel(params);
+		kernel.GenerateKernel(programCode);
+	}
+	else
+	{
+		Kernel<P_DOUBLE> kernel(params);
+		kernel.GenerateKernel(programCode);
+	}
+
+#ifdef KERNEL_INTERJECT
+	ReadKernelFromFile(programCode);
+#endif
+
+	//	Register the source together with the names of its two entry points.
+    OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
+    OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
+
+    return CLFFT_SUCCESS;
+}
diff --git a/src/library/generator.stockham.h b/src/library/generator.stockham.h
new file mode 100644
index 00000000..bba7d640
--- /dev/null
+++ b/src/library/generator.stockham.h
@@ -0,0 +1,1401 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( AMD_CLFFT_generator_stockham_H )
+#define AMD_CLFFT_generator_stockham_H
+#include "private.h"
+#include "repo.h"
+#include "plan.h"
+
+typedef union {	// overlays a cl_float, a cl_uint and a cl_int on the same storage
+	cl_float f;
+	cl_uint  u;
+	cl_int   i;
+} cb_t;
+
+namespace StockhamGenerator
+{
+	// Floating-point precision that the generator templates below are parameterised on
+	enum Precision
+	{
+		P_SINGLE,	// "float" based types (see RegBaseType)
+		P_DOUBLE,	// "double" based types (see RegBaseType)
+	};
+
+	template <Precision PR>
+	inline size_t PrecisionWidth()
+	{
+		// 1 for single precision, 2 for double precision.  Any other value of
+		// PR asserts, and yields 1 when asserts are compiled out.
+		if(PR == P_SINGLE)	return 1;
+		if(PR == P_DOUBLE)	return 2;
+		assert(false);
+		return 1;
+	}
+
+	template <Precision PR>
+	inline std::string ClPragma()
+	{
+		// Source preamble for the precision: nothing for single; for double,
+		// enable cl_khr_fp64 when that macro is defined, otherwise cl_amd_fp64.
+		if(PR == P_SINGLE)	return "";
+		if(PR == P_DOUBLE)
+			return	"\n#ifdef cl_khr_fp64\n"
+					"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+					"#else\n"
+					"#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
+					"#endif\n\n";
+		assert(false); return "";
+	}
+
+	// Decimal text of an unsigned size value, e.g. SztToStr(64) gives "64"
+	inline std::string SztToStr(size_t i)
+	{
+		std::ostringstream text;
+		text << i;
+		return text.str();
+	}
+
+	inline std::string FloatToStr(double f)	// scientific notation, 16 digits after the decimal point
+	{
+		std::ostringstream text;
+		text.precision(16);
+		text << std::scientific << f;
+		return text.str();
+	}
+
+	//	Find the smallest power of 2 that is >= n; return its exponent
+	//	e.g., CeilPo2 (7) returns 3 : (2^3 >= 7);  n of 0 or 1 returns 0.
+	//	When no power of 2 representable in size_t is >= n, the bit width
+	//	of size_t is returned instead of looping forever.
+	inline size_t CeilPo2 (size_t n)
+	{
+		size_t exponent = 0;
+		//	pow2 wraps to 0 once its top bit is shifted out; stop there
+		for(size_t pow2 = 1; (pow2 != 0) && (pow2 < n); pow2 <<= 1)
+			++exponent;
+
+		return exponent;
+	}
+
+	inline size_t FloorPo2 (size_t n)
+	//	return the largest power of 2 that is <= n  (0 when n is 0).
+	//	e.g., FloorPo2 (7) returns 4.
+	// *** TODO use x86 BSR instruction, using compiler intrinsics.
+	{
+		//	n & (n-1) drops the lowest set bit; keep dropping while more than one bit is set
+		for (size_t rest = n & (n-1); rest != 0; rest = n & (n-1))
+			n = rest;
+		return n;
+	}
+
+	typedef std::pair<std::string,std::string> stringpair;
+	//	Builds the source text of a complex product, split in two pieces:
+	//	    first  : "(type) ((a.x * b.x - a.y * b.y),"
+	//	    second : "(a.y * b.x + a.x * b.y))"
+	//	When forward is false both signs flip, i.e. a is multiplied by the conjugate of b.
+	inline stringpair ComplexMul(const char *type, const char * a, const char * b, bool forward = true)
+	{
+		const std::string lhs(a);
+		const std::string rhs(b);
+		const char * const realSign = forward ? " - " : " + ";
+		const char * const imagSign = forward ? " + " : " - ";
+
+		stringpair product;
+
+		//	real part, prefixed by the type cast
+		product.first  = std::string("(") + type + ") ((";
+		product.first += lhs + ".x * " + rhs + ".x" + realSign;
+		product.first += lhs + ".y * " + rhs + ".y),";
+
+		//	imaginary part, closing the parenthesis opened above
+		product.second  = "(";
+		product.second += lhs + ".y * " + rhs + ".x" + imagSign;
+		product.second += lhs + ".x * " + rhs + ".y))";
+
+		return product;
+	}
+
+	// Register data base types: the type name that holds 'count' values
+	// of precision PR, e.g. "float", "float2", "double4".
+	// Only counts of 1, 2 and 4 are accepted; anything else asserts
+	// and yields "" when asserts are compiled out.
+	template <Precision PR>
+	inline std::string RegBaseType(size_t count)
+	{
+		std::string name;
+
+		// Scalar type name for the precision
+		switch(PR)
+		{
+		case P_SINGLE:	name = "float";  break;
+		case P_DOUBLE:	name = "double"; break;
+		default:		assert(false); return "";
+		}
+
+		// Vector width suffix; a count of 1 is the bare scalar type
+		switch(count)
+		{
+		case 1:		break;
+		case 2:		name += "2"; break;
+		case 4:		name += "4"; break;
+		default:	assert(false); return "";
+		}
+
+		return name;
+	}
+
+	template <Precision PR>
+	inline std::string FloatSuffix()
+	{
+		// Suffix for floating-point constants: "f" for single precision,
+		// nothing for double precision.
+		if(PR == P_SINGLE)
+			return "f";
+
+		// Any precision other than the two known ones asserts; the empty
+		// suffix is still returned when asserts are compiled out.
+		if(PR != P_DOUBLE)
+			assert(false);
+		return "";
+	}
+
+	inline std::string ButterflyName(size_t radix, size_t count, bool fwd)
+	{
+		// Builds "<Fwd|Inv>Rad<radix>B<count>",
+		// e.g. ButterflyName(4, 1, true) gives "FwdRad4B1".
+		std::string name(fwd ? "Fwd" : "Inv");
+		name += "Rad" + SztToStr(radix);
+		name += "B"   + SztToStr(count);
+		return name;
+	}
+
+	inline std::string PassName(size_t pos, bool fwd)
+	{
+		// Builds "<Fwd|Inv>Pass<pos>",
+		// e.g. PassName(2, false) gives "InvPass2".
+		std::string name(fwd ? "Fwd" : "Inv");
+		name += "Pass" + SztToStr(pos);
+		return name;
+	}
+
+	inline std::string TwTableName()
+	{
+		return std::string("twiddles");	// fixed identifier; NOTE(review): presumably the twiddle table's name in generated code - confirm
+	}
+
+	inline std::string TwTableLargeName()
+	{
+		return std::string("twiddle_dee");	// fixed identifier; NOTE(review): presumably the large twiddle table's name - confirm
+	}
+
+	inline std::string TwTableLargeFunc()
+	{
+		return std::string("TW3step");	// fixed identifier; NOTE(review): presumably the large-twiddle helper function's name - confirm
+	}
+
+	// FFT butterfly
+    template <Precision PR>
+    class Butterfly
+    {
+		size_t radix;		// Base radix
+        size_t count;       // Number of basic butterflies, valid values: 1,2,4
+		bool fwd;			// FFT direction
+		bool cReg;			// registers are complex numbers, .x (real), .y(imag)
+
+		size_t BitReverse (size_t n, size_t N) const	// for N a power of 2 and n < N: n with its log2(N) low bits reversed; N < 2 returns n
+		{
+			return (N >= 2) ? (((n & 1) != 0 ? (N >> 1) : 0) | BitReverse (n >> 1, N >> 1)) : n;
+		}
+
+		void GenerateButterflyStr(std::string &bflyStr) const
+		{
+			std::string regType = cReg ? RegBaseType<PR>(2) : RegBaseType<PR>(count);
+
+			// Function attribute
+			bflyStr += "__attribute__((always_inline)) void \n";
+
+			// Function name
+			bflyStr += ButterflyName(radix, count, fwd);
+
+			// Function Arguments
+			bflyStr += "(";
+			for(size_t i=0;;i++)
+			{
+				if(cReg)
+				{
+					bflyStr += regType; bflyStr += " *R";
+					if(radix & (radix-1))	bflyStr += SztToStr(i);
+					else					bflyStr += SztToStr(BitReverse(i,radix));
+				}
+				else
+				{
+					bflyStr += regType; bflyStr += " *R"; bflyStr += SztToStr(i); bflyStr += ", ";	// real arguments
+					bflyStr += regType; bflyStr += " *I"; bflyStr += SztToStr(i);					// imaginary arguments
+				}
+
+				if(i == radix-1)
+				{
+					bflyStr += ")";
+					break;
+				}
+				else
+				{
+					bflyStr += ", ";
+				}
+			}
+
+			bflyStr += "\n{\n\n";
+
+
+			// Temporary variables
+			// Allocate temporary variables if we are not using complex registers (cReg = 0) or if cReg is true, then
+			// allocate temporary variables only for non power-of-2 radices
+			if( (radix & (radix-1)) || (!cReg) )
+			{
+				bflyStr += "\t";
+				if(cReg)
+					bflyStr += RegBaseType<PR>(1);
+				else
+					bflyStr += regType;
+
+				for(size_t i=0;;i++)
+				{
+					bflyStr += " TR"; bflyStr += SztToStr(i); bflyStr += ",";	// real arguments
+					bflyStr += " TI"; bflyStr += SztToStr(i);					// imaginary arguments
+
+					if(i == radix-1)
+					{
+						bflyStr += ";";
+						break;
+					}
+					else
+					{
+						bflyStr += ",";
+					}
+				}
+			}
+			else
+			{
+				bflyStr += "\t";
+				bflyStr += RegBaseType<PR>(2);
+				bflyStr += " T;";
+			}
+
+
+			bflyStr += "\n\n\t";
+
+			// Butterfly for different radices
+			switch(radix)
+			{
+			case 2:
+				{
+					if(cReg)
+					{
+						bflyStr +=
+						"(*R1) = (*R0) - (*R1);\n\t"
+						"(*R0) = 2.0f * (*R0) - (*R1);\n\t";
+					}
+					else
+					{
+						bflyStr +=
+						"TR0 = (*R0) + (*R1);\n\t"
+						"TI0 = (*I0) + (*I1);\n\t"
+						"TR1 = (*R0) - (*R1);\n\t"
+						"TI1 = (*I0) - (*I1);\n\t";
+					}
+
+				} break;
+			case 3:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R1).x + (*R2).x;\n\t"
+							"TR1 = ((*R0).x - C3QA*((*R1).x + (*R2).x)) + C3QB*((*R1).y - (*R2).y);\n\t"
+							"TR2 = ((*R0).x - C3QA*((*R1).x + (*R2).x)) - C3QB*((*R1).y - (*R2).y);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R1).y + (*R2).y;\n\t"
+							"TI1 = ((*R0).y - C3QA*((*R1).y + (*R2).y)) - C3QB*((*R1).x - (*R2).x);\n\t"
+							"TI2 = ((*R0).y - C3QA*((*R1).y + (*R2).y)) + C3QB*((*R1).x - (*R2).x);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R1 + *R2;\n\t"
+							"TR1 = (*R0 - C3QA*(*R1 + *R2)) + C3QB*(*I1 - *I2);\n\t"
+							"TR2 = (*R0 - C3QA*(*R1 + *R2)) - C3QB*(*I1 - *I2);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I1 + *I2;\n\t"
+							"TI1 = (*I0 - C3QA*(*I1 + *I2)) - C3QB*(*R1 - *R2);\n\t"
+							"TI2 = (*I0 - C3QA*(*I1 + *I2)) + C3QB*(*R1 - *R2);\n\t";
+						}
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R1).x + (*R2).x;\n\t"
+							"TR1 = ((*R0).x - C3QA*((*R1).x + (*R2).x)) - C3QB*((*R1).y - (*R2).y);\n\t"
+							"TR2 = ((*R0).x - C3QA*((*R1).x + (*R2).x)) + C3QB*((*R1).y - (*R2).y);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R1).y + (*R2).y;\n\t"
+							"TI1 = ((*R0).y - C3QA*((*R1).y + (*R2).y)) + C3QB*((*R1).x - (*R2).x);\n\t"
+							"TI2 = ((*R0).y - C3QA*((*R1).y + (*R2).y)) - C3QB*((*R1).x - (*R2).x);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R1 + *R2;\n\t"
+							"TR1 = (*R0 - C3QA*(*R1 + *R2)) - C3QB*(*I1 - *I2);\n\t"
+							"TR2 = (*R0 - C3QA*(*R1 + *R2)) + C3QB*(*I1 - *I2);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I1 + *I2;\n\t"
+							"TI1 = (*I0 - C3QA*(*I1 + *I2)) + C3QB*(*R1 - *R2);\n\t"
+							"TI2 = (*I0 - C3QA*(*I1 + *I2)) - C3QB*(*R1 - *R2);\n\t";
+						}
+					}
+				} break;
+			case 4:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"(*R1) = (*R0) - (*R1);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R1);\n\t"
+							"(*R3) = (*R2) - (*R3);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R3);\n\t"
+							"\n\t"
+							"(*R2) = (*R0) - (*R2);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R2);\n\t"
+							"(*R3) = (*R1) + (fvect2)(-(*R3).y, (*R3).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R3);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = (*R0) + (*R2) + (*R1) + (*R3);\n\t"
+							"TR1 = (*R0) - (*R2) + (*I1) - (*I3);\n\t"
+							"TR2 = (*R0) + (*R2) - (*R1) - (*R3);\n\t"
+							"TR3 = (*R0) - (*R2) - (*I1) + (*I3);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*I0) + (*I2) + (*I1) + (*I3);\n\t"
+							"TI1 = (*I0) - (*I2) - (*R1) + (*R3);\n\t"
+							"TI2 = (*I0) + (*I2) - (*I1) - (*I3);\n\t"
+							"TI3 = (*I0) - (*I2) + (*R1) - (*R3);\n\t";
+						}
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"(*R1) = (*R0) - (*R1);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R1);\n\t"
+							"(*R3) = (*R2) - (*R3);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R3);\n\t"
+							"\n\t"
+							"(*R2) = (*R0) - (*R2);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R2);\n\t"
+							"(*R3) = (*R1) + (fvect2)((*R3).y, -(*R3).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R3);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = (*R0) + (*R2) + (*R1) + (*R3);\n\t"
+							"TR1 = (*R0) - (*R2) - (*I1) + (*I3);\n\t"
+							"TR2 = (*R0) + (*R2) - (*R1) - (*R3);\n\t"
+							"TR3 = (*R0) - (*R2) + (*I1) - (*I3);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*I0) + (*I2) + (*I1) + (*I3);\n\t"
+							"TI1 = (*I0) - (*I2) + (*R1) - (*R3);\n\t"
+							"TI2 = (*I0) + (*I2) - (*I1) - (*I3);\n\t"
+							"TI3 = (*I0) - (*I2) - (*R1) + (*R3);\n\t";
+						}
+					}
+				} break;
+			case 5:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R1).x + (*R2).x + (*R3).x + (*R4).x;\n\t"
+							"TR1 = ((*R0).x - C5QC*((*R2).x + (*R3).x)) + C5QB*((*R1).y - (*R4).y) + C5QD*((*R2).y - (*R3).y) + C5QA*(((*R1).x - (*R2).x) + ((*R4).x - (*R3).x));\n\t"
+							"TR4 = ((*R0).x - C5QC*((*R2).x + (*R3).x)) - C5QB*((*R1).y - (*R4).y) - C5QD*((*R2).y - (*R3).y) + C5QA*(((*R1).x - (*R2).x) + ((*R4).x - (*R3).x));\n\t"
+							"TR2 = ((*R0).x - C5QC*((*R1).x + (*R4).x)) - C5QB*((*R2).y - (*R3).y) + C5QD*((*R1).y - (*R4).y) + C5QA*(((*R2).x - (*R1).x) + ((*R3).x - (*R4).x));\n\t"
+							"TR3 = ((*R0).x - C5QC*((*R1).x + (*R4).x)) + C5QB*((*R2).y - (*R3).y) - C5QD*((*R1).y - (*R4).y) + C5QA*(((*R2).x - (*R1).x) + ((*R3).x - (*R4).x));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R1).y + (*R2).y + (*R3).y + (*R4).y;\n\t"
+							"TI1 = ((*R0).y - C5QC*((*R2).y + (*R3).y)) - C5QB*((*R1).x - (*R4).x) - C5QD*((*R2).x - (*R3).x) + C5QA*(((*R1).y - (*R2).y) + ((*R4).y - (*R3).y));\n\t"
+							"TI4 = ((*R0).y - C5QC*((*R2).y + (*R3).y)) + C5QB*((*R1).x - (*R4).x) + C5QD*((*R2).x - (*R3).x) + C5QA*(((*R1).y - (*R2).y) + ((*R4).y - (*R3).y));\n\t"
+							"TI2 = ((*R0).y - C5QC*((*R1).y + (*R4).y)) + C5QB*((*R2).x - (*R3).x) - C5QD*((*R1).x - (*R4).x) + C5QA*(((*R2).y - (*R1).y) + ((*R3).y - (*R4).y));\n\t"
+							"TI3 = ((*R0).y - C5QC*((*R1).y + (*R4).y)) - C5QB*((*R2).x - (*R3).x) + C5QD*((*R1).x - (*R4).x) + C5QA*(((*R2).y - (*R1).y) + ((*R3).y - (*R4).y));\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R1 + *R2 + *R3 + *R4;\n\t"
+							"TR1 = (*R0 - C5QC*(*R2 + *R3)) + C5QB*(*I1 - *I4) + C5QD*(*I2 - *I3) + C5QA*((*R1 - *R2) + (*R4 - *R3));\n\t"
+							"TR4 = (*R0 - C5QC*(*R2 + *R3)) - C5QB*(*I1 - *I4) - C5QD*(*I2 - *I3) + C5QA*((*R1 - *R2) + (*R4 - *R3));\n\t"
+							"TR2 = (*R0 - C5QC*(*R1 + *R4)) - C5QB*(*I2 - *I3) + C5QD*(*I1 - *I4) + C5QA*((*R2 - *R1) + (*R3 - *R4));\n\t"
+							"TR3 = (*R0 - C5QC*(*R1 + *R4)) + C5QB*(*I2 - *I3) - C5QD*(*I1 - *I4) + C5QA*((*R2 - *R1) + (*R3 - *R4));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I1 + *I2 + *I3 + *I4;\n\t"
+							"TI1 = (*I0 - C5QC*(*I2 + *I3)) - C5QB*(*R1 - *R4) - C5QD*(*R2 - *R3) + C5QA*((*I1 - *I2) + (*I4 - *I3));\n\t"
+							"TI4 = (*I0 - C5QC*(*I2 + *I3)) + C5QB*(*R1 - *R4) + C5QD*(*R2 - *R3) + C5QA*((*I1 - *I2) + (*I4 - *I3));\n\t"
+							"TI2 = (*I0 - C5QC*(*I1 + *I4)) + C5QB*(*R2 - *R3) - C5QD*(*R1 - *R4) + C5QA*((*I2 - *I1) + (*I3 - *I4));\n\t"
+							"TI3 = (*I0 - C5QC*(*I1 + *I4)) - C5QB*(*R2 - *R3) + C5QD*(*R1 - *R4) + C5QA*((*I2 - *I1) + (*I3 - *I4));\n\t";
+						}
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R1).x + (*R2).x + (*R3).x + (*R4).x;\n\t"
+							"TR1 = ((*R0).x - C5QC*((*R2).x + (*R3).x)) - C5QB*((*R1).y - (*R4).y) - C5QD*((*R2).y - (*R3).y) + C5QA*(((*R1).x - (*R2).x) + ((*R4).x - (*R3).x));\n\t"
+							"TR4 = ((*R0).x - C5QC*((*R2).x + (*R3).x)) + C5QB*((*R1).y - (*R4).y) + C5QD*((*R2).y - (*R3).y) + C5QA*(((*R1).x - (*R2).x) + ((*R4).x - (*R3).x));\n\t"
+							"TR2 = ((*R0).x - C5QC*((*R1).x + (*R4).x)) + C5QB*((*R2).y - (*R3).y) - C5QD*((*R1).y - (*R4).y) + C5QA*(((*R2).x - (*R1).x) + ((*R3).x - (*R4).x));\n\t"
+							"TR3 = ((*R0).x - C5QC*((*R1).x + (*R4).x)) - C5QB*((*R2).y - (*R3).y) + C5QD*((*R1).y - (*R4).y) + C5QA*(((*R2).x - (*R1).x) + ((*R3).x - (*R4).x));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R1).y + (*R2).y + (*R3).y + (*R4).y;\n\t"
+							"TI1 = ((*R0).y - C5QC*((*R2).y + (*R3).y)) + C5QB*((*R1).x - (*R4).x) + C5QD*((*R2).x - (*R3).x) + C5QA*(((*R1).y - (*R2).y) + ((*R4).y - (*R3).y));\n\t"
+							"TI4 = ((*R0).y - C5QC*((*R2).y + (*R3).y)) - C5QB*((*R1).x - (*R4).x) - C5QD*((*R2).x - (*R3).x) + C5QA*(((*R1).y - (*R2).y) + ((*R4).y - (*R3).y));\n\t"
+							"TI2 = ((*R0).y - C5QC*((*R1).y + (*R4).y)) - C5QB*((*R2).x - (*R3).x) + C5QD*((*R1).x - (*R4).x) + C5QA*(((*R2).y - (*R1).y) + ((*R3).y - (*R4).y));\n\t"
+							"TI3 = ((*R0).y - C5QC*((*R1).y + (*R4).y)) + C5QB*((*R2).x - (*R3).x) - C5QD*((*R1).x - (*R4).x) + C5QA*(((*R2).y - (*R1).y) + ((*R3).y - (*R4).y));\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R1 + *R2 + *R3 + *R4;\n\t"
+							"TR1 = (*R0 - C5QC*(*R2 + *R3)) - C5QB*(*I1 - *I4) - C5QD*(*I2 - *I3) + C5QA*((*R1 - *R2) + (*R4 - *R3));\n\t"
+							"TR4 = (*R0 - C5QC*(*R2 + *R3)) + C5QB*(*I1 - *I4) + C5QD*(*I2 - *I3) + C5QA*((*R1 - *R2) + (*R4 - *R3));\n\t"
+							"TR2 = (*R0 - C5QC*(*R1 + *R4)) + C5QB*(*I2 - *I3) - C5QD*(*I1 - *I4) + C5QA*((*R2 - *R1) + (*R3 - *R4));\n\t"
+							"TR3 = (*R0 - C5QC*(*R1 + *R4)) - C5QB*(*I2 - *I3) + C5QD*(*I1 - *I4) + C5QA*((*R2 - *R1) + (*R3 - *R4));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I1 + *I2 + *I3 + *I4;\n\t"
+							"TI1 = (*I0 - C5QC*(*I2 + *I3)) + C5QB*(*R1 - *R4) + C5QD*(*R2 - *R3) + C5QA*((*I1 - *I2) + (*I4 - *I3));\n\t"
+							"TI4 = (*I0 - C5QC*(*I2 + *I3)) - C5QB*(*R1 - *R4) - C5QD*(*R2 - *R3) + C5QA*((*I1 - *I2) + (*I4 - *I3));\n\t"
+							"TI2 = (*I0 - C5QC*(*I1 + *I4)) - C5QB*(*R2 - *R3) + C5QD*(*R1 - *R4) + C5QA*((*I2 - *I1) + (*I3 - *I4));\n\t"
+							"TI3 = (*I0 - C5QC*(*I1 + *I4)) + C5QB*(*R2 - *R3) - C5QD*(*R1 - *R4) + C5QA*((*I2 - *I1) + (*I3 - *I4));\n\t";
+						}
+					}
+				} break;
+			case 6:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R2).x + (*R4).x;\n\t"
+							"TR2 = ((*R0).x - C3QA*((*R2).x + (*R4).x)) + C3QB*((*R2).y - (*R4).y);\n\t"
+							"TR4 = ((*R0).x - C3QA*((*R2).x + (*R4).x)) - C3QB*((*R2).y - (*R4).y);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R2).y + (*R4).y;\n\t"
+							"TI2 = ((*R0).y - C3QA*((*R2).y + (*R4).y)) - C3QB*((*R2).x - (*R4).x);\n\t"
+							"TI4 = ((*R0).y - C3QA*((*R2).y + (*R4).y)) + C3QB*((*R2).x - (*R4).x);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = (*R1).x + (*R3).x + (*R5).x;\n\t"
+							"TR3 = ((*R1).x - C3QA*((*R3).x + (*R5).x)) + C3QB*((*R3).y - (*R5).y);\n\t"
+							"TR5 = ((*R1).x - C3QA*((*R3).x + (*R5).x)) - C3QB*((*R3).y - (*R5).y);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = (*R1).y + (*R3).y + (*R5).y;\n\t"
+							"TI3 = ((*R1).y - C3QA*((*R3).y + (*R5).y)) - C3QB*((*R3).x - (*R5).x);\n\t"
+							"TI5 = ((*R1).y - C3QA*((*R3).y + (*R5).y)) + C3QB*((*R3).x - (*R5).x);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).x = TR0 + TR1;\n\t"
+							"(*R1).x = TR2 + ( C3QA*TR3 + C3QB*TI3);\n\t"
+							"(*R2).x = TR4 + (-C3QA*TR5 + C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).y = TI0 + TI1;\n\t"
+							"(*R1).y = TI2 + (-C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*R2).y = TI4 + (-C3QB*TR5 - C3QA*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R3).x = TR0 - TR1;\n\t"
+							"(*R4).x = TR2 - ( C3QA*TR3 + C3QB*TI3);\n\t"
+							"(*R5).x = TR4 - (-C3QA*TR5 + C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R3).y = TI0 - TI1;\n\t"
+							"(*R4).y = TI2 - (-C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*R5).y = TI4 - (-C3QB*TR5 - C3QA*TI5);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R2 + *R4;\n\t"
+							"TR2 = (*R0 - C3QA*(*R2 + *R4)) + C3QB*(*I2 - *I4);\n\t"
+							"TR4 = (*R0 - C3QA*(*R2 + *R4)) - C3QB*(*I2 - *I4);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I2 + *I4;\n\t"
+							"TI2 = (*I0 - C3QA*(*I2 + *I4)) - C3QB*(*R2 - *R4);\n\t"
+							"TI4 = (*I0 - C3QA*(*I2 + *I4)) + C3QB*(*R2 - *R4);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = *R1 + *R3 + *R5;\n\t"
+							"TR3 = (*R1 - C3QA*(*R3 + *R5)) + C3QB*(*I3 - *I5);\n\t"
+							"TR5 = (*R1 - C3QA*(*R3 + *R5)) - C3QB*(*I3 - *I5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = *I1 + *I3 + *I5;\n\t"
+							"TI3 = (*I1 - C3QA*(*I3 + *I5)) - C3QB*(*R3 - *R5);\n\t"
+							"TI5 = (*I1 - C3QA*(*I3 + *I5)) + C3QB*(*R3 - *R5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0) = TR0 + TR1;\n\t"
+							"(*R1) = TR2 + ( C3QA*TR3 + C3QB*TI3);\n\t"
+							"(*R2) = TR4 + (-C3QA*TR5 + C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I0) = TI0 + TI1;\n\t"
+							"(*I1) = TI2 + (-C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*I2) = TI4 + (-C3QB*TR5 - C3QA*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R3) = TR0 - TR1;\n\t"
+							"(*R4) = TR2 - ( C3QA*TR3 + C3QB*TI3);\n\t"
+							"(*R5) = TR4 - (-C3QA*TR5 + C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I3) = TI0 - TI1;\n\t"
+							"(*I4) = TI2 - (-C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*I5) = TI4 - (-C3QB*TR5 - C3QA*TI5);\n\t";
+						}
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R2).x + (*R4).x;\n\t"
+							"TR2 = ((*R0).x - C3QA*((*R2).x + (*R4).x)) - C3QB*((*R2).y - (*R4).y);\n\t"
+							"TR4 = ((*R0).x - C3QA*((*R2).x + (*R4).x)) + C3QB*((*R2).y - (*R4).y);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R2).y + (*R4).y;\n\t"
+							"TI2 = ((*R0).y - C3QA*((*R2).y + (*R4).y)) + C3QB*((*R2).x - (*R4).x);\n\t"
+							"TI4 = ((*R0).y - C3QA*((*R2).y + (*R4).y)) - C3QB*((*R2).x - (*R4).x);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = (*R1).x + (*R3).x + (*R5).x;\n\t"
+							"TR3 = ((*R1).x - C3QA*((*R3).x + (*R5).x)) - C3QB*((*R3).y - (*R5).y);\n\t"
+							"TR5 = ((*R1).x - C3QA*((*R3).x + (*R5).x)) + C3QB*((*R3).y - (*R5).y);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = (*R1).y + (*R3).y + (*R5).y;\n\t"
+							"TI3 = ((*R1).y - C3QA*((*R3).y + (*R5).y)) + C3QB*((*R3).x - (*R5).x);\n\t"
+							"TI5 = ((*R1).y - C3QA*((*R3).y + (*R5).y)) - C3QB*((*R3).x - (*R5).x);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).x = TR0 + TR1;\n\t"
+							"(*R1).x = TR2 + ( C3QA*TR3 - C3QB*TI3);\n\t"
+							"(*R2).x = TR4 + (-C3QA*TR5 - C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).y = TI0 + TI1;\n\t"
+							"(*R1).y = TI2 + ( C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*R2).y = TI4 + ( C3QB*TR5 - C3QA*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R3).x = TR0 - TR1;\n\t"
+							"(*R4).x = TR2 - ( C3QA*TR3 - C3QB*TI3);\n\t"
+							"(*R5).x = TR4 - (-C3QA*TR5 - C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R3).y = TI0 - TI1;\n\t"
+							"(*R4).y = TI2 - ( C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*R5).y = TI4 - ( C3QB*TR5 - C3QA*TI5);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R2 + *R4;\n\t"
+							"TR2 = (*R0 - C3QA*(*R2 + *R4)) - C3QB*(*I2 - *I4);\n\t"
+							"TR4 = (*R0 - C3QA*(*R2 + *R4)) + C3QB*(*I2 - *I4);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I2 + *I4;\n\t"
+							"TI2 = (*I0 - C3QA*(*I2 + *I4)) + C3QB*(*R2 - *R4);\n\t"
+							"TI4 = (*I0 - C3QA*(*I2 + *I4)) - C3QB*(*R2 - *R4);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = *R1 + *R3 + *R5;\n\t"
+							"TR3 = (*R1 - C3QA*(*R3 + *R5)) - C3QB*(*I3 - *I5);\n\t"
+							"TR5 = (*R1 - C3QA*(*R3 + *R5)) + C3QB*(*I3 - *I5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = *I1 + *I3 + *I5;\n\t"
+							"TI3 = (*I1 - C3QA*(*I3 + *I5)) + C3QB*(*R3 - *R5);\n\t"
+							"TI5 = (*I1 - C3QA*(*I3 + *I5)) - C3QB*(*R3 - *R5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0) = TR0 + TR1;\n\t"
+							"(*R1) = TR2 + ( C3QA*TR3 - C3QB*TI3);\n\t"
+							"(*R2) = TR4 + (-C3QA*TR5 - C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I0) = TI0 + TI1;\n\t"
+							"(*I1) = TI2 + ( C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*I2) = TI4 + ( C3QB*TR5 - C3QA*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R3) = TR0 - TR1;\n\t"
+							"(*R4) = TR2 - ( C3QA*TR3 - C3QB*TI3);\n\t"
+							"(*R5) = TR4 - (-C3QA*TR5 - C3QB*TI5);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I3) = TI0 - TI1;\n\t"
+							"(*I4) = TI2 - ( C3QB*TR3 + C3QA*TI3);\n\t"
+							"(*I5) = TI4 - ( C3QB*TR5 - C3QA*TI5);\n\t";
+						}
+					}
+				} break;
+			case 8:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"(*R1) = (*R0) - (*R1);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R1);\n\t"
+							"(*R3) = (*R2) - (*R3);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R3);\n\t"
+							"(*R5) = (*R4) - (*R5);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R5);\n\t"
+							"(*R7) = (*R6) - (*R7);\n\t"
+							"(*R6) = 2.0f * (*R6) - (*R7);\n\t"
+							"\n\t"
+							"(*R2) = (*R0) - (*R2);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R2);\n\t"
+							"(*R3) = (*R1) + (fvect2)(-(*R3).y, (*R3).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R3);\n\t"
+							"(*R6) = (*R4) - (*R6);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R6);\n\t"
+							"(*R7) = (*R5) + (fvect2)(-(*R7).y, (*R7).x);\n\t"
+							"(*R5) = 2.0f * (*R5) - (*R7);\n\t"
+							"\n\t"
+							"(*R4) = (*R0) - (*R4);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R4);\n\t"
+							"(*R5) = ((*R1) - C8Q * (*R5)) - C8Q * (fvect2)((*R5).y, -(*R5).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R5);\n\t"
+							"(*R6) = (*R2) + (fvect2)(-(*R6).y, (*R6).x);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R6);\n\t"
+							"(*R7) = ((*R3) + C8Q * (*R7)) - C8Q * (fvect2)((*R7).y, -(*R7).x);\n\t"
+							"(*R3) = 2.0f * (*R3) - (*R7);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = (*R0) + (*R4) + (*R2) + (*R6) +     (*R1)             +     (*R3)             +     (*R5)             +     (*R7)            ;\n\t"
+							"TR1 = (*R0) - (*R4) + (*I2) - (*I6) + C8Q*(*R1) + C8Q*(*I1) - C8Q*(*R3) + C8Q*(*I3) - C8Q*(*R5) - C8Q*(*I5) + C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TR2 = (*R0) + (*R4) - (*R2) - (*R6)             +     (*I1)             -     (*I3)             +     (*I5)             -     (*I7);\n\t"
+							"TR3 = (*R0) - (*R4) - (*I2) + (*I6) - C8Q*(*R1) + C8Q*(*I1) + C8Q*(*R3) + C8Q*(*I3) + C8Q*(*R5) - C8Q*(*I5) - C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TR4 = (*R0) + (*R4) + (*R2) + (*R6) -     (*R1)             -     (*R3)             -     (*R5)             -     (*R7)            ;\n\t"
+							"TR5 = (*R0) - (*R4) + (*I2) - (*I6) - C8Q*(*R1) - C8Q*(*I1) + C8Q*(*R3) - C8Q*(*I3) + C8Q*(*R5) + C8Q*(*I5) - C8Q*(*R7) + C8Q*(*I7);\n\t"
+							"TR6 = (*R0) + (*R4) - (*R2) - (*R6)             -    (*I1)              +     (*I3)             -     (*I5)             +     (*I7);\n\t"
+							"TR7 = (*R0) - (*R4) - (*I2) + (*I6) + C8Q*(*R1) - C8Q*(*I1) - C8Q*(*R3) - C8Q*(*I3) - C8Q*(*R5) + C8Q*(*I5) + C8Q*(*R7) + C8Q*(*I7);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*I0) + (*I4) + (*I2) + (*I6)             +     (*I1)             +     (*I3)             +     (*I5)             +     (*I7);\n\t"
+							"TI1 = (*I0) - (*I4) - (*R2) + (*R6) - C8Q*(*R1) + C8Q*(*I1) - C8Q*(*R3) - C8Q*(*I3) + C8Q*(*R5) - C8Q*(*I5) + C8Q*(*R7) + C8Q*(*I7);\n\t"
+							"TI2 = (*I0) + (*I4) - (*I2) - (*I6) -     (*R1)             +     (*R3)             -     (*R5)             +     (*R7)            ;\n\t"
+							"TI3 = (*I0) - (*I4) + (*R2) - (*R6) - C8Q*(*R1) - C8Q*(*I1) - C8Q*(*R3) + C8Q*(*I3) + C8Q*(*R5) + C8Q*(*I5) + C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TI4 = (*I0) + (*I4) + (*I2) + (*I6)             -    (*I1)              -     (*I3)             -     (*I5)             -     (*I7);\n\t"
+							"TI5 = (*I0) - (*I4) - (*R2) + (*R6) + C8Q*(*R1) - C8Q*(*I1) + C8Q*(*R3) + C8Q*(*I3) - C8Q*(*R5) + C8Q*(*I5) - C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TI6 = (*I0) + (*I4) - (*I2) - (*I6) +     (*R1)             -     (*R3)             +     (*R5)             -     (*R7)            ;\n\t"
+							"TI7 = (*I0) - (*I4) + (*R2) - (*R6) + C8Q*(*R1) + C8Q*(*I1) + C8Q*(*R3) - C8Q*(*I3) - C8Q*(*R5) - C8Q*(*I5) - C8Q*(*R7) + C8Q*(*I7);\n\t";
+						}
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"(*R1) = (*R0) - (*R1);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R1);\n\t"
+							"(*R3) = (*R2) - (*R3);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R3);\n\t"
+							"(*R5) = (*R4) - (*R5);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R5);\n\t"
+							"(*R7) = (*R6) - (*R7);\n\t"
+							"(*R6) = 2.0f * (*R6) - (*R7);\n\t"
+							"\n\t"
+							"(*R2) = (*R0) - (*R2);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R2);\n\t"
+							"(*R3) = (*R1) + (fvect2)((*R3).y, -(*R3).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R3);\n\t"
+							"(*R6) = (*R4) - (*R6);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R6);\n\t"
+							"(*R7) = (*R5) + (fvect2)((*R7).y, -(*R7).x);\n\t"
+							"(*R5) = 2.0f * (*R5) - (*R7);\n\t"
+							"\n\t"
+							"(*R4) = (*R0) - (*R4);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R4);\n\t"
+							"(*R5) = ((*R1) - C8Q * (*R5)) + C8Q * (fvect2)((*R5).y, -(*R5).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R5);\n\t"
+							"(*R6) = (*R2) + (fvect2)((*R6).y, -(*R6).x);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R6);\n\t"
+							"(*R7) = ((*R3) + C8Q * (*R7)) + C8Q * (fvect2)((*R7).y, -(*R7).x);\n\t"
+							"(*R3) = 2.0f * (*R3) - (*R7);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = (*R0) + (*R4) + (*R2) + (*R6) +     (*R1)             +     (*R3)             +     (*R5)             +     (*R7)            ;\n\t"
+							"TR1 = (*R0) - (*R4) - (*I2) + (*I6) + C8Q*(*R1) - C8Q*(*I1) - C8Q*(*R3) - C8Q*(*I3) - C8Q*(*R5) + C8Q*(*I5) + C8Q*(*R7) + C8Q*(*I7);\n\t"
+							"TR2 = (*R0) + (*R4) - (*R2) - (*R6)             -     (*I1)             +     (*I3)             -     (*I5)             +     (*I7);\n\t"
+							"TR3 = (*R0) - (*R4) + (*I2) - (*I6) - C8Q*(*R1) - C8Q*(*I1) + C8Q*(*R3) - C8Q*(*I3) + C8Q*(*R5) + C8Q*(*I5) - C8Q*(*R7) + C8Q*(*I7);\n\t"
+							"TR4 = (*R0) + (*R4) + (*R2) + (*R6) -     (*R1)             -    (*R3)              -     (*R5)             -     (*R7)            ;\n\t"
+							"TR5 = (*R0) - (*R4) - (*I2) + (*I6) - C8Q*(*R1) + C8Q*(*I1) + C8Q*(*R3) + C8Q*(*I3) + C8Q*(*R5) - C8Q*(*I5) - C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TR6 = (*R0) + (*R4) - (*R2) - (*R6)             +     (*I1)             -     (*I3)             +     (*I5)             -     (*I7);\n\t"
+							"TR7 = (*R0) - (*R4) + (*I2) - (*I6) + C8Q*(*R1) + C8Q*(*I1) - C8Q*(*R3) + C8Q*(*I3) - C8Q*(*R5) - C8Q*(*I5) + C8Q*(*R7) - C8Q*(*I7);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*I0) + (*I4) + (*I2) + (*I6)             +     (*I1)             +    (*I3)              +     (*I5)             +     (*I7);\n\t"
+							"TI1 = (*I0) - (*I4) + (*R2) - (*R6) + C8Q*(*R1) + C8Q*(*I1) + C8Q*(*R3) - C8Q*(*I3) - C8Q*(*R5) - C8Q*(*I5) - C8Q*(*R7) + C8Q*(*I7);\n\t"
+							"TI2 = (*I0) + (*I4) - (*I2) - (*I6) +     (*R1)             -     (*R3)             +     (*R5)             -     (*R7)            ;\n\t"
+							"TI3 = (*I0) - (*I4) - (*R2) + (*R6) + C8Q*(*R1) - C8Q*(*I1) + C8Q*(*R3) + C8Q*(*I3) - C8Q*(*R5) + C8Q*(*I5) - C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TI4 = (*I0) + (*I4) + (*I2) + (*I6)             -     (*I1)             -     (*I3)             -     (*I5)             -     (*I7);\n\t"
+							"TI5 = (*I0) - (*I4) + (*R2) - (*R6) - C8Q*(*R1) - C8Q*(*I1) - C8Q*(*R3) + C8Q*(*I3) + C8Q*(*R5) + C8Q*(*I5) + C8Q*(*R7) - C8Q*(*I7);\n\t"
+							"TI6 = (*I0) + (*I4) - (*I2) - (*I6) -     (*R1)             +     (*R3)             -     (*R5)             +     (*R7)            ;\n\t"
+							"TI7 = (*I0) - (*I4) - (*R2) + (*R6) - C8Q*(*R1) + C8Q*(*I1) - C8Q*(*R3) - C8Q*(*I3) + C8Q*(*R5) - C8Q*(*I5) + C8Q*(*R7) + C8Q*(*I7);\n\t";
+						}
+					}
+				} break;
+			case 10:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R2).x + (*R4).x + (*R6).x + (*R8).x;\n\t"
+							"TR2 = ((*R0).x - C5QC*((*R4).x + (*R6).x)) + C5QB*((*R2).y - (*R8).y) + C5QD*((*R4).y - (*R6).y) + C5QA*(((*R2).x - (*R4).x) + ((*R8).x - (*R6).x));\n\t"
+							"TR8 = ((*R0).x - C5QC*((*R4).x + (*R6).x)) - C5QB*((*R2).y - (*R8).y) - C5QD*((*R4).y - (*R6).y) + C5QA*(((*R2).x - (*R4).x) + ((*R8).x - (*R6).x));\n\t"
+							"TR4 = ((*R0).x - C5QC*((*R2).x + (*R8).x)) - C5QB*((*R4).y - (*R6).y) + C5QD*((*R2).y - (*R8).y) + C5QA*(((*R4).x - (*R2).x) + ((*R6).x - (*R8).x));\n\t"
+							"TR6 = ((*R0).x - C5QC*((*R2).x + (*R8).x)) + C5QB*((*R4).y - (*R6).y) - C5QD*((*R2).y - (*R8).y) + C5QA*(((*R4).x - (*R2).x) + ((*R6).x - (*R8).x));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R2).y + (*R4).y + (*R6).y + (*R8).y;\n\t"
+							"TI2 = ((*R0).y - C5QC*((*R4).y + (*R6).y)) - C5QB*((*R2).x - (*R8).x) - C5QD*((*R4).x - (*R6).x) + C5QA*(((*R2).y - (*R4).y) + ((*R8).y - (*R6).y));\n\t"
+							"TI8 = ((*R0).y - C5QC*((*R4).y + (*R6).y)) + C5QB*((*R2).x - (*R8).x) + C5QD*((*R4).x - (*R6).x) + C5QA*(((*R2).y - (*R4).y) + ((*R8).y - (*R6).y));\n\t"
+							"TI4 = ((*R0).y - C5QC*((*R2).y + (*R8).y)) + C5QB*((*R4).x - (*R6).x) - C5QD*((*R2).x - (*R8).x) + C5QA*(((*R4).y - (*R2).y) + ((*R6).y - (*R8).y));\n\t"
+							"TI6 = ((*R0).y - C5QC*((*R2).y + (*R8).y)) - C5QB*((*R4).x - (*R6).x) + C5QD*((*R2).x - (*R8).x) + C5QA*(((*R4).y - (*R2).y) + ((*R6).y - (*R8).y));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = (*R1).x + (*R3).x + (*R5).x + (*R7).x + (*R9).x;\n\t"
+							"TR3 = ((*R1).x - C5QC*((*R5).x + (*R7).x)) + C5QB*((*R3).y - (*R9).y) + C5QD*((*R5).y - (*R7).y) + C5QA*(((*R3).x - (*R5).x) + ((*R9).x - (*R7).x));\n\t"
+							"TR9 = ((*R1).x - C5QC*((*R5).x + (*R7).x)) - C5QB*((*R3).y - (*R9).y) - C5QD*((*R5).y - (*R7).y) + C5QA*(((*R3).x - (*R5).x) + ((*R9).x - (*R7).x));\n\t"
+							"TR5 = ((*R1).x - C5QC*((*R3).x + (*R9).x)) - C5QB*((*R5).y - (*R7).y) + C5QD*((*R3).y - (*R9).y) + C5QA*(((*R5).x - (*R3).x) + ((*R7).x - (*R9).x));\n\t"
+							"TR7 = ((*R1).x - C5QC*((*R3).x + (*R9).x)) + C5QB*((*R5).y - (*R7).y) - C5QD*((*R3).y - (*R9).y) + C5QA*(((*R5).x - (*R3).x) + ((*R7).x - (*R9).x));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = (*R1).y + (*R3).y + (*R5).y + (*R7).y + (*R9).y;\n\t"
+							"TI3 = ((*R1).y - C5QC*((*R5).y + (*R7).y)) - C5QB*((*R3).x - (*R9).x) - C5QD*((*R5).x - (*R7).x) + C5QA*(((*R3).y - (*R5).y) + ((*R9).y - (*R7).y));\n\t"
+							"TI9 = ((*R1).y - C5QC*((*R5).y + (*R7).y)) + C5QB*((*R3).x - (*R9).x) + C5QD*((*R5).x - (*R7).x) + C5QA*(((*R3).y - (*R5).y) + ((*R9).y - (*R7).y));\n\t"
+							"TI5 = ((*R1).y - C5QC*((*R3).y + (*R9).y)) + C5QB*((*R5).x - (*R7).x) - C5QD*((*R3).x - (*R9).x) + C5QA*(((*R5).y - (*R3).y) + ((*R7).y - (*R9).y));\n\t"
+							"TI7 = ((*R1).y - C5QC*((*R3).y + (*R9).y)) - C5QB*((*R5).x - (*R7).x) + C5QD*((*R3).x - (*R9).x) + C5QA*(((*R5).y - (*R3).y) + ((*R7).y - (*R9).y));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).x = TR0 + TR1;\n\t"
+							"(*R1).x = TR2 + ( C5QE*TR3 + C5QD*TI3);\n\t"
+							"(*R2).x = TR4 + ( C5QA*TR5 + C5QB*TI5);\n\t"
+							"(*R3).x = TR6 + (-C5QA*TR7 + C5QB*TI7);\n\t"
+							"(*R4).x = TR8 + (-C5QE*TR9 + C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).y = TI0 + TI1;\n\t"
+							"(*R1).y = TI2 + (-C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*R2).y = TI4 + (-C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*R3).y = TI6 + (-C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*R4).y = TI8 + (-C5QD*TR9 - C5QE*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R5).x = TR0 - TR1;\n\t"
+							"(*R6).x = TR2 - ( C5QE*TR3 + C5QD*TI3);\n\t"
+							"(*R7).x = TR4 - ( C5QA*TR5 + C5QB*TI5);\n\t"
+							"(*R8).x = TR6 - (-C5QA*TR7 + C5QB*TI7);\n\t"
+							"(*R9).x = TR8 - (-C5QE*TR9 + C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R5).y = TI0 - TI1;\n\t"
+							"(*R6).y = TI2 - (-C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*R7).y = TI4 - (-C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*R8).y = TI6 - (-C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*R9).y = TI8 - (-C5QD*TR9 - C5QE*TI9);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R2 + *R4 + *R6 + *R8;\n\t"
+							"TR2 = (*R0 - C5QC*(*R4 + *R6)) + C5QB*(*I2 - *I8) + C5QD*(*I4 - *I6) + C5QA*((*R2 - *R4) + (*R8 - *R6));\n\t"
+							"TR8 = (*R0 - C5QC*(*R4 + *R6)) - C5QB*(*I2 - *I8) - C5QD*(*I4 - *I6) + C5QA*((*R2 - *R4) + (*R8 - *R6));\n\t"
+							"TR4 = (*R0 - C5QC*(*R2 + *R8)) - C5QB*(*I4 - *I6) + C5QD*(*I2 - *I8) + C5QA*((*R4 - *R2) + (*R6 - *R8));\n\t"
+							"TR6 = (*R0 - C5QC*(*R2 + *R8)) + C5QB*(*I4 - *I6) - C5QD*(*I2 - *I8) + C5QA*((*R4 - *R2) + (*R6 - *R8));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I2 + *I4 + *I6 + *I8;\n\t"
+							"TI2 = (*I0 - C5QC*(*I4 + *I6)) - C5QB*(*R2 - *R8) - C5QD*(*R4 - *R6) + C5QA*((*I2 - *I4) + (*I8 - *I6));\n\t"
+							"TI8 = (*I0 - C5QC*(*I4 + *I6)) + C5QB*(*R2 - *R8) + C5QD*(*R4 - *R6) + C5QA*((*I2 - *I4) + (*I8 - *I6));\n\t"
+							"TI4 = (*I0 - C5QC*(*I2 + *I8)) + C5QB*(*R4 - *R6) - C5QD*(*R2 - *R8) + C5QA*((*I4 - *I2) + (*I6 - *I8));\n\t"
+							"TI6 = (*I0 - C5QC*(*I2 + *I8)) - C5QB*(*R4 - *R6) + C5QD*(*R2 - *R8) + C5QA*((*I4 - *I2) + (*I6 - *I8));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = *R1 + *R3 + *R5 + *R7 + *R9;\n\t"
+							"TR3 = (*R1 - C5QC*(*R5 + *R7)) + C5QB*(*I3 - *I9) + C5QD*(*I5 - *I7) + C5QA*((*R3 - *R5) + (*R9 - *R7));\n\t"
+							"TR9 = (*R1 - C5QC*(*R5 + *R7)) - C5QB*(*I3 - *I9) - C5QD*(*I5 - *I7) + C5QA*((*R3 - *R5) + (*R9 - *R7));\n\t"
+							"TR5 = (*R1 - C5QC*(*R3 + *R9)) - C5QB*(*I5 - *I7) + C5QD*(*I3 - *I9) + C5QA*((*R5 - *R3) + (*R7 - *R9));\n\t"
+							"TR7 = (*R1 - C5QC*(*R3 + *R9)) + C5QB*(*I5 - *I7) - C5QD*(*I3 - *I9) + C5QA*((*R5 - *R3) + (*R7 - *R9));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = *I1 + *I3 + *I5 + *I7 + *I9;\n\t"
+							"TI3 = (*I1 - C5QC*(*I5 + *I7)) - C5QB*(*R3 - *R9) - C5QD*(*R5 - *R7) + C5QA*((*I3 - *I5) + (*I9 - *I7));\n\t"
+							"TI9 = (*I1 - C5QC*(*I5 + *I7)) + C5QB*(*R3 - *R9) + C5QD*(*R5 - *R7) + C5QA*((*I3 - *I5) + (*I9 - *I7));\n\t"
+							"TI5 = (*I1 - C5QC*(*I3 + *I9)) + C5QB*(*R5 - *R7) - C5QD*(*R3 - *R9) + C5QA*((*I5 - *I3) + (*I7 - *I9));\n\t"
+							"TI7 = (*I1 - C5QC*(*I3 + *I9)) - C5QB*(*R5 - *R7) + C5QD*(*R3 - *R9) + C5QA*((*I5 - *I3) + (*I7 - *I9));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0) = TR0 + TR1;\n\t"
+							"(*R1) = TR2 + ( C5QE*TR3 + C5QD*TI3);\n\t"
+							"(*R2) = TR4 + ( C5QA*TR5 + C5QB*TI5);\n\t"
+							"(*R3) = TR6 + (-C5QA*TR7 + C5QB*TI7);\n\t"
+							"(*R4) = TR8 + (-C5QE*TR9 + C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I0) = TI0 + TI1;\n\t"
+							"(*I1) = TI2 + (-C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*I2) = TI4 + (-C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*I3) = TI6 + (-C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*I4) = TI8 + (-C5QD*TR9 - C5QE*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R5) = TR0 - TR1;\n\t"
+							"(*R6) = TR2 - ( C5QE*TR3 + C5QD*TI3);\n\t"
+							"(*R7) = TR4 - ( C5QA*TR5 + C5QB*TI5);\n\t"
+							"(*R8) = TR6 - (-C5QA*TR7 + C5QB*TI7);\n\t"
+							"(*R9) = TR8 - (-C5QE*TR9 + C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I5) = TI0 - TI1;\n\t"
+							"(*I6) = TI2 - (-C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*I7) = TI4 - (-C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*I8) = TI6 - (-C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*I9) = TI8 - (-C5QD*TR9 - C5QE*TI9);\n\t";
+						}
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+							"TR0 = (*R0).x + (*R2).x + (*R4).x + (*R6).x + (*R8).x;\n\t"
+							"TR2 = ((*R0).x - C5QC*((*R4).x + (*R6).x)) - C5QB*((*R2).y - (*R8).y) - C5QD*((*R4).y - (*R6).y) + C5QA*(((*R2).x - (*R4).x) + ((*R8).x - (*R6).x));\n\t"
+							"TR8 = ((*R0).x - C5QC*((*R4).x + (*R6).x)) + C5QB*((*R2).y - (*R8).y) + C5QD*((*R4).y - (*R6).y) + C5QA*(((*R2).x - (*R4).x) + ((*R8).x - (*R6).x));\n\t"
+							"TR4 = ((*R0).x - C5QC*((*R2).x + (*R8).x)) + C5QB*((*R4).y - (*R6).y) - C5QD*((*R2).y - (*R8).y) + C5QA*(((*R4).x - (*R2).x) + ((*R6).x - (*R8).x));\n\t"
+							"TR6 = ((*R0).x - C5QC*((*R2).x + (*R8).x)) - C5QB*((*R4).y - (*R6).y) + C5QD*((*R2).y - (*R8).y) + C5QA*(((*R4).x - (*R2).x) + ((*R6).x - (*R8).x));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = (*R0).y + (*R2).y + (*R4).y + (*R6).y + (*R8).y;\n\t"
+							"TI2 = ((*R0).y - C5QC*((*R4).y + (*R6).y)) + C5QB*((*R2).x - (*R8).x) + C5QD*((*R4).x - (*R6).x) + C5QA*(((*R2).y - (*R4).y) + ((*R8).y - (*R6).y));\n\t"
+							"TI8 = ((*R0).y - C5QC*((*R4).y + (*R6).y)) - C5QB*((*R2).x - (*R8).x) - C5QD*((*R4).x - (*R6).x) + C5QA*(((*R2).y - (*R4).y) + ((*R8).y - (*R6).y));\n\t"
+							"TI4 = ((*R0).y - C5QC*((*R2).y + (*R8).y)) - C5QB*((*R4).x - (*R6).x) + C5QD*((*R2).x - (*R8).x) + C5QA*(((*R4).y - (*R2).y) + ((*R6).y - (*R8).y));\n\t"
+							"TI6 = ((*R0).y - C5QC*((*R2).y + (*R8).y)) + C5QB*((*R4).x - (*R6).x) - C5QD*((*R2).x - (*R8).x) + C5QA*(((*R4).y - (*R2).y) + ((*R6).y - (*R8).y));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = (*R1).x + (*R3).x + (*R5).x + (*R7).x + (*R9).x;\n\t"
+							"TR3 = ((*R1).x - C5QC*((*R5).x + (*R7).x)) - C5QB*((*R3).y - (*R9).y) - C5QD*((*R5).y - (*R7).y) + C5QA*(((*R3).x - (*R5).x) + ((*R9).x - (*R7).x));\n\t"
+							"TR9 = ((*R1).x - C5QC*((*R5).x + (*R7).x)) + C5QB*((*R3).y - (*R9).y) + C5QD*((*R5).y - (*R7).y) + C5QA*(((*R3).x - (*R5).x) + ((*R9).x - (*R7).x));\n\t"
+							"TR5 = ((*R1).x - C5QC*((*R3).x + (*R9).x)) + C5QB*((*R5).y - (*R7).y) - C5QD*((*R3).y - (*R9).y) + C5QA*(((*R5).x - (*R3).x) + ((*R7).x - (*R9).x));\n\t"
+							"TR7 = ((*R1).x - C5QC*((*R3).x + (*R9).x)) - C5QB*((*R5).y - (*R7).y) + C5QD*((*R3).y - (*R9).y) + C5QA*(((*R5).x - (*R3).x) + ((*R7).x - (*R9).x));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = (*R1).y + (*R3).y + (*R5).y + (*R7).y + (*R9).y;\n\t"
+							"TI3 = ((*R1).y - C5QC*((*R5).y + (*R7).y)) + C5QB*((*R3).x - (*R9).x) + C5QD*((*R5).x - (*R7).x) + C5QA*(((*R3).y - (*R5).y) + ((*R9).y - (*R7).y));\n\t"
+							"TI9 = ((*R1).y - C5QC*((*R5).y + (*R7).y)) - C5QB*((*R3).x - (*R9).x) - C5QD*((*R5).x - (*R7).x) + C5QA*(((*R3).y - (*R5).y) + ((*R9).y - (*R7).y));\n\t"
+							"TI5 = ((*R1).y - C5QC*((*R3).y + (*R9).y)) - C5QB*((*R5).x - (*R7).x) + C5QD*((*R3).x - (*R9).x) + C5QA*(((*R5).y - (*R3).y) + ((*R7).y - (*R9).y));\n\t"
+							"TI7 = ((*R1).y - C5QC*((*R3).y + (*R9).y)) + C5QB*((*R5).x - (*R7).x) - C5QD*((*R3).x - (*R9).x) + C5QA*(((*R5).y - (*R3).y) + ((*R7).y - (*R9).y));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).x = TR0 + TR1;\n\t"
+							"(*R1).x = TR2 + ( C5QE*TR3 - C5QD*TI3);\n\t"
+							"(*R2).x = TR4 + ( C5QA*TR5 - C5QB*TI5);\n\t"
+							"(*R3).x = TR6 + (-C5QA*TR7 - C5QB*TI7);\n\t"
+							"(*R4).x = TR8 + (-C5QE*TR9 - C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0).y = TI0 + TI1;\n\t"
+							"(*R1).y = TI2 + ( C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*R2).y = TI4 + ( C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*R3).y = TI6 + ( C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*R4).y = TI8 + ( C5QD*TR9 - C5QE*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R5).x = TR0 - TR1;\n\t"
+							"(*R6).x = TR2 - ( C5QE*TR3 - C5QD*TI3);\n\t"
+							"(*R7).x = TR4 - ( C5QA*TR5 - C5QB*TI5);\n\t"
+							"(*R8).x = TR6 - (-C5QA*TR7 - C5QB*TI7);\n\t"
+							"(*R9).x = TR8 - (-C5QE*TR9 - C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R5).y = TI0 - TI1;\n\t"
+							"(*R6).y = TI2 - ( C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*R7).y = TI4 - ( C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*R8).y = TI6 - ( C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*R9).y = TI8 - ( C5QD*TR9 - C5QE*TI9);\n\t";
+						}
+						else
+						{
+							bflyStr +=
+							"TR0 = *R0 + *R2 + *R4 + *R6 + *R8;\n\t"
+							"TR2 = (*R0 - C5QC*(*R4 + *R6)) - C5QB*(*I2 - *I8) - C5QD*(*I4 - *I6) + C5QA*((*R2 - *R4) + (*R8 - *R6));\n\t"
+							"TR8 = (*R0 - C5QC*(*R4 + *R6)) + C5QB*(*I2 - *I8) + C5QD*(*I4 - *I6) + C5QA*((*R2 - *R4) + (*R8 - *R6));\n\t"
+							"TR4 = (*R0 - C5QC*(*R2 + *R8)) + C5QB*(*I4 - *I6) - C5QD*(*I2 - *I8) + C5QA*((*R4 - *R2) + (*R6 - *R8));\n\t"
+							"TR6 = (*R0 - C5QC*(*R2 + *R8)) - C5QB*(*I4 - *I6) + C5QD*(*I2 - *I8) + C5QA*((*R4 - *R2) + (*R6 - *R8));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI0 = *I0 + *I2 + *I4 + *I6 + *I8;\n\t"
+							"TI2 = (*I0 - C5QC*(*I4 + *I6)) + C5QB*(*R2 - *R8) + C5QD*(*R4 - *R6) + C5QA*((*I2 - *I4) + (*I8 - *I6));\n\t"
+							"TI8 = (*I0 - C5QC*(*I4 + *I6)) - C5QB*(*R2 - *R8) - C5QD*(*R4 - *R6) + C5QA*((*I2 - *I4) + (*I8 - *I6));\n\t"
+							"TI4 = (*I0 - C5QC*(*I2 + *I8)) - C5QB*(*R4 - *R6) + C5QD*(*R2 - *R8) + C5QA*((*I4 - *I2) + (*I6 - *I8));\n\t"
+							"TI6 = (*I0 - C5QC*(*I2 + *I8)) + C5QB*(*R4 - *R6) - C5QD*(*R2 - *R8) + C5QA*((*I4 - *I2) + (*I6 - *I8));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TR1 = *R1 + *R3 + *R5 + *R7 + *R9;\n\t"
+							"TR3 = (*R1 - C5QC*(*R5 + *R7)) - C5QB*(*I3 - *I9) - C5QD*(*I5 - *I7) + C5QA*((*R3 - *R5) + (*R9 - *R7));\n\t"
+							"TR9 = (*R1 - C5QC*(*R5 + *R7)) + C5QB*(*I3 - *I9) + C5QD*(*I5 - *I7) + C5QA*((*R3 - *R5) + (*R9 - *R7));\n\t"
+							"TR5 = (*R1 - C5QC*(*R3 + *R9)) + C5QB*(*I5 - *I7) - C5QD*(*I3 - *I9) + C5QA*((*R5 - *R3) + (*R7 - *R9));\n\t"
+							"TR7 = (*R1 - C5QC*(*R3 + *R9)) - C5QB*(*I5 - *I7) + C5QD*(*I3 - *I9) + C5QA*((*R5 - *R3) + (*R7 - *R9));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"TI1 = *I1 + *I3 + *I5 + *I7 + *I9;\n\t"
+							"TI3 = (*I1 - C5QC*(*I5 + *I7)) + C5QB*(*R3 - *R9) + C5QD*(*R5 - *R7) + C5QA*((*I3 - *I5) + (*I9 - *I7));\n\t"
+							"TI9 = (*I1 - C5QC*(*I5 + *I7)) - C5QB*(*R3 - *R9) - C5QD*(*R5 - *R7) + C5QA*((*I3 - *I5) + (*I9 - *I7));\n\t"
+							"TI5 = (*I1 - C5QC*(*I3 + *I9)) - C5QB*(*R5 - *R7) + C5QD*(*R3 - *R9) + C5QA*((*I5 - *I3) + (*I7 - *I9));\n\t"
+							"TI7 = (*I1 - C5QC*(*I3 + *I9)) + C5QB*(*R5 - *R7) - C5QD*(*R3 - *R9) + C5QA*((*I5 - *I3) + (*I7 - *I9));\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R0) = TR0 + TR1;\n\t"
+							"(*R1) = TR2 + ( C5QE*TR3 - C5QD*TI3);\n\t"
+							"(*R2) = TR4 + ( C5QA*TR5 - C5QB*TI5);\n\t"
+							"(*R3) = TR6 + (-C5QA*TR7 - C5QB*TI7);\n\t"
+							"(*R4) = TR8 + (-C5QE*TR9 - C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I0) = TI0 + TI1;\n\t"
+							"(*I1) = TI2 + ( C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*I2) = TI4 + ( C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*I3) = TI6 + ( C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*I4) = TI8 + ( C5QD*TR9 - C5QE*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*R5) = TR0 - TR1;\n\t"
+							"(*R6) = TR2 - ( C5QE*TR3 - C5QD*TI3);\n\t"
+							"(*R7) = TR4 - ( C5QA*TR5 - C5QB*TI5);\n\t"
+							"(*R8) = TR6 - (-C5QA*TR7 - C5QB*TI7);\n\t"
+							"(*R9) = TR8 - (-C5QE*TR9 - C5QD*TI9);\n\t";
+
+							bflyStr += "\n\t";
+
+							bflyStr +=
+							"(*I5) = TI0 - TI1;\n\t"
+							"(*I6) = TI2 - ( C5QD*TR3 + C5QE*TI3);\n\t"
+							"(*I7) = TI4 - ( C5QB*TR5 + C5QA*TI5);\n\t"
+							"(*I8) = TI6 - ( C5QB*TR7 - C5QA*TI7);\n\t"
+							"(*I9) = TI8 - ( C5QD*TR9 - C5QE*TI9);\n\t";
+						}
+					}
+				} break;
+			case 16:
+				{
+					if(fwd)
+					{
+						if(cReg)
+						{
+							bflyStr +=
+
+							"(*R1) = (*R0) - (*R1);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R1);\n\t"
+							"(*R3) = (*R2) - (*R3);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R3);\n\t"
+							"(*R5) = (*R4) - (*R5);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R5);\n\t"
+							"(*R7) = (*R6) - (*R7);\n\t"
+							"(*R6) = 2.0f * (*R6) - (*R7);\n\t"
+							"(*R9) = (*R8) - (*R9);\n\t"
+							"(*R8) = 2.0f * (*R8) - (*R9);\n\t"
+							"(*R11) = (*R10) - (*R11);\n\t"
+							"(*R10) = 2.0f * (*R10) - (*R11);\n\t"
+							"(*R13) = (*R12) - (*R13);\n\t"
+							"(*R12) = 2.0f * (*R12) - (*R13);\n\t"
+							"(*R15) = (*R14) - (*R15);\n\t"
+							"(*R14) = 2.0f * (*R14) - (*R15);\n\t"
+							"\n\t"
+							"(*R2) = (*R0) - (*R2);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R2);\n\t"
+							"(*R3) = (*R1) + (fvect2)(-(*R3).y, (*R3).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R3);\n\t"
+							"(*R6) = (*R4) - (*R6);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R6);\n\t"
+							"(*R7) = (*R5) + (fvect2)(-(*R7).y, (*R7).x);\n\t"
+							"(*R5) = 2.0f * (*R5) - (*R7);\n\t"
+							"(*R10) = (*R8) - (*R10);\n\t"
+							"(*R8) = 2.0f * (*R8) - (*R10);\n\t"
+							"(*R11) = (*R9) + (fvect2)(-(*R11).y, (*R11).x);\n\t"
+							"(*R9) = 2.0f * (*R9) - (*R11);\n\t"
+							"(*R14) = (*R12) - (*R14);\n\t"
+							"(*R12) = 2.0f * (*R12) - (*R14);\n\t"
+							"(*R15) = (*R13) + (fvect2)(-(*R15).y, (*R15).x);\n\t"
+							"(*R13) = 2.0f * (*R13) - (*R15);\n\t"
+							"\n\t"
+							"(*R4) = (*R0) - (*R4);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R4);\n\t"
+							"(*R5) = ((*R1) - C8Q * (*R5)) - C8Q * (fvect2)((*R5).y, -(*R5).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R5);\n\t"
+							"(*R6) = (*R2) + (fvect2)(-(*R6).y, (*R6).x);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R6);\n\t"
+							"(*R7) = ((*R3) + C8Q * (*R7)) - C8Q * (fvect2)((*R7).y, -(*R7).x);\n\t"
+							"(*R3) = 2.0f * (*R3) - (*R7);\n\t"
+							"(*R12) = (*R8) - (*R12);\n\t"
+							"(*R8) = 2.0f * (*R8) - (*R12);\n\t"
+							"(*R13) = ((*R9) - C8Q * (*R13)) - C8Q * (fvect2)((*R13).y, -(*R13).x);\n\t"
+							"(*R9) = 2.0f * (*R9) - (*R13);\n\t"
+							"(*R14) = (*R10) + (fvect2)(-(*R14).y, (*R14).x);\n\t"
+							"(*R10) = 2.0f * (*R10) - (*R14);\n\t"
+							"(*R15) = ((*R11) + C8Q * (*R15)) - C8Q * (fvect2)((*R15).y, -(*R15).x);\n\t"
+							"(*R11) = 2.0f * (*R11) - (*R15);\n\t"
+							"\n\t"
+							"(*R8) = (*R0) - (*R8);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R8);\n\t"
+							"(*R9) = ((*R1) - 0.92387953251128675612818318939679 * (*R9)) - 0.3826834323650897717284599840304 * (fvect2)((*R9).y, -(*R9).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R9);\n\t"
+							"(*R10) = ((*R2) - C8Q * (*R10)) - C8Q * (fvect2)((*R10).y, -(*R10).x);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R10);\n\t"
+							"(*R11) = ((*R3) - 0.3826834323650897717284599840304 * (*R11)) - 0.92387953251128675612818318939679 * (fvect2)((*R11).y, -(*R11).x);\n\t"
+							"(*R3) = 2.0f * (*R3) - (*R11);\n\t"
+							"(*R12) = (*R4) + (fvect2)(-(*R12).y, (*R12).x);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R12);\n\t"
+							"(*R13) = ((*R5) + 0.3826834323650897717284599840304 * (*R13)) - 0.92387953251128675612818318939679 * (fvect2)((*R13).y, -(*R13).x);\n\t"
+							"(*R5) = 2.0f * (*R5) - (*R13);\n\t"
+							"(*R14) = ((*R6) + C8Q * (*R14)) - C8Q * (fvect2)((*R14).y, -(*R14).x);\n\t"
+							"(*R6) = 2.0f * (*R6) - (*R14);\n\t"
+							"(*R15) = ((*R7) + 0.92387953251128675612818318939679 * (*R15)) - 0.3826834323650897717284599840304 * (fvect2)((*R15).y, -(*R15).x);\n\t"
+							"(*R7) = 2.0f * (*R7) - (*R15);\n\t";
+
+						}
+						else
+							assert(false);
+					}
+					else
+					{
+						if(cReg)
+						{
+							bflyStr +=
+
+							"(*R1) = (*R0) - (*R1);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R1);\n\t"
+							"(*R3) = (*R2) - (*R3);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R3);\n\t"
+							"(*R5) = (*R4) - (*R5);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R5);\n\t"
+							"(*R7) = (*R6) - (*R7);\n\t"
+							"(*R6) = 2.0f * (*R6) - (*R7);\n\t"
+							"(*R9) = (*R8) - (*R9);\n\t"
+							"(*R8) = 2.0f * (*R8) - (*R9);\n\t"
+							"(*R11) = (*R10) - (*R11);\n\t"
+							"(*R10) = 2.0f * (*R10) - (*R11);\n\t"
+							"(*R13) = (*R12) - (*R13);\n\t"
+							"(*R12) = 2.0f * (*R12) - (*R13);\n\t"
+							"(*R15) = (*R14) - (*R15);\n\t"
+							"(*R14) = 2.0f * (*R14) - (*R15);\n\t"
+							"\n\t"
+							"(*R2) = (*R0) - (*R2);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R2);\n\t"
+							"(*R3) = (*R1) + (fvect2)((*R3).y, -(*R3).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R3);\n\t"
+							"(*R6) = (*R4) - (*R6);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R6);\n\t"
+							"(*R7) = (*R5) + (fvect2)((*R7).y, -(*R7).x);\n\t"
+							"(*R5) = 2.0f * (*R5) - (*R7);\n\t"
+							"(*R10) = (*R8) - (*R10);\n\t"
+							"(*R8) = 2.0f * (*R8) - (*R10);\n\t"
+							"(*R11) = (*R9) + (fvect2)((*R11).y, -(*R11).x);\n\t"
+							"(*R9) = 2.0f * (*R9) - (*R11);\n\t"
+							"(*R14) = (*R12) - (*R14);\n\t"
+							"(*R12) = 2.0f * (*R12) - (*R14);\n\t"
+							"(*R15) = (*R13) + (fvect2)((*R15).y, -(*R15).x);\n\t"
+							"(*R13) = 2.0f * (*R13) - (*R15);\n\t"
+							"\n\t"
+							"(*R4) = (*R0) - (*R4);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R4);\n\t"
+							"(*R5) = ((*R1) - C8Q * (*R5)) + C8Q * (fvect2)((*R5).y, -(*R5).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R5);\n\t"
+							"(*R6) = (*R2) + (fvect2)((*R6).y, -(*R6).x);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R6);\n\t"
+							"(*R7) = ((*R3) + C8Q * (*R7)) + C8Q * (fvect2)((*R7).y, -(*R7).x);\n\t"
+							"(*R3) = 2.0f * (*R3) - (*R7);\n\t"
+							"(*R12) = (*R8) - (*R12);\n\t"
+							"(*R8) = 2.0f * (*R8) - (*R12);\n\t"
+							"(*R13) = ((*R9) - C8Q * (*R13)) + C8Q * (fvect2)((*R13).y, -(*R13).x);\n\t"
+							"(*R9) = 2.0f * (*R9) - (*R13);\n\t"
+							"(*R14) = (*R10) + (fvect2)((*R14).y, -(*R14).x);\n\t"
+							"(*R10) = 2.0f * (*R10) - (*R14);\n\t"
+							"(*R15) = ((*R11) + C8Q * (*R15)) + C8Q * (fvect2)((*R15).y, -(*R15).x);\n\t"
+							"(*R11) = 2.0f * (*R11) - (*R15);\n\t"
+ 							"\n\t"
+							"(*R8) = (*R0) - (*R8);\n\t"
+							"(*R0) = 2.0f * (*R0) - (*R8);\n\t"
+							"(*R9) = ((*R1) - 0.92387953251128675612818318939679 * (*R9)) + 0.3826834323650897717284599840304 * (fvect2)((*R9).y, -(*R9).x);\n\t"
+							"(*R1) = 2.0f * (*R1) - (*R9);\n\t"
+							"(*R10) = ((*R2) - C8Q * (*R10)) + C8Q * (fvect2)((*R10).y, -(*R10).x);\n\t"
+							"(*R2) = 2.0f * (*R2) - (*R10);\n\t"
+							"(*R11) = ((*R3) - 0.3826834323650897717284599840304 * (*R11)) + 0.92387953251128675612818318939679 * (fvect2)((*R11).y, -(*R11).x);\n\t"
+							"(*R3) = 2.0f * (*R3) - (*R11);\n\t"
+							"(*R12) = (*R4) + (fvect2)((*R12).y, -(*R12).x);\n\t"
+							"(*R4) = 2.0f * (*R4) - (*R12);\n\t"
+							"(*R13) = ((*R5) + 0.3826834323650897717284599840304 * (*R13)) + 0.92387953251128675612818318939679 * (fvect2)((*R13).y, -(*R13).x);\n\t"
+							"(*R5) = 2.0f * (*R5) - (*R13);\n\t"
+							"(*R14) = ((*R6) + C8Q * (*R14)) + C8Q * (fvect2)((*R14).y, -(*R14).x);\n\t"
+							"(*R6) = 2.0f * (*R6) - (*R14);\n\t"
+							"(*R15) = ((*R7) + 0.92387953251128675612818318939679 * (*R15)) + 0.3826834323650897717284599840304 * (fvect2)((*R15).y, -(*R15).x);\n\t"
+							"(*R7) = 2.0f * (*R7) - (*R15);\n\t";
+
+						}
+						else
+							assert(false);
+					}
+				} break;
+			default:
+				assert(false);
+			}
+
+			bflyStr += "\n\t";
+
+			// Assign results
+			if( (radix & (radix-1)) || (!cReg) )
+			{
+				if( (radix != 10) && (radix != 6) )
+				{
+				for(size_t i=0; i<radix;i++)
+				{
+					if(cReg)
+					{
+						bflyStr += "((*R"; bflyStr += SztToStr(i); bflyStr += ").x) = TR"; bflyStr += SztToStr(i); bflyStr += "; ";
+						bflyStr += "((*R"; bflyStr += SztToStr(i); bflyStr += ").y) = TI"; bflyStr += SztToStr(i); bflyStr += ";\n\t";
+					}
+					else
+					{
+						bflyStr += "(*R"; bflyStr += SztToStr(i); bflyStr += ") = TR"; bflyStr += SztToStr(i); bflyStr += "; ";
+						bflyStr += "(*I"; bflyStr += SztToStr(i); bflyStr += ") = TI"; bflyStr += SztToStr(i); bflyStr += ";\n\t";
+					}
+				}
+				}
+			}
+			else
+			{
+				for(size_t i=0; i<radix;i++)
+				{
+					size_t j = BitReverse(i, radix);
+
+					if(i < j)
+					{
+						bflyStr += "T = (*R"; bflyStr += SztToStr(i); bflyStr += "); (*R";
+						bflyStr += SztToStr(i); bflyStr += ") = (*R"; bflyStr += SztToStr(j); bflyStr += "); (*R";
+						bflyStr += SztToStr(j); bflyStr += ") = T;\n\t";
+					}
+				}
+			}
+
+			bflyStr += "\n}\n";
+		}
+
+	public:
+		Butterfly(size_t radixVal, size_t countVal, bool fwdVal, bool cRegVal) : radix(radixVal), count(countVal), fwd(fwdVal), cReg(cRegVal) {}	// copies each argument into the matching member; nothing is validated here
+
+		void GenerateButterfly(std::string &bflyStr) const
+		{
+			// Delegates to GenerateButterflyStr to fill bflyStr; a count of zero leaves bflyStr untouched.
+			// The count <= 4 check is an assert, so it is only enforced when NDEBUG is not defined.
+			assert(count <= 4);
+			if(count > 0) { GenerateButterflyStr(bflyStr); }
+		}
+    };
+
+};
+
+#endif
+
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
new file mode 100644
index 00000000..3b12504f
--- /dev/null
+++ b/src/library/generator.transpose.cpp
@@ -0,0 +1,837 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// clfft.generator.Transpose.cpp : Dynamic run-time generator of openCL transpose kernels
+//
+
+// TODO: generalize the kernel to work with any size
+
+#include "stdafx.h"
+#include <math.h>
+#include "generator.transpose.h"
+
+#define QUOTEMARK(x) #x	// stringizes its argument so kernel statements can be written unquoted below
+//	The macros below stream generated statements into a stringstream named 'ss', each prefixed with INDENT2; both names must exist at the expansion site.
+#define PLANNAR_READ(z0, z1, z2, z3, gp) \
+		ss	<< INDENT2 << QUOTEMARK(z0 = gp[0*HSTRIDE/4*8];\n) \
+			<< INDENT2 << QUOTEMARK(z1 = gp[1*HSTRIDE/4*8];\n) \
+			<< INDENT2 << QUOTEMARK(z2 = gp[2*HSTRIDE/4*8];\n) \
+			<< INDENT2 << QUOTEMARK(z3 = gp[3*HSTRIDE/4*8];\n);
+//	INTERLEAVED_READ emits eight loads, two consecutive elements at each k*HSTRIDE/4*16; the pointer name 'gp' is fixed text here, not a macro parameter.
+#define INTERLEAVED_READ(z00, z01, z10, z11, z20, z21, z30, z31) \
+		ss	<< INDENT2 << QUOTEMARK(z00 = gp[0*HSTRIDE/4*16];\n)     \
+			<< INDENT2 << QUOTEMARK(z01 = gp[0*HSTRIDE/4*16 + 1];\n) \
+			<< INDENT2 << QUOTEMARK(z10 = gp[1*HSTRIDE/4*16];\n)     \
+			<< INDENT2 << QUOTEMARK(z11 = gp[1*HSTRIDE/4*16 + 1];\n) \
+			<< INDENT2 << QUOTEMARK(z20 = gp[2*HSTRIDE/4*16];\n)     \
+			<< INDENT2 << QUOTEMARK(z21 = gp[2*HSTRIDE/4*16 + 1];\n) \
+			<< INDENT2 << QUOTEMARK(z30 = gp[3*HSTRIDE/4*16];\n)     \
+			<< INDENT2 << QUOTEMARK(z31 = gp[3*HSTRIDE/4*16 + 1];\n);
+//	PLANNAR_WRITE emits four stores of z0..z3 through gp at offsets k*VSTRIDE/4*8 (k = 0..3).
+#define PLANNAR_WRITE(z0, z1, z2, z3, gp) \
+		ss  << INDENT2 << QUOTEMARK(gp[0*VSTRIDE/4*8] = z0;\n) \
+			<< INDENT2 << QUOTEMARK(gp[1*VSTRIDE/4*8] = z1;\n) \
+			<< INDENT2 << QUOTEMARK(gp[2*VSTRIDE/4*8] = z2;\n) \
+			<< INDENT2 << QUOTEMARK(gp[3*VSTRIDE/4*8] = z3;\n);
+//	INTERLEAVED_WRITE emits eight stores through the fixed name 'gp', two consecutive elements at each k*VSTRIDE/4*16.
+#define INTERLEAVED_WRITE(z00, z01, z10, z11, z20, z21, z30, z31) \
+		ss  << INDENT2 << QUOTEMARK(gp[0*VSTRIDE/4*16]   = z00;\n) \
+			<< INDENT2 << QUOTEMARK(gp[0*VSTRIDE/4*16+1] = z01;\n) \
+			<< INDENT2 << QUOTEMARK(gp[1*VSTRIDE/4*16]   = z10;\n) \
+			<< INDENT2 << QUOTEMARK(gp[1*VSTRIDE/4*16+1] = z11;\n) \
+			<< INDENT2 << QUOTEMARK(gp[2*VSTRIDE/4*16]   = z20;\n) \
+			<< INDENT2 << QUOTEMARK(gp[2*VSTRIDE/4*16+1] = z21;\n) \
+			<< INDENT2 << QUOTEMARK(gp[3*VSTRIDE/4*16]   = z30;\n) \
+			<< INDENT2 << QUOTEMARK(gp[3*VSTRIDE/4*16+1] = z31;\n);
+//	WRITE_TO_LDS emits stores of component 'part' of z0..z3 to lp[0], lp[jump], lp[2*jump], lp[3*jump], then advances lp by 4*jump.
+#define WRITE_TO_LDS(lp, jump, z0, z1, z2, z3, part) \
+		ss	<< INDENT2 << QUOTEMARK(lp[0*jump] = z0.part;\n) \
+			<< INDENT2 << QUOTEMARK(lp[1*jump] = z1.part;\n) \
+			<< INDENT2 << QUOTEMARK(lp[2*jump] = z2.part;\n) \
+			<< INDENT2 << QUOTEMARK(lp[3*jump] = z3.part;\n) \
+			<< INDENT2 << QUOTEMARK(lp += jump*4;\n)          \
+			<< "\n";
+
+typedef enum inputoutputflag_	// pairing of input layout and output layout, named INPUT_OUTPUT
+{
+	PLANNAR_PLANNAR		= 1,	// planar input, planar output (also in-place planar)
+	PLANNAR_INTERLEAVED,	// planar input, interleaved output
+	INTERLEAVED_PLANNAR,	// interleaved input, planar output
+	INTERLEAVED_INTERLEAVED,	// interleaved input, interleaved output (also in-place interleaved)
+	ENDTRANSIO	// trailing enumerator; not referenced by the generator in this file
+} transio;
+
+static clfftStatus GenerateTransposeKernel (FFTKernelGenKeyParams & params,	// in: precision, layouts, placeness, fft_N[0..1] and fft_SIMD are read
+	std::string & kernel)	// out: replaced by the generated OpenCL source when CLFFT_SUCCESS is returned
+{
+	kernel.reserve (8000);
+	std::stringstream ss         (std::stringstream::out);
+	//	Argument names and type strings spliced into the generated source; set by the layout switch below
+	const char * szIn0 = "";
+	const char * szIn1 = "";
+	const char * szOut0 = "";
+	const char * szOut1 = "";
+	const char * typeIn = "";
+	const char * typeOut = "";
+	const char * INDENT2 = "";	// indent of the load/store statements; one level deeper when bounds checks wrap them
+	const char * INDENT = "    ";
+	const char * datatype="";
+	const char * datatype2="";
+	bool xyflag  = (params.fft_N[0] == params.fft_N[1]) ? false : true;	// true when the two lengths differ
+	transio iotype;
+	//	Scalar and 2-component type names by precision; the non-single branch also emits the fp64 pragma
+
+	if (params.fft_precision == CLFFT_SINGLE)
+	{
+		datatype  = "float";
+		datatype2 = "float2";
+	}
+	else
+	{
+		datatype  = "double";
+		datatype2 = "double2";
+		ss << "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n\n";
+	}
+	//	The two lengths are baked into the kernel as HSTRIDE / VSTRIDE, and length/32 as DIM (or DIMX / DIMY)
+	size_t hstride = params.fft_N[0];
+	size_t vstride = params.fft_N[1];
+
+	ss << "#define HSTRIDE " << hstride << "\n";
+	ss << "#define VSTRIDE " << vstride << "\n";
+	if (xyflag)
+	{
+		ss << "#define DIMX " << hstride/32 << "\n";
+		ss << "#define DIMY " << vstride/32 << "\n";
+		ss << "#define DIM ((DIMX > DIMY) ? DIMX : DIMY) \n\n";
+		INDENT2 = "        ";
+	}
+	else
+	{
+		ss << "#define DIM " << vstride/32 << "\n\n";
+		INDENT2 = "    ";
+	}
+
+	//	Generate the kernel entry point and parameter list
+	//
+	ss << "__attribute__((reqd_work_group_size(" << unsigned(params.fft_SIMD) << ",1,1)))\n"
+	   << "__kernel void\n"
+	   << "fft_trans" << "(";
+
+	if (xyflag && params.fft_placeness == CLFFT_INPLACE) return CLFFT_INVALID_ARG_VALUE;	// in-place generation is refused when the lengths differ
+	//	Emit the argument list and classify the input/output layout pair into iotype
+	switch (params.fft_inputLayout) {
+	case CLFFT_COMPLEX_INTERLEAVED:
+		typeIn = datatype2;
+		if (params.fft_placeness == CLFFT_INPLACE)
+		{
+			szIn0 = szOut0 = "gcomplx";
+			typeOut = datatype2;
+			ss  << "__global " << typeIn << " * restrict " << szIn0;
+			iotype = INTERLEAVED_INTERLEAVED;
+		}
+		else
+		{
+			szIn0  = "gcomplxIn";
+			ss  << "__global " << typeIn << " * restrict " << szIn0;
+
+			if (params.fft_outputLayout == CLFFT_COMPLEX_PLANAR)
+			{
+				szOut0 = "grealOut";
+				szOut1 = "gimagOut";
+				typeOut = datatype;
+				ss  <<", __global " << typeOut <<" * restrict " << szOut0
+					<<", __global " << typeOut <<" * restrict " << szOut1;
+				iotype = INTERLEAVED_PLANNAR;
+			}
+			else
+			{
+				szOut0 = "gcomplxOut";
+				typeOut = datatype2;
+				ss  <<", __global " << typeOut <<" * restrict " << szOut0;
+				iotype = INTERLEAVED_INTERLEAVED;
+			}
+		}
+		break;
+	case CLFFT_COMPLEX_PLANAR:
+		typeIn = datatype;
+		if (params.fft_placeness == CLFFT_INPLACE)
+		{
+			szIn0 = szOut0 = "greal";
+			szIn1 = szOut1 = "gimag";
+			typeOut = datatype;
+			ss << "__global " << typeIn << " * restrict " << szIn0 <<", __global " << typeIn <<" * restrict " << szIn1;
+			iotype = PLANNAR_PLANNAR;
+		}
+		else
+		{
+			szIn0  = "greadIn";
+			szIn1  = "gimagIn";
+			ss  <<"__global " << typeIn << " * restrict " << szIn0 <<", __global " << typeIn <<" * restrict " << szIn1;
+			if (params.fft_outputLayout == CLFFT_COMPLEX_PLANAR)
+			{
+				szOut0 = "grealOut";
+				szOut1 = "gimagOut";
+				typeOut = datatype;
+				ss  << ", __global " << typeOut <<" * restrict " << szOut0 <<", __global " << typeOut <<" * restrict " << szOut1;
+				iotype = PLANNAR_PLANNAR;
+			}
+			else
+			{
+				szOut0 = "gcomplxOut";
+				typeOut = datatype2;
+				ss  << ", __global " << typeOut <<" * restrict " << szOut0;
+				iotype = PLANNAR_INTERLEAVED;
+			}
+		}
+		break;
+	default:
+		return CLFFT_NOTIMPLEMENTED;	// any other input layout is not generated
+	}
+	ss << ")\n{\n";
+
+	// Declare the two __local staging arrays ldsa / ldsb; planar-in/interleaved-out gets 2048 elements, the rest 1024
+	switch (iotype)
+	{
+		case PLANNAR_INTERLEAVED:
+			ss  << INDENT << "__local " << typeIn << " ldsa[2048];\n"
+				<< INDENT << "__local " << typeIn << " ldsb[2048];\n";
+			break;
+		case INTERLEAVED_PLANNAR:
+		case PLANNAR_PLANNAR:
+		case INTERLEAVED_INTERLEAVED:
+			ss  << INDENT << "__local " << typeIn << " ldsa[1024];\n"
+				<< INDENT << "__local " << typeIn << " ldsb[1024];\n";
+			break;
+		default:
+		return CLFFT_NOTIMPLEMENTED;
+	}
+	//	Work-item indices: 'me' is the low 6 bits of the global id, 'k' is (gid >> 6) modulo batchnum
+	ss	<< INDENT << "uint gid = get_global_id(0);\n"
+		<< INDENT << "uint me = gid & 0x3fU;\n"
+		<< INDENT << "uint k = (gid >> 6) % ";
+
+	// batch support: batchnum = n*(n+1)/2 with n = (larger length)/32; (gid >> 6) / batchnum later selects the batch entry
+	size_t batchnum = (vstride > hstride) ? ((vstride/32) * (vstride/32 +1) /2)
+		: ((hstride/32) * (hstride/32 +1) /2);
+	ss  << batchnum
+		<< ";\n"
+		<< "\n";
+
+	ss	<< INDENT << "// Compute location of blocks\n"
+		<< INDENT << "int l = DIM+0.5f - native_sqrt((DIM+0.5f)*(DIM+0.5f) - 2.0f * (float)as_int(k));\n"
+		<< INDENT << "int kl = ((DIM*2+1 - l) * l) >> 1;\n"
+		<< INDENT << "uint j = k - kl;\n"
+		<< INDENT << "uint i = l + j;\n"
+		<< "\n";
+
+	ss  << INDENT << "uint goa, gob;\n"
+		<< INDENT << "uint go = ((me & 0x7U) << 2) + ((gid>>6)/" << batchnum << ") * VSTRIDE * HSTRIDE;\n"
+		<< INDENT << "__global " << datatype << "4 *gp;\n"
+		<< INDENT << "__local " << datatype << "4 *lp4;\n"
+		<< INDENT << "uint lo = ((me & 0x7U) << 7) + (me >> 3);\n"
+		<< INDENT << "uint lot = (me<<2); \n";
+	//	Declare the private temporaries and the element type of 'lp' that match the layout pair
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		ss  << INDENT << datatype <<"4 z0, z1, z2, z3;\n\n"
+			<< INDENT << "__local " << typeIn <<" *lp;\n";
+		break;
+	case PLANNAR_INTERLEAVED:
+		ss  << INDENT << "__global " << datatype << "4 *gpi;\n"
+			<< INDENT << "__local " << typeIn <<" *lp;\n"
+			<< INDENT << datatype << "4 z00, z01, z10, z11, z20, z21, z30, z31;\n\n";
+		break;
+	case INTERLEAVED_PLANNAR:
+		ss  << INDENT << "__local " << typeOut <<" *lp;\n"
+			<< INDENT << datatype << "4 z00, z01, z10, z11, z20, z21, z30, z31;\n\n";
+		break;
+	case INTERLEAVED_INTERLEAVED:
+		ss  << INDENT << "__local " << typeIn <<" *lp;\n"
+			<< INDENT << datatype << "4 z00, z01, z10, z11, z20, z21, z30, z31;\n\n";
+		break;
+	}
+	//	When the lengths differ, every load/store section is wrapped in a bounds test on (i, j)
+	if (xyflag)
+	{
+		ss	<< INDENT << "if ( i < DIMX  && j < DIMY) \n"
+			<< INDENT << "{\n";
+	}
+
+	ss  << INDENT2 << "// Array offsets\n"
+		<< INDENT2 << "goa = go + (i << 5) + j * (HSTRIDE*32) + (me >> 3)*HSTRIDE;\n"
+		<<"\n";
+
+	ss  << INDENT2 << "// Load A block\n"
+		<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szIn0 <<" + goa);\n";
+	//	Planar input reads each input array separately (gp, then gpi); interleaved input reads pairs through gp
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		PLANNAR_READ(z0, z1, z2, z3, gp);
+		break;
+	case PLANNAR_INTERLEAVED:
+		PLANNAR_READ(z00, z10, z20, z30, gp);
+		ss << INDENT2 << "gpi = (__global " << datatype << "4 *)(" << szIn1 <<" + goa);\n";
+		PLANNAR_READ(z01, z11, z21, z31, gpi);
+		break;
+	default:
+		INTERLEAVED_READ(z00, z01, z10, z11, z20, z21, z30, z31);
+		break;
+	}
+	ss	<< "\n";
+
+	ss  << INDENT2 << "// Save into LDS\n";
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		ss	<< INDENT2 << "lp = ldsa + lo;\n";
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, x);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, y);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, z);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, w);
+		break;
+	case PLANNAR_INTERLEAVED:
+		ss	<< INDENT2 << "lp = ldsa + lo*2;\n";	// first input array goes to even LDS slots
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, x);
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, y);
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, z);
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, w);
+		ss  << INDENT2 << "lp = ldsa + lo*2 + 1;\n";	// second input array goes to odd LDS slots
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, x);
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, y);
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, z);
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, w);
+		break;
+	case INTERLEAVED_INTERLEAVED:
+		ss	<< INDENT2 << "lp = ldsa + lo;\n";
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, xy);
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, zw);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, xy);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, zw);
+		break;
+	case INTERLEAVED_PLANNAR:
+		ss	<< INDENT2 << "lp = (__local " << typeOut << "*)ldsa + lo;\n";	// ldsa is declared with typeIn but addressed here through a typeOut pointer
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, x);
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, z);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, x);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, z);
+		//next write to lp = ldsa+lo+1024 (the four writes above advanced lp by 32*4)
+		ss  << INDENT2 << "lp += (1024-32*4);\n";
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, y);
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, w);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, y);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, w);
+		break;
+	}
+	//	Close the bounds-check brace (only when the lengths differ) and label the end of the section
+	ss << INDENT;
+	if (xyflag)	ss << "} ";
+	ss <<"//End load A block\n\n";
+
+	if (xyflag)
+		ss  << INDENT << "if (i < DIMY  && j < DIMX) \n"
+			<< INDENT << "{\n";
+
+	ss  << INDENT2 << "//  Load B block\n"
+		<< INDENT2 << "gob = go + (j << 5) + i * (HSTRIDE*32) + (me >> 3)*HSTRIDE;\n"
+		<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szIn0 << " + gob);\n";
+	//	Same read sequence as for block A, now at offset gob (i and j swapped)
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		PLANNAR_READ(z0, z1, z2, z3, gp);
+		break;
+	case PLANNAR_INTERLEAVED:
+		PLANNAR_READ(z00, z10, z20, z30, gp);
+		ss << INDENT2 << "gpi = (__global " << datatype << "4 *)(" << szIn1 <<" + gob);\n";
+		PLANNAR_READ(z01, z11, z21, z31, gpi);
+		break;
+	default:
+		INTERLEAVED_READ(z00, z01, z10, z11, z20, z21, z30, z31);
+		break;
+	}
+	ss	<< "\n";
+
+	ss  << INDENT2 << "// Save into LDS\n";
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		ss	<< INDENT2 << "lp = ldsb + lo;\n";
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, x);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, y);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, z);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, w);
+		break;
+	case PLANNAR_INTERLEAVED:
+		ss	<< INDENT2 << "lp = ldsb + lo*2;\n";
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, x);
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, y);
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, z);
+		WRITE_TO_LDS(lp, 16, z00, z10, z20, z30, w);
+		ss  << INDENT2 << "lp = ldsb + lo*2 + 1;\n";
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, x);
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, y);
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, z);
+		WRITE_TO_LDS(lp, 16, z01, z11, z21, z31, w);
+		break;
+	case INTERLEAVED_INTERLEAVED:
+		ss	<< INDENT2 << "lp = ldsb + lo;\n";
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, xy);
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, zw);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, xy);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, zw);
+		break;
+	case INTERLEAVED_PLANNAR:
+		ss	<< INDENT2 << "lp = (__local " << typeOut << "*) ldsb + lo;\n";
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, x);
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, z);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, x);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, z);
+		//next write to lp = ldsb+lo+1024 (the four writes above advanced lp by 32*4)
+		ss  << INDENT2 << "lp += (1024-32*4);\n";
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, y);
+		WRITE_TO_LDS(lp, 8, z00, z10, z20, z30, w);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, y);
+		WRITE_TO_LDS(lp, 8, z01, z11, z21, z31, w);
+		break;
+	}
+
+	ss  << INDENT;
+	if (xyflag) ss<< "} ";
+	ss  << "// End load B block\n\n";
+	//	Local-memory barrier between the LDS stores above and the LDS reads below
+	ss  << INDENT << "barrier(CLK_LOCAL_MEM_FENCE);\n"
+		<< "\n";
+	//	Write phase: the A location is filled from ldsb and the B location from ldsa, using VSTRIDE for output rows
+	if (xyflag) ss  << INDENT << "if (i < DIMY  && j < DIMX)\n"
+		<< INDENT << "{\n";
+
+	ss  << INDENT2 << "// write A block\n";
+
+	ss  << INDENT2 << "goa = go + (i << 5) + j * (VSTRIDE*32) + (me >> 3)*VSTRIDE;\n"
+		<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szOut0 << " + goa);\n";
+
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsb + lot);\n\n";
+		ss  << INDENT2 << "z0 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z1 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z2 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z3 = lp4[0];\n\n";
+
+		PLANNAR_WRITE(z0, z1, z2, z3, gp);
+		break;
+	case INTERLEAVED_PLANNAR:
+		ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)((__local " << typeOut << "*)ldsb + lot);\n\n";
+		ss  << INDENT2 << "z00 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z10 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z20 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z30 = lp4[0];\n\n";
+
+		PLANNAR_WRITE(z00, z10, z20, z30, gp);
+		//	Second output array: lp4 was advanced by 32*6 above, so this moves it to 256 vector elements past its start
+		ss  << INDENT2 << "gp = (__global " << datatype << "4 *)(" << szOut1 << " + goa);\n";
+		ss  << INDENT2 << "lp4 += (256 - 32*6);\n";
+		ss  << INDENT2 << "z01 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z11 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z21 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z31 = lp4[0];\n\n";
+
+		PLANNAR_WRITE(z01, z11, z21, z31, gp);
+		break;
+
+	case PLANNAR_INTERLEAVED:
+	case INTERLEAVED_INTERLEAVED:
+		if (iotype == PLANNAR_INTERLEAVED)
+			ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsb + lot*2);\n";
+		else
+			ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsb + lot);\n";
+
+		ss  << INDENT2 << "z00 = lp4[0];\n"
+			<< INDENT2 << "z01 = lp4[1];\n"
+			<< INDENT2 << "lp4 += 32*4;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z10 = lp4[0];\n"
+			<< INDENT2 << "z11 = lp4[1];\n"
+			<< INDENT2 << "lp4 += 32*4;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z20 = lp4[0];\n"
+			<< INDENT2 << "z21 = lp4[1];\n"
+			<< INDENT2 << "lp4 += 32*4;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z30 = lp4[0];\n"
+			<< INDENT2 << "z31 = lp4[1];\n\n";
+
+		INTERLEAVED_WRITE(z00, z01, z10, z11, z20, z21, z30, z31);
+		break;
+	}
+	ss << "\n";
+
+	ss  << INDENT;
+	if (xyflag) ss << "} ";
+	ss  << "// End write A block;\n\n";
+
+	if (xyflag)	ss  << INDENT << "if (i < DIMX  && j < DIMY)\n"
+		<< INDENT << "{\n";
+
+	ss  << INDENT2 << "// write B block\n\n";
+	ss	<< INDENT2 << "gob = go + (j << 5) + i * (VSTRIDE*32) + (me >> 3)*VSTRIDE;\n"
+		<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szOut0 << " + gob);\n";
+	//	Mirror of the A write above, reading ldsa and storing at gob
+	switch (iotype)
+	{
+	case PLANNAR_PLANNAR:
+		ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsa + lot);\n\n";
+		ss  << INDENT2 << "z0 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z1 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z2 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z3 = lp4[0];\n\n";
+
+		PLANNAR_WRITE(z0, z1, z2, z3, gp);
+		break;
+	case INTERLEAVED_PLANNAR:
+		ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)((__local " << typeOut << "*)ldsa + lot);\n\n";
+		ss  << INDENT2 << "z00 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z10 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z20 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z30 = lp4[0];\n\n";
+
+		PLANNAR_WRITE(z00, z10, z20, z30, gp);
+
+		ss  << INDENT2 << "gp = (__global " << datatype << "4 *)(" << szOut1 << " + gob);\n";
+		ss  << INDENT2 << "lp4 += (256 - 32*6);\n";
+		ss  << INDENT2 << "z01 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z11 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z21 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z31 = lp4[0];\n\n";
+
+		PLANNAR_WRITE(z01, z11, z21, z31, gp);
+		break;
+
+	case PLANNAR_INTERLEAVED:
+	case INTERLEAVED_INTERLEAVED:
+		if (iotype == PLANNAR_INTERLEAVED)
+			ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsa + lot*2);\n\n";
+		else
+			ss  << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsa + lot);\n\n";
+
+		ss  << INDENT2 << "z00 = lp4[0];\n"
+			<< INDENT2 << "z01 = lp4[1];\n"
+			<< INDENT2 << "lp4 += 32*4;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z10 = lp4[0];\n"
+			<< INDENT2 << "z11 = lp4[1];\n"
+			<< INDENT2 << "lp4 += 32*4;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z20 = lp4[0];\n"
+			<< INDENT2 << "z21 = lp4[1];\n"
+			<< INDENT2 << "lp4 += 32*4;\n"
+			<< "\n";
+
+		ss	<< INDENT2 << "z30 = lp4[0];\n"
+			<< INDENT2 << "z31 = lp4[1];\n\n";
+
+		INTERLEAVED_WRITE(z00, z01, z10, z11, z20, z21, z30, z31);
+		break;
+	}
+	ss << "\n";
+
+	ss  << INDENT;
+	if(xyflag) ss  << "} ";
+	ss  << "// End write B block;\n\n";
+
+	if (iotype == PLANNAR_PLANNAR)	// planar-to-planar: the pass above used szIn0/szOut0; repeat it for szIn1/szOut1
+	{
+		ss  << INDENT << "// Identical handling for imaginary data\n"
+			<< INDENT << "barrier(CLK_LOCAL_MEM_FENCE);\n"
+			<< "\n";
+
+		if (xyflag) ss	<< INDENT << "if (i < DIMX  && j < DIMY)\n"
+			<< INDENT << "{\n";
+
+		ss  << INDENT2 << "//load A block\n"
+			<< INDENT2 << "goa = go + (i << 5) + j * (HSTRIDE*32) + (me >> 3)*HSTRIDE;\n"
+			<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szIn1 << " + goa);\n"
+			<< "\n";
+
+		PLANNAR_READ(z0, z1, z2, z3, gp);
+
+		ss  << INDENT2 << "lp = ldsa + lo;\n"
+			<< "\n";
+
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, x);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, y);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, z);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, w);
+
+		ss  << INDENT;
+		if (xyflag) ss  << "} ";
+		ss  << "//end load A block\n\n";
+
+		if (xyflag) ss	<< INDENT << "if (i < DIMY  && j < DIMX)\n"
+			<< INDENT << "{\n";
+
+		ss  << INDENT2 << "//load B block\n"
+			<< INDENT2 << "gob = go + (j << 5) + i * (HSTRIDE*32) + (me >> 3)*HSTRIDE;\n"
+			<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szIn1 <<" + gob);\n"
+			<< "\n";
+
+		PLANNAR_READ(z0, z1, z2, z3, gp);
+
+		ss  << INDENT2 << "lp = ldsb + lo;\n"
+			<< "\n";
+
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, x);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, y);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, z);
+		WRITE_TO_LDS(lp, 8, z0, z1, z2, z3, w);
+
+		ss  << INDENT;
+		if (xyflag) ss << "} ";
+		ss  << "// end load B block\n\n";
+
+		ss  << INDENT << "barrier(CLK_LOCAL_MEM_FENCE);\n"
+			<< "\n";
+
+		if (xyflag) ss	<< INDENT << "if (i < DIMY  && j < DIMX)\n"
+			<< INDENT << "{\n";
+
+		ss  << INDENT2 << "//Write A block\n"
+		    << INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsb + lot);\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z0 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z1 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z2 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z3 = lp4[0];\n"
+			<< "\n";
+
+		ss  << INDENT2 << "goa = go + (i << 5) + j * (VSTRIDE*32) + (me >> 3)*VSTRIDE;\n"
+			<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szOut1 << " + goa);\n"
+			<< "\n";
+
+		PLANNAR_WRITE(z0, z1, z2, z3, gp);
+
+		ss  << INDENT;
+		if (xyflag) ss  << "} ";
+		ss  << "// end write A block\n\n";
+
+		if (xyflag) ss	<< INDENT << "if (i < DIMX  && j < DIMY)\n"
+			<< INDENT << "{\n";
+
+		ss  << INDENT2 << "//write B block\n"
+			<< INDENT2 << "lp4 = (__local " << datatype << "4 *)(ldsa + lot);\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z0 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z1 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z2 = lp4[0];\n"
+			<< INDENT2 << "lp4 += 32*2;\n"
+			<< "\n";
+
+		ss  << INDENT2 << "z3 = lp4[0];\n"
+			<< "\n";
+
+		ss  << INDENT2 << "gob = go + (j << 5) + i * (VSTRIDE*32) + (me >> 3)*VSTRIDE;\n"
+			<< INDENT2 << "gp = (__global " << datatype << "4 *)(" << szOut1 << " + gob);\n";
+		PLANNAR_WRITE(z0, z1, z2, z3, gp);
+
+		ss  << INDENT;
+		if (xyflag) ss  << "} ";
+		ss << "// end write B block\n";
+	}
+
+	ss << "}\n\n";
+	kernel = ss.str();	// replaces whatever 'kernel' held, including the capacity reserved at the top
+	return CLFFT_SUCCESS;
+}
+
+template<>
+clfftStatus FFTPlan::GetKernelGenKeyPvt<Transpose> (FFTKernelGenKeyParams & params) const
+{
+	//	Zeroes 'params' and fills it from this plan's precision, placeness, layouts, lengths, strides and distances.
+	//	Query the devices in this context for their local memory sizes
+	//	How we generate a kernel depends on the *minimum* LDS size for all devices.
+	//	NOTE: pEnvelope is fetched and NULL-checked below but is not otherwise used in this function.
+	const FFTEnvelope * pEnvelope = NULL;
+	OPENCL_V(const_cast<FFTPlan*>(this)->GetEnvelope (& pEnvelope), _T("GetEnvelope failed"));
+	BUG_CHECK (NULL != pEnvelope);
+
+	::memset( &params, 0, sizeof( params ) );	// every field not assigned below stays zero
+	params.fft_precision    = this->precision;
+	params.fft_placeness    = this->placeness;
+	params.fft_inputLayout  = this->inputLayout;
+
+	ARG_CHECK (this->inStride.size() == this->outStride.size())	// ARG_CHECK presumably returns an error status on failure; confirm against its definition
+
+	if (CLFFT_INPLACE == this->placeness) {
+		//	If this is an in-place transform the
+		//	input and output layout, dimensions and strides
+		//	*MUST* be the same.
+		//
+		ARG_CHECK (this->inputLayout == this->outputLayout)
+		params.fft_outputLayout = this->inputLayout;
+		for (size_t u = this->inStride.size(); u-- > 0; ) {	// walks the stride indices from last to first
+			ARG_CHECK (this->inStride[u] == this->outStride[u]);
+		}
+	} else {
+		params.fft_outputLayout = this->outputLayout;
+	}
+
+	//we only support 2D transpose
+	switch (this->inStride.size()) {
+		//	2-D array is a 3-D data structure
+		//	2-D unit is a special case of 2-D array.
+	case 2:
+		ARG_CHECK(this->length   .size() > 1);
+		ARG_CHECK(this->outStride.size() > 1);
+		params.fft_DataDim      = 3;
+		params.fft_N[0]         = this->length[0];
+		params.fft_N[1]         = this->length[1];
+		params.fft_inStride[0]  = this->inStride[0];
+		params.fft_inStride[1]  = this->inStride[1];
+		params.fft_inStride[2]  = this->iDist;	// third stride slot carries the input distance
+		params.fft_outStride[0] = this->outStride[0];
+		params.fft_outStride[1] = this->outStride[1];
+		params.fft_outStride[2] = this->oDist;	// third stride slot carries the output distance
+		break;
+	default:
+		ARG_CHECK (false);	// any stride count other than 2 is rejected
+	}
+
+	//TODO: work group size setup
+	params.fft_R = 32; // divide the element into 32x32 blocks
+	params.fft_SIMD = 64; //work group size
+
+	return CLFFT_SUCCESS;
+}
+
+template<>
+clfftStatus FFTPlan::GetWorkSizesPvt<Transpose> (std::vector<size_t> & globalWS, std::vector<size_t> & localWS) const
+{
+	//	Appends one entry to globalWS and one to localWS for the transpose launch.
+	//	The kernel-generation key supplies the tile edge (fft_R) and the work-group size (fft_SIMD).
+	FFTKernelGenKeyParams fftParams;
+	OPENCL_V( this->GetKernelGenKeyPvt<Transpose>( fftParams ), _T("GetKernelGenKey() failed!") );
+
+	//	Tiles of edge fft_R along each length, rounded up
+	const unsigned long long tiles0 = DivRoundingUp<unsigned long long> (this->length[0], fftParams.fft_R);
+	const unsigned long long tiles1 = DivRoundingUp<unsigned long long> (this->length[1], fftParams.fft_R);
+
+	//	n*(n+1)/2 groups for the larger tile count n, each of fft_SIMD work-items, repeated per batch entry
+	unsigned long long workItems = (tiles0 > tiles1) ? tiles0 : tiles1;
+	workItems = ((workItems * (workItems + 1)) / 2) * fftParams.fft_SIMD;
+	workItems = workItems * this->batchsize;
+
+	globalWS.push_back( static_cast< size_t >( workItems ) );
+	localWS.push_back( fftParams.fft_SIMD );
+	return	CLFFT_SUCCESS;
+}
+
+
+//	OpenCL does not take unicode strings as input, so this routine returns only ASCII strings
+//	Feed this generator the FFTPlan, and it returns the generated program as a string
+template<>
+clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo ) const
+{	//	Generates the transpose kernel source for this plan and stores it, with its entry-point names, in fftRepo
+	FFTKernelGenKeyParams params;
+	OPENCL_V( this->GetKernelGenKeyPvt<Transpose> (params), _T("GetKernelGenKey() failed!") );
+	//	Any failing step is handed to OPENCL_V together with a message naming the call that failed
+	std::string programCode;
+	OPENCL_V( GenerateTransposeKernel( params, programCode ), _T( "GenerateTransposeKernel() failed!" ) );
+	//	Both entry-point names passed below are the single generated kernel, "fft_trans"
+	OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode ), _T( "fftRepo.setProgramCode() failed!" ) );
+	OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans" ), _T( "fftRepo.setProgramEntryPoints() failed!" ) );
+
+	return CLFFT_SUCCESS;
+}
diff --git a/src/library/generator.transpose.h b/src/library/generator.transpose.h
new file mode 100644
index 00000000..b08e3d3a
--- /dev/null
+++ b/src/library/generator.transpose.h
@@ -0,0 +1,29 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+////////////////////////////////////////////
+//	Copyright (C) 2011 Advanced Micro Devices, Inc. All Rights Reserved.
+////////////////////////////////////////////
+
+#pragma once
+#if !defined( AMD_CLFFT_generator_transpose_H )
+#define AMD_CLFFT_generator_transpose_H
+#include "private.h"
+#include "repo.h"
+#include "plan.h"
+
+#endif
+
diff --git a/src/library/lifetime.cpp b/src/library/lifetime.cpp
new file mode 100644
index 00000000..7548e9a9
--- /dev/null
+++ b/src/library/lifetime.cpp
@@ -0,0 +1,90 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// clfft.lifetime.cpp : Functions that control the lifetime of the FFT library and their supporting functions
+//
+
+#include "stdafx.h"
+#include "private.h"
+#include "repo.h"
+#include "../include/sharedLibrary.h"
+#include "../statTimer/statisticalTimer.extern.h"
+
+//	Allow AMD's implementation of FFT's to allocate internal resources
+clfftStatus	clfftSetup( const clfftSetupData* sData )
+{
+	//	Creating static data is not thread safe, so take the repo lock before the first getInstance call.
+	//	The lock lives here rather than inside FFTRepo::getInstance so later getInstance calls pay nothing for it.
+	scopedLock sLock( FFTRepo::lockRepo, _T( "FFTRepo::getInstance" ) );
+	FFTRepo& repo	= FFTRepo::getInstance( );
+
+	//	The timer module is optional: try to load it and remember the handle either way
+	repo.timerHandle = LoadSharedLibrary( "lib", "StatTimer", true );
+
+	//	Resolve the factory function only when the library was actually loaded
+	PFGETSTATTIMER getTimer = NULL;
+	if( repo.timerHandle )
+	{
+		getTimer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( repo.timerHandle, "getStatTimer" ) );
+	}
+
+	//	Create the GPU timer object only when the factory function was found
+	if( getTimer )
+	{
+		repo.pStatTimer = reinterpret_cast< GpuStatTimer* > ( getTimer( CLFFT_GPU ) );
+	}
+
+	//	Versioning checks commented out until necessary
+	////	If the major version number between the client and library do not match, return mismatch
+	//if( sData->major > clfftVersionMajor )
+	//	return CLFFT_VERSION_MISMATCH;
+
+	////	If the minor version number between the client and library do not match, return mismatch
+	//if( sData->minor > clfftVersionMinor )
+	//	return CLFFT_VERSION_MISMATCH;
+
+	////	We ignore patch version number for version validation
+
+	//	A client that passes no setup data leaves the repo's stored copy untouched;
+	//	otherwise the caller's structure is copied by value into the repo.
+	if( sData != NULL )
+	{
+		repo.setupData	= *sData;
+	}
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Allow AMD's implementation of FFT's to destroy internal resources
+clfftStatus	clfftTeardown( )
+{
+	//	Ask the repo to release its resources first, then unload the timer library handle set by clfftSetup
+	FFTRepo& repo	= FFTRepo::getInstance( );
+	repo.releaseResources( );
+	FreeSharedLibrary( repo.timerHandle );
+
+	return	CLFFT_SUCCESS;
+}
+
+clfftStatus clfftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch )
+{
+	//	Reports the library version; a NULL out-pointer skips that component instead of being dereferenced
+	if( major != NULL )	*major	= clfftVersionMajor;
+	if( minor != NULL )	*minor	= clfftVersionMinor;
+	if( patch != NULL )	*patch	= clfftVersionPatch;
+	return	CLFFT_SUCCESS;
+}
diff --git a/src/library/lock.h b/src/library/lock.h
new file mode 100644
index 00000000..49c95aca
--- /dev/null
+++ b/src/library/lock.h
@@ -0,0 +1,248 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_lock_H )
+#define CLFFT_lock_H
+
+#if defined( _WIN32 )
+	#include <windows.h>
+#else
+	#include <pthread.h>
+#endif
+
+#include "private.h"
+
+#if defined( _WIN32 )
+
+//	lockRAII provides an abstraction for the concept of a mutex; it wraps all mutex functions in generic methods
+//	On windows, the mutex is implemented as a CRITICAL_SECTION, as this is the fastest intraprocess mutex
+//	available.
+//	The template argument 'debugPrint' activates debugging information, but if not active the compiler optimizes
+//	the print statements out
+template< bool debugPrint >
+class lockRAII
+{
+	CRITICAL_SECTION cs;
+	tstring			csName;
+	tstringstream	tstream;	//	Scratch buffer for debug output; after construction only touched while cs is held
+
+	//	Does not make sense to create a copy of a lock object; private method
+	//	Only the name is copied; the new object initializes its own CRITICAL_SECTION
+	lockRAII( const lockRAII& rhs ): csName( rhs.csName )
+	{
+		tstream << std::hex << std::showbase;
+		::InitializeCriticalSection( &cs );
+	}
+
+	public:
+		//	Create an unnamed lock
+		lockRAII( )
+		{
+			tstream << std::hex << std::showbase;
+			::InitializeCriticalSection( &cs );
+		}
+
+		//	Create a lock whose name is used in the debug output
+		lockRAII( const tstring& name ): csName( name )
+		{
+			tstream << std::hex << std::showbase;
+			::InitializeCriticalSection( &cs );
+		}
+
+		~lockRAII( )
+		{
+			::DeleteCriticalSection( &cs );
+		}
+
+		//	Returns a reference to the name stored in this lock
+		tstring& getName( )
+		{
+			return csName;
+		}
+
+		//	Replaces the name stored in this lock
+		void setName( const tstring& name )
+		{
+			csName	= name;
+		}
+
+		//	Acquire the critical section, optionally logging before and after the acquisition
+		void enter( )
+		{
+			if( debugPrint )
+			{
+				//	The critical section is not held yet, so several threads can be here at once.
+				//	The shared tstream member must not be used at this point; format the message
+				//	into a stream that is private to this call instead.
+				tstringstream attempt;
+				attempt << _T( "Attempting CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl;
+				tout << attempt.str( );
+			}
+
+			::EnterCriticalSection( &cs );
+
+			if( debugPrint )
+			{
+				//	cs is held from here on, so the shared tstream member is safe to use
+				tstream.str( _T( "" ) );
+				tstream << _T( "Acquired CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl;
+				tstream << _T( "\tOwningThread( " ) << cs.OwningThread << _T( " )" ) << std::endl;
+				tstream << _T( "\tLockcount( " ) << cs.LockCount << _T( " )" ) << std::endl;
+				tstream << _T( "\tRecursionCount( " ) << cs.RecursionCount << _T( " )" ) << std::endl;
+				tout << tstream.str( );
+			}
+		}
+
+		//	Release the critical section; the optional log output is produced while cs is still held
+		void leave( )
+		{
+			if( debugPrint )
+			{
+				tstream.str( _T( "" ) );
+				tstream << _T( "Releasing CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl;
+				tstream << _T( "\tOwningThread( " ) << cs.OwningThread << _T( " )" ) << std::endl;
+				tstream << _T( "\tLockcount( " ) << cs.LockCount << _T( " )" ) << std::endl;
+				tstream << _T( "\tRecursionCount( " ) << cs.RecursionCount << _T( " )" ) << std::endl << std::endl;
+				tout << tstream.str( );
+			}
+
+			::LeaveCriticalSection( &cs );
+		}
+};
+
+#else
+//	lockRAII provides an abstraction for the concept of a mutex; it wraps all mutex functions in generic methods
+//	On non-windows platforms, the mutex is implemented as a pthread_mutex_t that is created with the
+//	PTHREAD_MUTEX_RECURSIVE attribute.
+//	The template argument 'debugPrint' activates debugging information, but if not active the compiler optimizes
+//	the print statements out
+template< bool debugPrint >
+class lockRAII
+{
+	pthread_mutex_t	mutex;
+	pthread_mutexattr_t mAttr;
+	tstring			mutexName;
+	tstringstream	tstream;	//	Scratch buffer for debug output; after construction only touched while mutex is held
+
+	//	Construction logic shared by every constructor: configure the debug stream and
+	//	initialize the mutex attribute object and the mutex itself
+	void init( )
+	{
+		tstream << std::hex << std::showbase;
+		pthread_mutexattr_init( &mAttr );
+		pthread_mutexattr_settype( &mAttr, PTHREAD_MUTEX_RECURSIVE );
+		pthread_mutex_init( &mutex, &mAttr );
+	}
+
+	//	Does not make sense to create a copy of a lock object; private method
+	//	Only the name is copied.  The new object still initializes its own mutex, because the
+	//	destructor unconditionally destroys both mutex and mAttr.
+	lockRAII( const lockRAII& rhs ): mutexName( rhs.mutexName )
+	{
+		init( );
+	}
+
+	public:
+		//	Create an unnamed lock
+		lockRAII( )
+		{
+			init( );
+		}
+
+		//	Create a lock whose name is used in the debug output
+		lockRAII( const tstring& name ): mutexName( name )
+		{
+			init( );
+		}
+
+		~lockRAII( )
+		{
+			pthread_mutex_destroy( &mutex );
+			pthread_mutexattr_destroy( &mAttr );
+		}
+
+		//	Returns a reference to the name stored in this lock
+		tstring& getName( )
+		{
+			return mutexName;
+		}
+
+		//	Replaces the name stored in this lock
+		void setName( const tstring& name )
+		{
+			mutexName	= name;
+		}
+
+		//	Acquire the mutex, optionally logging before and after the acquisition
+		void enter( )
+		{
+			if( debugPrint )
+			{
+				//	The mutex is not held yet, so several threads can be here at once.
+				//	The shared tstream member must not be used at this point; format the message
+				//	into a stream that is private to this call instead.
+				tstringstream attempt;
+				attempt << _T( "Attempting pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl;
+				tout << attempt.str( );
+			}
+
+			::pthread_mutex_lock( &mutex );
+
+			if( debugPrint )
+			{
+				//	The mutex is held from here on, so the shared tstream member is safe to use
+				tstream.str( _T( "" ) );
+				tstream << _T( "Acquired pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl;
+				//tstream << _T( "\tOwningThread( " ) << mutex.OwningThread << _T( " )" ) << std::endl;
+				//tstream << _T( "\tLockcount( " ) << mutex.LockCount << _T( " )" ) << std::endl;
+				//tstream << _T( "\tRecursionCount( " ) << mutex.RecursionCount << _T( " )" ) << std::endl;
+				tout << tstream.str( );
+			}
+		}
+
+		//	Release the mutex; the optional log output is produced while the mutex is still held
+		void leave( )
+		{
+			if( debugPrint )
+			{
+				tstream.str( _T( "" ) );
+				tstream << _T( "Releasing pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl;
+				//tstream << _T( "\tOwningThread( " ) << mutex.OwningThread << _T( " )" ) << std::endl;
+				//tstream << _T( "\tLockcount( " ) << mutex.LockCount << _T( " )" ) << std::endl;
+				//tstream << _T( "\tRecursionCount( " ) << mutex.RecursionCount << _T( " )" ) << std::endl << std::endl;
+				tout << tstream.str( );
+			}
+
+			::pthread_mutex_unlock( &mutex );
+		}
+};
+#endif
+
+//	Scope guard for a lockRAII object: the constructor enters the lock and the destructor
+//	leaves it, so enter/leave calls always come in pairs.
+//	When the template parameter is true, a message is written to tout before entering and
+//	after leaving; when it is false the branch is constant and the compiler can optimize it out
+template< bool debugPrint >
+class scopedLock
+{
+	lockRAII< debugPrint >* guarded;
+	tstring			label;
+	tstringstream	msg;
+
+	//	Writes prefix, the scope name and a closing " )" to tout, followed by a blank line
+	void trace( const tstring& prefix )
+	{
+		msg.str( _T( "" ) );
+		msg << prefix << label << _T( " )" ) << std::endl << std::endl;
+		tout << msg.str( );
+	}
+
+	public:
+		//	Remembers the lock and the name used for logging, then enters the lock
+		scopedLock( lockRAII< debugPrint >& lock, const tstring& name ): guarded( &lock ), label( name )
+		{
+			if( debugPrint )
+				trace( _T( "Entering scopedLock( " ) );
+
+			guarded->enter( );
+		}
+
+		//	Leaves the lock first; the optional message is written afterwards
+		~scopedLock( )
+		{
+			guarded->leave( );
+
+			if( debugPrint )
+				trace( _T( "Left scopedLock( " ) );
+		}
+};
+
+//	Convenience macro to enable/disable debugging print statements
+//	From this point on, the bare names lockRAII and scopedLock expand to the < false >
+//	instantiations of the templates above, i.e. the variants whose debugPrint branches are
+//	never taken.  Change false to true in both macros to turn the debug output on.
+#define lockRAII lockRAII< false >
+#define scopedLock scopedLock< false >
+
+#endif	// CLFFT_lock_H
diff --git a/src/library/mainpage.h b/src/library/mainpage.h
new file mode 100644
index 00000000..326ad7a1
--- /dev/null
+++ b/src/library/mainpage.h
@@ -0,0 +1,556 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+/*! @file clFFT.mainpage.h
+
+This file contains all documentation, no code, in the form of comment text.  Its purpose is to provide
+chapter 1 of the documentation we produce with doxygen.  This includes the title page, installation instructions
+and prose on the nature of FFT's and their use in our library.
+
+@mainpage OpenCL Fast Fourier Transforms (FFT's)
+
+The clFFT library is an OpenCL library implementation of discrete Fast Fourier Transforms. It:
+@li Provides a fast and accurate platform for calculating discrete FFTs.
+@li Works on CPU or GPU backends.
+@li Supports in-place or out-of-place transforms.
+@li Supports 1D, 2D, and 3D transforms with a batch size that can be greater than 1.
+@li Supports planar (real and complex components in separate arrays) and interleaved (real and complex
+components as a pair contiguous in memory) formats.
+@li Supports dimension lengths that can be any mix of powers of 2, 3, and 5.
+@li Supports single and double precision floating point formats.
+
+@section InstallFFT Installation of clFFT library
+
+@subsection DownBinaries Downloadable Binaries
+AMD provides clFFT library pre-compiled packages for recent versions of Microsoft Windows operating systems
+and several flavors of Linux.
+
+The downloadable binary packages are freely available from AMD at
+http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-math-libraries/
+
+Once the appropriate package for the respective OS has finished downloading,
+uncompress the package using the native tools available on the platform in a
+directory of the user's choice. Everything needed to build a program using
+clFFT is included in the directory tree, including documentation, header files,
+binary library components, and sample programs for programming illustration.
+
+@subsubsection CMakeDependancy CMake
+After the clFFT package is uncompressed on the user's hard drive, a samples directory exists with source code,
+but no Visual Studio project files, Unix makefiles, or other native build system exist. Instead, it contains a
+\c CMakeLists.txt file. clFFT uses CMake as its build system, and other build files, such as Visual Studio projects,
+nmake makefiles, or Unix makefiles, are generated by the CMake build system, during configuration. CMake is freely
+available for download from: http://www.cmake.org/
+
+@note CMake generates the native OS build files, so any changes made to the native build files are overwritten the
+next time CMake is run.
+
+CMake is written to pull compiler information from environment variables, and to look in default install
+directories for tools. Once installed, a popular interface to control the process of creating native build
+files is CMake-gui. When the GUI is launched, two text boxes appear at the top of the dialog: a path to
+source and a separate path to generate binaries. For the \c browse source... box, find the path to where you
+unzipped clFFT, and select the root \c samples directory that contains the CMakeLists.txt; for clFFT,
+this should be \c clFFT/samples.  For \c browse \c build..., select an appropriate directory where the build
+environment generates build files; a convenient location is a sibling directory to the source. This makes
+it easy to wipe all the binaries and start a fresh build. For instance, for a debug configuration of NMake,
+an example directory could be \c clFFT/bin/NMakeDebug. This is where the generated makefile, native build
+files, and intermediate object files are built. These generated files are kept separate from the source;
+this is referred to as 'out-of-source' builds, and is very similar in concept to what 'autotools' does for Linux.
+To build using NMake, simply type NMake in the build directory containing the makefile. To build using
+Visual Studio, generate the solution and project files into a directory such as \c clFFT/bin/vs10, find the
+generated \c .sln file, and open the solution.
+
+The first time the \c configure button near the bottom of the screen is clicked, it causes CMake to prompt for
+what type of native build files to make. Various properties appear in red in the \c properties box. Red indicates
+that the value has changed since last time \c configure was clicked. (The first time configure is clicked,
+everything is red.) CMake tries to configure itself automatically to the client's system by looking at the system's
+environment variables and by searching through default install locations for project dependencies. Take a moment to
+verify the settings and paths that are displayed on the configuration screen; if any changes must be made, you can
+provide correct paths or adjust settings by typing directly into the CMake configuration screen. Click the
+\c configure button a second time to 'bake' those settings and serialize them to disk.
+
+Options relevant to the clFFT project include:
+
+@li \c 'AMDAPPSDKROOT': Location of the Stream SDK installation. This value is already populated if CMake
+could determine the location by looking at the environment variables. If not, the user must provide a path to
+the root installation of the Stream SDK here.
+
+@li \c 'BOOST_ROOT':  Location of the Boost SDK installation. This value is already populated if CMake could
+determine the location by looking at the environment variables or default install locations. If not, the user must
+provide a path to the root installation of the Boost SDK here. This dependency is only relevant to the sample
+client; the FFT library does not depend on Boost.
+
+@li \c 'CMAKE_BUILD_TYPE':  Defines the build type (default is debug). For Visual Studio projects, this does
+not appear (modifiable in IDE); for makefile-based builds, this is set in CMake.
+
+@li \c 'CMAKE_INSTALL_PREFIX':  The path to install all binaries and headers generated from the build. This is
+used when the user types \c make \c install or builds the INSTALL project in Visual Studio. All generated binaries and
+headers are copied into the path prefixed with \c CMAKE_INSTALL_PREFIX.
+
+The Visual Studio projects are self explanatory, but a few other projects are autogenerated; these might be unfamiliar.
+
+@li \c 'ALL_BUILD': A project that is empty of files, but since it depends on all user projects, it provides a
+convenient way to rebuild everything.
+
+@li \c 'ZERO_CHECK':  A CMake-specific project that checks to see if the generated solution and project files are in sync
+with the \c CMakeLists.txt file. If these files are modified, the solutions and projects are now out-of-sync, and this
+project prompts the user to regenerate their environment.
+
+@note If the user chooses to build on Windows with a NMake based build, it is important to launch CMake from within a
+Visual Studio Command Prompt (20xx).  This is because CMake must be able to parse environment variables to properly
+initialize NMake. This is not necessary if a Visual Studio solution is generated, because solution files contain their
+own environmental setup.
+
+@subsubsection BoostDependancy Boost
+clFFT includes one sample project that has source dependencies on Boost: the sample client project. Boost is
+freely available from:  http://www.boost.org/.
+
+The command-line clFFT sample client links with the \c program_options library, which provides functionality for
+parsing command-line parameters and \c .ini files in a cross-platform manner. Once Boost is downloaded and
+extracted on the hard drive, the \c program_options library must be compiled. The Boost build system
+uses the BJam builder (a project for a CMake-based Boost build is available for separate download). This is
+available for download from the Boost website, or the user can build BJam; Boost includes the source to BJam
+in its distribution, and the user can execute \c bootstrap.bat (located in the root boost directory) to build it.
+
+After BJam is either built or installed, an example BJam command-line is given below for building a 64-bit
+\c program_options binary, for both static and dynamic linking:
+@code
+bjam --with-program_options address-model=64 link=static,shared stage
+@endcode
+
+The last step to make boost readily available and usable by CMake and the native compiler is to add an environment
+variable to the system called \c BOOST_ROOT. In Windows, right click on the computer icon and go to
+@code
+'Properties|Advanced system settings|Advanced|Environment Variables...'
+@endcode
+Remember to relaunch any new processes that are open, in order to inherit the new environment variable. On Linux,
+consider modifying the \c .bashrc file (or shell equivalent) to export a new environment variable every time you log in.
+
+If you are on a Linux system and have used a package manager to install Boost, you may have to confirm where the Boost
+\c include and \c library files have been placed. For example, after installing Boost with the Ubuntu Synaptic Package
+Manager, the Boost \c include files are in \c /usr/include/boost, and the library files are in either \c /usr/lib or \c /usr/lib64.
+The \c CMakeLists.txt file in this project defaults the \c BOOST_ROOT value to \c /usr on Linux; so, if the system is set up
+similarly, no further action is necessary. If the system is set up differently, you may have to set the \c BOOST_ROOT
+environmental variable accordingly.
+
+@note Note that CMake does not recognize version numbers at the end of the library filename; so, if the package
+manager only created a \c libboost_module_name.so.x.xx.x file (where x.xx.x is the version of Boost),
+the user may need to manually create a soft link called \c libboost_module_name.so to the versioned
+\c libboost_module_name.so.x.xx.x. See the clFFT binary artifacts in the install directory for an example.
+
+@section IntroFFT Introduction to clFFT
+
+The FFT is an implementation of the Discrete Fourier Transform (DFT) that makes use of symmetries in the DFT
+definition to reduce the mathematical intensity required from O(\f$N^2\f$) to O(\f$ N \log N\f$) when the
+sequence length, \c N, is the product of small prime factors.  Currently, there is no standard API for FFT
+routines. Hardware vendors usually provide a set of high-performance FFTs optimized for their systems:
+no two vendors employ the same interfaces for their FFT routines. clFFT provides a set of FFT routines that
+are optimized for AMD graphics processors, and that are also functional across CPU and other compute devices.
+
+@subsection SupportRadix Supported Radices
+clFFT supports transform lengths whose only prime factors are 2, 3, and 5. This means that the vector lengths that can be
+configured through a plan can be any product of powers of two, three, and five; examples include \f$2^7, 2^1*3^1, 3^2*5^4, 2^2*3^3*5^5\f$,
+up to the limit that the device can support.
+
+@subsection SizeLimit Transform Size Limits
+Currently, there is an upper bound on the transform size the library supports. This
+limit is \f$2^{24}\f$ for single precision and \f$2^{22}\f$ for double precision. This means that the
+product of transform lengths must not exceed these values. As an example, a
+1D single-precision FFT of size 1024 is valid since 1024 \f$<= 2^{24}\f$. Similarly, a 2D
+double-precision FFT of size 1024x1024 is also valid, since 1024*1024 \f$<= 2^{22}\f$.
+But, a 2D single-precision FFT of size 4096x8192 is not valid because
+4096*8192 \f$> 2^{24}\f$.
+
+@subsection EnumDim Dimensionality
+clFFT currently supports FFTs of up to three dimensions, given by the enum \c clfftDim. This enum
+is a required parameter into \c clfftCreateDefaultPlan() to create an initial plan; there is no default for
+this parameter. Depending on the dimensionality that the client requests, clFFT uses the formulations
+shown below to compute the DFT.
+
+The definition of a 1D complex DFT used by clFFT is given by:
+\f[
+{\tilde{x}}_j = {{1}\over{scale}}\sum_{k=0}^{n-1}x_k\exp\left({\pm i}{{2\pi jk}\over{n}}\right)\hbox{ for } j=0,1,\ldots,n-1
+\f]
+where \f$x_k\f$ are the complex data to be transformed, \f$\tilde{x}_j\f$ are the transformed data, and the sign
+of \f$\pm\f$ determines the direction of the transform: \f$-\f$ for forward and \f$+\f$ for backward. Note that
+the user must provide the scaling factor.  Typically, the scale is set to 1 for forward transforms, and
+\f${{1}\over{N}}\f$ for backwards transforms.
+
+The definition of a complex 2D DFT used by clFFT is given by:
+\f[
+{\tilde{x}}_{jk} = {{1}\over{scale}}\sum_{q=0}^{m-1}\sum_{r=0}^{n-1}x_{rq}\exp\left({\pm i} {{2\pi jr}\over{n}}\right)\exp\left({\pm i}{{2\pi kq}\over{m}}\right)
+\f]
+for \f$j=0,1,\ldots,n-1\hbox{ and } k=0,1,\ldots,m-1\f$, where \f$x_{rq}\f$ are the complex data to be transformed,
+\f$\tilde{x}_{jk}\f$ are the transformed data, and the sign of \f$\pm\f$ determines the direction of the
+transform.  Typically, the scale is set to 1 for forwards transforms and \f${{1}\over{M \cdot N}}\f$ for backwards transforms.
+
+The definition of a complex 3D DFT used by clFFT is given by:
+\f[
+\tilde{x}_{jkl} = {{1}\over{scale}}\sum_{s=0}^{p-1}\sum_{q=0}^{m-1}\sum_{r=0}^{n-1}
+x_{rqs}\exp\left({\pm i} {{2\pi jr}\over{n}}\right)\exp\left({\pm i}{{2\pi kq}\over{m}}\right)\exp\left({\pm i}{{2\pi ls}\over{p}}\right)
+\f]
+for \f$j=0,1,\ldots,n-1\hbox{ and } k=0,1,\ldots,m-1\hbox{ and } l=0,1,\ldots,p-1\f$, where \f$x_{rqs}\f$ are the complex data
+to be transformed, \f$\tilde{x}_{jkl}\f$ are the transformed data, and the sign of \f$\pm\f$ determines the direction of the
+transform.  Typically, the scale is set to 1 for forwards transforms and \f${{1}\over{M \cdot N \cdot P}}\f$ for backwards transforms.
+
+@subsection InitLibrary Setup and Teardown of clFFT
+clFFT is initialized by a call to \c clfftSetup(), which must be called before any other API exported
+from clFFT. This allows the library to create resources used to manage the plans that are created and
+destroyed by the user. This API also takes a structure \c clfftInitSetupData that is initialized by the
+client to control the behavior of the library. The corresponding \c clfftTeardown() method must be called
+by the client when it is done using the library. This instructs clFFT to release all resources, including
+any acquired references to any OpenCL objects that may have been allocated or passed to it through the
+API.
+
+@subsection ThreadSafety Thread safety
+The clFFT API is designed to be thread-safe. It is safe to create plans from multiple threads, and to
+destroy those plans in separate threads. Multiple threads can call \c clfftEnqueueTransform() to place work
+into a command queue at the same time. clFFT does not provide a single-threaded version of the library.
+It is expected that the overhead of the synchronization mechanisms that make clFFT thread safe is minor.
+
+Currently, multi-device operation must be managed by the user. OpenCL contexts can be created that are
+associated with multiple devices, but clFFT only uses a single device from that context to transform
+the data. Multi-device operation can be managed by the user by creating multiple contexts, where each
+context contains a different device, and the user is responsible for scheduling and partitioning the work
+across multiple devices and contexts.
+
+@subsection MajorFormat Row Major formats
+clFFT expects all multi-dimensional input passed to it to be in row-major format. This is compatible
+with C-based languages. However, clFFT is very flexible in the input and output data organization it
+accepts by allowing the user to specify a stride for each dimension. This feature can be used to process
+data in column major arrays, and other non-contiguous data formats. See \ref clfftSetPlanInStride and
+\ref clfftSetPlanOutStride.
+
+@subsection Object OpenCL object creation
+OpenCL objects, such as contexts, \c cl_mem buffers, and command queues, are the responsibility of the
+user application to allocate and manage. All of the clFFT interfaces that must interact with OpenCL
+objects take those objects as references through the API. Specifically, the plan creation function
+@ref clfftCreateDefaultPlan() takes an OpenCL context as a parameter reference, increments the reference
+count on that object, and keeps the object alive until the corresponding plan has been destroyed through
+a call to @ref clfftDestroyPlan().
+
+@subsection FlushQueue Flushing of command queues
+The clFFT API operates asynchronously, and with the exception of thread safety locking with multiple
+threads, all APIs return immediately. Specifically, the @ref clfftEnqueueTransform() API does not
+explicitly flush the command queues that are passed by reference to it; it pushes the transform work onto the
+command queues and returns the modified queues to the client. The client is free to issue its own blocking
+logic, using OpenCL synchronization mechanisms, or push further work onto the queue to continue processing.
+
+@section clFFTPlans clFFT Plans
+
+A plan is the collection of (almost) all of the parameters needed to specify an FFT computation.
+This includes:
+<ul>
+<li> What OpenCL context executes the transform?
+<li> Is this a 1D, 2D or 3D transform?
+<li> What are the lengths or extents of the data in each dimension?
+<li> How many datasets are being transformed?
+<li> What is the data precision?
+<li> Should a scaling factor be applied to the transformed data?
+<li> Does the output transformed data replace the original input data in the same buffer (or
+buffers), or is the output data written to a different buffer (or buffers)?
+<li> How is the input data stored in its data buffers?
+<li> How is the output data stored in its data buffers?
+</ul>
+
+The plan does not include:
+<ul>
+<li> The OpenCL handles to the input and output data buffers.
+<li> The OpenCL handle to a temporary scratch buffer (if needed).
+<li> Whether to execute a forward or reverse transform.
+</ul>
+These are specified when the plan is executed.
+
+@subsection Default Default Plan Values
+
+When a new plan is created by calling @ref clfftCreateDefaultPlan, its parameters are initialized as
+follows:
+
+<ul>
+<li> Dimensions: as provided by the caller.
+<li> Lengths: as provided by the caller.
+<li> Batch size: 1.
+<li> Precision: \c CLFFT_SINGLE.
+<li> Scaling factors:
+    <ol>
+    <li> For the forward transform, the default is 1.0, or no scale factor is applied.
+    <li> For the reverse transform, the default is 1.0 / P, where P is the product of the FFT lengths.
+    </ol>
+<li> Location: \c CLFFT_INPLACE.
+<li> Input layout: \c CLFFT_COMPLEX_INTERLEAVED.
+<li> Input strides: the strides of a multidimensional array of the lengths specified, where the data is
+compactly stored using the row-major convention.
+<li> Output layout: \c CLFFT_COMPLEX_INTERLEAVED.
+<li> Output strides: same as input strides.
+</ul>
+
+Writing client programs that depend on these initial values is <b> not </b> recommended.
+
+@subsection EnumLayout Supported Memory Layouts
+There are two main families of Discrete Fourier Transform (DFT):
+<ul>
+<li> Routines for the transformation of complex data. clFFT supports two layouts to store complex numbers:
+a 'planar' format, where the real and imaginary components are kept in separate arrays:
+<ol>
+	<li> Buffer1: \c RRRRR
+	<li> Buffer2: \c IIIII
+</ol>
+and an interleaved format, where the real and imaginary components are stored as contiguous pairs:
+<ol>
+	<li> Buffer1: \c RIRIRIRIRIRI
+</ol>
+<li> Routines for the transformation of real to complex data and vice versa; clFFT provides enums to define
+these formats. For transforms involving real data, there are two possibilities:
+<ul>
+<li> Real data being subject to forward FFT transform that results in complex
+data.
+<li> Complex data being subject to backward FFT transform that results in
+real data. See the Section "FFTs of Real Data".
+</ul>
+</ul>
+
+@subsubsection DistanceStridesandPitches Strides and Distances
+For one-dimensional data, if clStrides[0] = strideX = 1, successive elements in the first dimension are stored contiguously
+in memory. If strideX is an integral value greater than 1, gaps in memory exist between each element of
+the vectors.
+
+For multi-dimensional data, if clStrides[1] = strideY = LenX for 2 dimensional data and clStrides[2] = strideZ
+= LenX*LenY for 3 dimensional data, no gaps exist in memory between each element, and all vectors are
+stored tightly packed in memory. Here, LenX, LenY, and LenZ denote the transform lengths clLengths[0],
+clLengths[1], and clLengths[2], respectively, which are used to set up the plan.
+
+By specifying non-default strides, it is possible to process either
+row-major or column-major arrays. Data can be extracted from arrays of structures. Almost any regular
+data storage pattern can be accommodated.
+
+Distance is the amount of memory that exists between corresponding elements
+in an FFT primitive in a batch. Distance is measured in the units of the FFT
+primitive; complex data measures in complex units, and real data measures in
+real data. Stride between tightly packed elements is 1 in either case. Typically,
+one can measure the distance between any two elements in a batch primitive,
+be it 1D, 2D, or 3D data. For tightly packed data, the distance between FFT
+primitives is the size of the FFT primitive, such that dist=LenX for 1D data,
+dist=LenX*LenY for 2D data, and dist=LenX*LenY*LenZ for 3D data. It is
+possible to set the distance of a plan to be less than the size of the FFT vector;
+most often 1 for this case. When computing a batch of 1D FFT vectors, if
+distance == 1, and strideX == length( vector ), a transposed output is produced
+for a batch of 1D vectors. It is left to the user to verify that the distance and
+strides are valid (not intersecting); if not valid, undefined results can occur.
+
+A simple example is to perform a 1D length 4096 transform on each row of an array of 1024 rows x 4096 columns of
+values stored in a column-major array, such as a FORTRAN program might provide. (This would be equivalent
+to a C or C++ program that had an array of 4096 rows x 1024 columns stored in a row-major manner, and
+you wanted to perform a 1-D length 4096 transform on each column.) In this case, specify the strides
+[1024, 1].
+
+For a more complex example, suppose an input buffer contains a raster grid of 1024 x 1024 monochrome pixel
+values, and you want to compute a 2D FFT for each 64 x 64 subtile of the grid. Specifying strides
+allows you to treat each horizontal band of 1024 x 64 pixels as an array of 16 64 x 64 matrices,
+and process an entire band with a single call to @ref clfftEnqueueTransform. (Specifying strides is not
+quite flexible enough to transform the entire grid of this example with a single kernel execution.)
+It is possible to create a Plan to compute arrays of 64 x 64 2D FFTs, then specify three strides:
+[1, 1024, 64]. The first stride, 1, indicates that the rows of each matrix are stored consecutively;
+the second stride, 1024, gives the distance between rows, and the third stride, 64, defines the
+distance from matrix to matrix. Then call @ref clfftEnqueueTransform 16 times: once for each
+horizontal band of pixels.
+
+@subsection EnumPrecision Supported Precisions in clFFT
+Both \c CLFFT_SINGLE and \c CLFFT_DOUBLE precisions are supported by the library
+for all supported radices. With both of these enums the host computer's math
+functions are used to produce tables of sines and cosines for use by the OpenCL
+kernel.
+
+Both \c CLFFT_SINGLE_FAST and \c CLFFT_DOUBLE_FAST are meant to generate faster
+kernels with reduced accuracy, but are disabled in the current build.
+
+See @ref clfftPrecision, @ref clfftSetPlanPrecision, and @ref clfftGetPlanPrecision.
+
+@subsection FftDirection clfftDirection
+The direction of the transform is not baked into the plan; the same plan can be used to specify both forward
+and backward transforms. Instead, @ref clfftDirection is passed as a parameter into @ref clfftEnqueueTransform.
+
+@subsection EnumResultLocation In-Place and Out-of-Place
+The clFFT API supports both in-place and out-of-place transforms. With in-place
+transforms, only input buffers are provided to the @ref clfftEnqueueTransform() API,
+and the resulting data is written to the same buffers, overwriting the input data.
+With out-of-place transforms, distinct output buffers are provided to the
+@ref clfftEnqueueTransform() API, and the input data is preserved.
+In-place transforms require that the \c cl_mem objects the client
+creates have both \c read and \c write permissions. This is inherent in the nature of the
+in-place algorithm. Out-of-place transforms require that the destination buffers
+have \c read and \c write permissions, but input buffers can still be created with
+read-only permissions. This is a clFFT requirement because internally the
+algorithms may go back and forth between the destination buffers and internally
+allocated temp buffers. For out-of-place transforms, clFFT never writes back
+to the input buffers.
+
+@subsection clFFTEff Batches
+The efficiency of clFFT is improved by utilizing transforms in batches. Sending
+as much data as possible in a single transform call leverages the parallel
+compute capabilities of OpenCL devices (and GPU devices in particular), and
+minimizes the penalty of transfer overhead. It's best to think of an OpenCL device
+as a high-throughput, high-latency device. Using a networking analogy as an
+example, it's similar to having a massively high-bandwidth pipe with very high
+ping response times. If the client is ready to send data to the device for compute,
+it should be sent in as few API calls as possible. This can be done by batching.
+clFFT plans have a parameter to describe the number of transforms being
+batched: @ref clfftSetPlanBatchSize(), and to describe how those batches are
+laid out and spaced in memory: @ref clfftSetPlanDistance(). 1D, 2D, or 3D
+transforms can be batched.
+
+@section Outline  Using clFFT on a Client Application
+
+To perform FFT calculations using clFFT, the client program must:
+<ul>
+	<li> Initialize the library by calling @ref clfftSetup. </li>
+	<li> For each distinct type of FFT needed: </li>
+	<ol>
+		<li> Create an FFT Plan object. This usually is done by calling the factory function @ref clfftCreateDefaultPlan.
+		Some of the most fundamental parameters are specified at this time, and others assume default values.  The OpenCL
+		context must be provided when the plan is created; it cannot be changed. Another way is to call @ref clfftCopyPlan.
+		In either case, the function returns an opaque handle to the Plan object. </li>
+		<li> Complete the specification of all of the Plan parameters by calling the various parameter-setting functions,
+		\c clfftSet_____. </li>
+		<li> Optionally, "bake" or finalize the plan, calling @ref clfftBakePlan. This signals to the library the end
+		of the specification phase, and causes it to generate and compile the exact OpenCL kernels needed to perform the
+		specified FFT on the OpenCL device provided.
+
+		At this point, all performance-enhancing optimizations are applied, possibly including executing benchmark kernels
+		on the OpenCL device context in order to maximize runtime performance.
+
+		Although this step is optional, most users probably want to include it so that they can control when this work is
+		done. Usually, this time consuming step is done when the application is initialized. If the user does not call
+		@ref clfftBakePlan, this work is done during the first call to @ref clfftEnqueueTransform.
+		</li>
+	</ol>
+
+	<li> The OpenCL FFT kernels now are ready to execute as many times as needed. </li>
+	<ol>
+		<li>  Call @ref clfftEnqueueTransform. At this point, specify whether you want to execute a forward or reverse
+		transform; also, provide the OpenCL \c cl_mem handles for the input buffer(s), output buffer(s)--unless you want
+		the transformed data to overwrite the input buffers, and (optionally) scratch buffer.
+
+		@ref clfftEnqueueTransform performs one or more calls to the OpenCL function clEnqueueNDRangeKernel.
+		Like clEnqueueNDRangeKernel, @ref clfftEnqueueTransform is a non-blocking call. The commands to
+		execute the FFT compute kernel(s) are added to the OpenCL context queue to be executed asynchronously.
+		An OpenCL event handle is returned to the caller. If multiple NDRangeKernel operations are queued,
+		the final event handle is returned.
+		</li>
+		<li>  The application now can add additional OpenCL tasks to the OpenCL context's queue. For example, if the
+		next step in the application's process is to apply a filter to the transformed data, the application would generate
+		that clEnqueueNDRangeKernel, specifying the transform's output buffer(s) as the input to the filter kernel,
+		and providing the transform's event handle to ensure proper synchronization. </li>
+		<li>  If the application must access the transformed data directly, it must call one of the OpenCL functions
+		for synchronizing the host computer's execution with the OpenCL device (for example: clFinish()). </li>
+	</ol>
+	<li> Terminate the library by calling @ref clfftTeardown. </li>
+</ul>
+
+@section RealFFT  FFTs of Real Data
+
+When real data is subject to DFT transformation, the resulting complex output
+has a special property: about half of the output values are redundant because they are
+complex conjugates of the other half. This is called the Hermitian redundancy.
+So, for space and performance considerations, it is only necessary to store the
+non-redundant part of the data. Most FFT libraries use this property to offer
+specific storage layouts for FFTs involving real data. clFFT provides 3
+enumerated types to deal with real data FFTs:
+
+<ul>
+	<li> \c CLFFT_REAL
+	<li> \c CLFFT_HERMITIAN_INTERLEAVED
+	<li> \c CLFFT_HERMITIAN_PLANAR
+</ul>
+
+The first enum specifies that the data is purely real. This can be used to feed
+real input or get back real output. The second and third enums specify layouts
+for storing FFT output. They are similar to the corresponding full complex enums
+in the way they store real and imaginary components. The difference is that they
+store only about half of the complex output. Client applications can do just a
+forward transform and analyze the output. Or they can do some processing of
+the output and do a backward transform to get back real data. This is illustrated
+in the following figure.
+
+@image html realfft_fwdinv.jpg "Forward and Backward Transform Processes"
+
+Let us consider a 1D real FFT of length N. The full output looks as shown in
+the following figure.
+
+@image html realfft_1dlen.jpg "1D Real FFT of Length N"
+
+Here, C* denotes the complex conjugate. Since the values at indices greater
+than N/2 can be deduced from the first half of the array, clFFT stores data
+only up to the index N/2. This means that the output contains only 1 + N/2
+complex elements, where the division N/2 is rounded down. Examples for even
+and odd lengths are given below.
+
+An example for N = 8 is shown in the following figure.
+
+@image html realfft_ex_n8.jpg "Example for N = 8"
+
+An example for N = 7 is shown in the following figure.
+
+@image html realfft_ex_n7.jpg "Example for N = 7"
+
+
+For length 8, only (1 + 8/2) = 5 of the output complex numbers are stored, with
+the index ranging from 0 through 4. Similarly for length 7, only (1 + 7/2) = 4 of
+the output complex numbers are stored, with the index ranging from 0 through 3.
+
+For 2D and 3D FFTs, the FFT length along the least dimension is used to
+compute the (1 + N/2) value. This is because the FFT along the least dimension
+is what is computed first and is logically a real-to-hermitian transform. The FFTs
+along other dimensions are computed afterwards; they are simply 'complex-to-complex'
+transforms. For example, assuming clLengths[2] is used to set up a 2D
+real FFT, let N1 = clLengths[1], and N0 = clLengths[0]. The output FFT has
+N1*(1 + N0/2) complex elements. Similarly, for a 3D FFT with clLengths[3] and
+N2 = clLengths[2], N1 = clLengths[1], and N0 = clLengths[0], the output has
+N2*N1*(1 + N0/2) complex elements.
+
+@subsection RealModes Supported Modes
+
+Out-of-place transforms:
+
+<ul>
+	<li> \c CLFFT_REAL to \c CLFFT_HERMITIAN_INTERLEAVED
+	<li> \c CLFFT_REAL to \c CLFFT_HERMITIAN_PLANAR
+	<li> \c CLFFT_HERMITIAN_INTERLEAVED to \c CLFFT_REAL
+	<li> \c CLFFT_HERMITIAN_PLANAR to \c CLFFT_REAL
+</ul>
+
+In-place transforms:
+
+<ul>
+	<li> \c CLFFT_REAL to \c CLFFT_HERMITIAN_INTERLEAVED
+	<li> \c CLFFT_HERMITIAN_INTERLEAVED to \c CLFFT_REAL
+</ul>
+
+
+@subsection RealExamples Examples
+
+The following pages provide figures and examples to explain in detail the real
+FFT features of this library.
+
+@image html realfft_expl_01.jpg "1D FFT - Real to Hermitian"
+@image html realfft_expl_02.jpg "1D FFT - Real to Hermitian, Example 1"
+@image html realfft_expl_03.jpg "1D FFT - Real to Hermitian, Example 2"
+@image html realfft_expl_04.jpg "1D FFT - Real to Hermitian, Example 3"
+@image html realfft_expl_05.jpg "1D FFT - Hermitian to Real"
+@image html realfft_expl_06.jpg "1D FFT - Hermitian to Real, Example"
+@image html realfft_expl_07.jpg "2D FFT - Real to Hermitian In Place"
+@image html realfft_expl_08.jpg "2D FFT - Real to Hermitian, Example"
+
+ */
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
new file mode 100644
index 00000000..ec87b2d4
--- /dev/null
+++ b/src/library/plan.cpp
@@ -0,0 +1,3302 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+////////////////////////////////////////////
+
+// plan.cpp : Plan creation, kernel compilation and plan baking for the clFFT library.
+//
+
+#include "stdafx.h"
+#include <math.h>
+#include "private.h"
+#include "repo.h"
+#include "plan.h"
+#include "generator.stockham.h"
+#include "../include/convenienceFunctions.h"
+
+using std::vector;
+
+const std::string beginning_of_binary( "<[£_beginning_of_binary_£]>" );
+const std::string end_of_binary( "<[£_I_may_be_a_sorry_case,_but_I_don't_write_jokes_in_base_13_£]>" );
+const std::string end_of_file( "<[£_You're_off_the_edge_of_the_map,_mate._Here_there_be_monsters_£]>" );
+
+//	Ordering predicate that lets FFTKernelGenKeyParams structs serve as keys of a std::map.
+//	Two keys are ordered by a raw byte-wise comparison of the whole struct.
+bool operator<( const FFTKernelGenKeyParams& lhs, const FFTKernelGenKeyParams& rhs)
+{
+	//	memcmp yields a negative value exactly when the bytes of lhs sort before those of rhs
+	const int byteOrder = ::memcmp( &lhs, &rhs, sizeof( FFTKernelGenKeyParams ) );
+
+	const bool lhsSortsFirst = ( byteOrder < 0 );
+	return lhsSortsFirst;
+}
+
+//	Create a plan with default settings for a 1D, 2D or 3D transform of the given lengths: in-place,
+//	complex-interleaved input and output, single precision, batch size 1, packed strides, forward scale 1
+//	and backward scale 1/(product of lengths). Returns CLFFT_INVALID_HOST_PTR for a NULL plHandle or clLengths,
+//	CLFFT_INVALID_ARG_VALUE for a zero length, and CLFFT_NOTIMPLEMENTED for an unsupported length or dimension.
+clfftStatus	clfftCreateDefaultPlan( clfftPlanHandle* plHandle, cl_context context, const clfftDim dim,
+						const size_t* clLengths )
+{
+	if( plHandle == NULL || clLengths == NULL )	// plHandle is dereferenced below when the plan lock is named
+		return CLFFT_INVALID_HOST_PTR;
+
+	size_t lenX = 1, lenY = 1, lenZ = 1;
+
+	switch( dim )
+	{
+		case CLFFT_1D:
+		{
+			//	Minimum length size is 1
+			if( clLengths[ DimX ] == 0 )
+				return CLFFT_INVALID_ARG_VALUE;
+
+			if( !IsASupportedLength( clLengths[ DimX ] ) )
+			{
+				return CLFFT_NOTIMPLEMENTED;
+			}
+
+			lenX = clLengths[ DimX ];
+		}
+			break;
+		case CLFFT_2D:
+		{
+			//	Minimum length size is 1
+			if( clLengths[ DimX ] == 0 || clLengths[ DimY ] == 0 )
+				return CLFFT_INVALID_ARG_VALUE;
+
+			if( !IsASupportedLength( clLengths[ DimX ] ) || !IsASupportedLength( clLengths[ DimY ] ) )
+			{
+				return CLFFT_NOTIMPLEMENTED;
+			}
+
+			lenX = clLengths[ DimX ];
+			lenY = clLengths[ DimY ];
+		}
+			break;
+		case CLFFT_3D:
+		{
+			//	Minimum length size is 1
+			if( clLengths[ DimX ] == 0 || clLengths[ DimY ] == 0 || clLengths[ DimZ ] == 0 )
+				return CLFFT_INVALID_ARG_VALUE;
+
+			if( !IsASupportedLength( clLengths[ DimX ] ) || !IsASupportedLength( clLengths[ DimY ] ) ||
+				!IsASupportedLength( clLengths[ DimZ ] ))
+			{
+				return CLFFT_NOTIMPLEMENTED;
+			}
+
+			lenX = clLengths[ DimX ];
+			lenY = clLengths[ DimY ];
+			lenZ = clLengths[ DimZ ];
+		}
+			break;
+		default:
+			return CLFFT_NOTIMPLEMENTED;
+	}
+
+	FFTPlan* fftPlan	= NULL;
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	OPENCL_V( fftRepo.createPlan( plHandle, fftPlan ), _T( "fftRepo.createPlan failed" ) );
+
+	fftPlan->baked			= false;
+	fftPlan->dim			= dim;
+	fftPlan->placeness		= CLFFT_INPLACE;
+	fftPlan->inputLayout	= CLFFT_COMPLEX_INTERLEAVED;
+	fftPlan->outputLayout	= CLFFT_COMPLEX_INTERLEAVED;
+	fftPlan->precision		= CLFFT_SINGLE;
+	fftPlan->context		= context;
+	fftPlan->forwardScale	= 1.0;
+	fftPlan->backwardScale	= 1.0 / static_cast< double >( lenX * lenY * lenZ );
+	fftPlan->batchsize		= 1;
+	fftPlan->gen			= Stockham; //default setting
+
+	OPENCL_V(fftPlan->SetEnvelope(), _T("SetEnvelope failed"));
+
+	clRetainContext( fftPlan->context );
+
+	//	Detect the OpenCL devices that belong to the context
+	// First, get the size of device list data
+	size_t deviceListSize;
+	OPENCL_V( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize ),
+		"Getting device array size ( ::clGetContextInfo() )" );
+
+	//	Allocate memory for the devices
+	fftPlan->devices.resize( deviceListSize / sizeof( cl_device_id ) );
+
+	/* Now, get the device list data */
+	OPENCL_V( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, &fftPlan->devices[ 0 ], NULL ),
+		"Getting device array ( ::clGetContextInfo() )" );
+
+	//	Need to devise a way to generate better names
+	tstringstream	tstream;
+	tstream << _T( "plan_" ) << *plHandle;
+
+	lockRAII* planLock	= NULL;
+	OPENCL_V( fftRepo.getPlan( *plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	planLock->setName( tstream.str( ) );
+
+	//	Set the lengths and default strides/pitches depending on the dim that the user passes to us
+	switch( dim )
+	{
+		case CLFFT_1D:
+		{
+			fftPlan->length.push_back( lenX );
+			fftPlan->inStride.push_back( 1 );
+			fftPlan->outStride.push_back( 1 );
+			fftPlan->iDist		= lenX;
+			fftPlan->oDist		= lenX;
+		}
+			break;
+		case CLFFT_2D:
+		{
+			fftPlan->length.push_back( lenX );
+			fftPlan->length.push_back( lenY );
+			fftPlan->inStride.push_back( 1 );
+			fftPlan->inStride.push_back( lenX );
+			fftPlan->outStride.push_back( 1 );
+			fftPlan->outStride.push_back( lenX );
+			fftPlan->iDist		= lenX*lenY;
+			fftPlan->oDist		= lenX*lenY;
+		}
+			break;
+		case CLFFT_3D:
+		{
+			fftPlan->length.push_back( lenX );
+			fftPlan->length.push_back( lenY );
+			fftPlan->length.push_back( lenZ );
+			fftPlan->inStride.push_back( 1 );
+			fftPlan->inStride.push_back( lenX );
+			fftPlan->inStride.push_back( lenX*lenY );
+			fftPlan->outStride.push_back( 1 );
+			fftPlan->outStride.push_back( lenX );
+			fftPlan->outStride.push_back( lenX*lenY );
+			fftPlan->iDist		= lenX*lenY*lenZ;
+			fftPlan->oDist		= lenX*lenY*lenZ;
+		}
+			break;
+	}
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Write the generated OpenCL source of this plan's kernel to the file "clfft.kernel.<generator><handle>.cl"
+//	(a relative path). Returns CLFFT_FILE_CREATE_FAILURE if the file cannot be opened, CLFFT_SUCCESS otherwise.
+clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams )
+{
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+
+	//	Logic to define a sensible filename
+	const std::string kernelPrefix( "clfft.kernel." );
+	std::string generatorName;
+	std::stringstream kernelPath;
+
+	switch( gen )
+	{
+		case Stockham:		generatorName = "Stockham"; break;
+		case Transpose:		generatorName = "Transpose"; break;
+		case Copy:			generatorName = "Copy"; break;	// label Copy kernels too, so the name is never left empty
+	}
+	kernelPath << kernelPrefix << generatorName << plHandle << ".cl";
+
+	//	Logic to write string contents out to file
+	tofstreamRAII< std::ofstream, std::string > kernelFile( kernelPath.str( ) );
+	if( !kernelFile.get( ) )
+	{
+		std::cerr << "Failed to open kernel file for writing: " << kernelPath.str( ) << std::endl;
+		return CLFFT_FILE_CREATE_FAILURE;
+	}
+
+	std::string kernel;
+	OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel ), _T( "fftRepo.getProgramCode failed." ) );
+	kernelFile.get( ) << kernel << std::endl;
+
+	return	CLFFT_SUCCESS;
+}
+
+// **************** TODO TODO TODO ***********************
+// Making CompileKernels function take in command queue parameter so we can build for 1 particular device only;
+// this may not be desirable for persistent plans, where we may have to compile for all devices in the context;
+// make changes appropriately before enabling persistent plans and then remove this comment
+
+//	Build the OpenCL program for this plan's kernel for the device behind commQueueFFT, then create the
+//	forward and/or backward cl_kernel objects; program and kernels are stored in the FFTRepo instance.
+//	The work is skipped unless FFTRepo::getclProgram reports CLFFT_INVALID_PROGRAM for the plan's kernel key.
+clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlanHandle plHandle, const clfftGenerators gen, FFTPlan* fftPlan )
+{
+	cl_int status = 0;
+
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+
+	// create a cl program executable for the device associated with command queue
+	// Get the device; the query is checked because a failure would leave q_device uninitialized
+	cl_device_id q_device;
+	OPENCL_V( clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &q_device, NULL), _T( "clGetCommandQueueInfo failed" ) );
+
+	FFTKernelGenKeyParams fftParams;
+	OPENCL_V( fftPlan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") );
+
+	cl_program program;
+	if( fftRepo.getclProgram( gen, fftParams, program ) == CLFFT_INVALID_PROGRAM )
+	{
+		//	If the user wishes us to write the kernels out to disk, we do so
+		if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS )
+		{
+			OPENCL_V( WriteKernel( plHandle, gen, fftParams ), _T( "WriteKernel failed." ) );
+		}
+
+		std::string programCode;
+		OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode ), _T( "fftRepo.getProgramCode failed." ) );
+
+		const char* source = programCode.c_str();
+		program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status );
+		OPENCL_V( status, _T( "clCreateProgramWithSource failed." ) );
+
+		// create a cl program executable for the device associated with command queue
+
+#if defined(DEBUGGING)
+		status = clBuildProgram( program, 1, &q_device, "-g -cl-opt-disable", NULL, NULL); // good for debugging kernels
+
+// if you have trouble creating symbols that GDB can pick up to set a breakpoint after kernels are loaded into memory
+// this can be used to stop execution to allow you to set a breakpoint in a kernel after kernel symbols are in memory.
+#ifdef DEBUG_BREAK_GDB
+		__debugbreak();
+#endif
+#else
+		status = clBuildProgram( program, 1, &q_device, NULL, NULL, NULL);
+#endif
+		if( status != CL_SUCCESS )
+		{
+			if( status == CL_BUILD_PROGRAM_FAILURE )
+			{
+				size_t buildLogSize = 0;
+				OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
+						_T( "clGetProgramBuildInfo failed" ) );
+
+				//	One extra zero-initialized element keeps the log NUL-terminated and the vector non-empty
+				vector< char > buildLog( buildLogSize + 1 );
+
+				OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
+						_T( "clGetProgramBuildInfo failed" ) );
+
+				std::cerr << "\n\t\t\tBUILD LOG\n";
+				std::cerr << "************************************************\n";
+				std::cerr << &buildLog[ 0 ] << std::endl;
+				std::cerr << "************************************************\n";
+			}
+
+			clReleaseProgram( program );	// the failed program was never stored in the repo, so release it here
+			OPENCL_V( status, _T( "clBuildProgram failed" ) );
+		}
+
+		fftRepo.setclProgram( gen, fftParams, program );
+
+		// For real transforms we compile either forward or backward kernel
+		bool r2c_transform = (fftParams.fft_inputLayout == CLFFT_REAL);
+		bool c2r_transform = (fftParams.fft_outputLayout == CLFFT_REAL);
+		bool real_transform = (gen == Copy) ? true : (r2c_transform || c2r_transform);
+		bool h2c = (gen == Copy) && ((fftParams.fft_inputLayout == CLFFT_HERMITIAN_PLANAR) || (fftParams.fft_inputLayout == CLFFT_HERMITIAN_INTERLEAVED));
+		bool c2h = (gen == Copy) && ((fftParams.fft_outputLayout == CLFFT_HERMITIAN_PLANAR) || (fftParams.fft_outputLayout == CLFFT_HERMITIAN_INTERLEAVED));
+
+		// get a kernel object handle for a kernel with the given name
+		cl_kernel kernel;
+		if( (!real_transform) || r2c_transform || c2h )
+		{
+			if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL )
+			{
+				std::string entryPoint;
+				OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) );
+
+				kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
+				OPENCL_V( status, _T( "clCreateKernel failed" ) );
+
+				fftRepo.setclKernel( program, CLFFT_FORWARD, kernel );
+			}
+		}
+
+		if( (!real_transform) || c2r_transform || h2c )
+		{
+			if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL )
+			{
+				std::string entryPoint;
+				OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) );
+
+				kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
+				OPENCL_V( status, _T( "clCreateKernel failed" ) );
+
+				fftRepo.setclKernel( program, CLFFT_BACKWARD, kernel );
+			}
+		}
+	}
+
+//TODO caching kernel binaries for later reload
+#if 0
+	// figure out number of devices and the sizes of the binary for each device.
+	OPENCL_V( clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof(fftPlan->number_of_devices), &(fftPlan->number_of_devices), NULL ), _T("CompileKernels(): error getting number of devices") );
+
+
+	// get the sizes of the different binaries
+	fftPlan->ResetBinarySizes();
+	OPENCL_V( clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * fftPlan->number_of_devices, fftPlan->binary_sizes.get(), NULL ), _T("CompileKernels(): error getting binary sizes") );
+
+	// we need a list of naked pointers to all of the binaries for OpenCL
+	std::unique_ptr<char*[]> naked_binary_pointers( new char*[fftPlan->number_of_devices] );
+
+	// make space for all of the generated binaries
+	for( int i = 0; i < fftPlan->number_of_devices; i++ )
+	{
+		// this is our permanent storage place for the binaries
+		fftPlan->binaries.push_back( std::unique_ptr<char[]>(new char[fftPlan->binary_sizes[i]] ) );
+		// and we need this second copy of it for OpenCL
+		naked_binary_pointers[i] = fftPlan->binaries[i].get();
+	}
+
+	// copy all of the generated binaries over
+	OPENCL_V( clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*) * fftPlan->number_of_devices, naked_binary_pointers.get(), NULL ), _T("CompileKernels(): error getting program binaries") );
+#endif
+
+	return	CLFFT_SUCCESS;
+}
+
+//TODO caching kernel binaries for later reload
+#if 0
+//	(Disabled by the surrounding #if 0.) Create the plan's program from binaries stored in the plan, build it, and register the program and its fft_fwd/fft_back kernels in the repo
+clfftStatus LoadCompiledKernels( const clfftPlanHandle plHandle, const clfftGenerators gen, FFTPlan* plan )
+{
+	// if there are no devices, there are not any kernels to load
+	if( plan->number_of_devices == 0 )
+		return CLFFT_SUCCESS;
+
+	FFTRepo& repo = FFTRepo::getInstance( );
+
+	FFTKernelGenKeyParams fftParams;
+	OPENCL_V( plan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") );
+
+	cl_program program;
+	if( repo.getclProgram( gen, fftParams, program ) == CLFFT_INVALID_PROGRAM )	// only load when the repo has no program for this key
+	{
+		//if( repo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS )
+		//{
+		//	OPENCL_V( WriteKernel( plHandle, gen, fftParams ), _T( "WriteKernel failed." ) );
+		//	//TODO there's no source to spit out, but we should consider giving the user a helpful message
+		//	// such as "there's no source to output -- kernel binaries loaded from file"
+		//}
+
+		std::unique_ptr<cl_int[]> binary_status( new cl_int[plan->number_of_devices] );
+		cl_int error_code;
+
+		std::unique_ptr<const unsigned char*[]> binaries( new const unsigned char*[plan->number_of_devices] );
+		for( int i = 0; i < plan->number_of_devices; i++ )
+		{
+			binaries[i] = reinterpret_cast<const unsigned char*>(plan->binaries[0].get());	// NOTE(review): every entry points at binaries[0], not binaries[i] -- confirm this is intended
+		}
+
+		if( plan->number_of_devices > 0 )	// always true here unless number_of_devices is negative: the zero case returned at the top
+		{
+			program = clCreateProgramWithBinary( plan->context,
+				(cl_uint)plan->number_of_devices, &plan->devices[0], &plan->binary_sizes[0], &binaries[0],
+				binary_status.get(), &error_code);	// NOTE(review): error_code and binary_status are not inspected afterwards
+
+			cl_int status = 0;
+			// create a cl program executable for all the devices specified (NOTE(review): only devices[0] is passed below)
+			status = clBuildProgram( program, 1, &plan->devices[0], NULL, NULL, NULL);
+
+			if( status != CL_SUCCESS )
+			{
+				if( status == CL_BUILD_PROGRAM_FAILURE )
+				{
+					size_t buildLogSize = 0;
+					OPENCL_V( clGetProgramBuildInfo( program, plan->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
+						_T( "clGetProgramBuildInfo failed" ) );
+
+					vector< char > buildLog( buildLogSize );
+					::memset( &buildLog[ 0 ], 0x0, buildLogSize );
+
+					OPENCL_V( clGetProgramBuildInfo( program, plan->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
+						_T( "clGetProgramBuildInfo failed" ) );
+
+					std::cerr << " \n\t\t\tBUILD LOG\n";
+					std::cerr << " ************************************************\n";
+					std::cerr << &buildLog[ 0 ] << std::endl;
+					std::cerr << " ************************************************\n";
+				}
+
+				OPENCL_V( status, _T( "clBuildProgram failed" ) );
+			}
+
+			repo.setclProgram( gen, fftParams, program );
+
+			// get a kernel object handle for a kernel with the given name
+			cl_kernel kernel;
+			if( repo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL )
+			{
+				kernel = clCreateKernel( program, "fft_fwd", &status );
+				OPENCL_V( status, _T( "clCreateKernel failed" ) );
+
+				repo.setclKernel( program, CLFFT_FORWARD, kernel );
+			}
+
+			if( repo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL )
+			{
+				kernel = clCreateKernel( program, "fft_back", &status );
+				OPENCL_V( status, _T( "clCreateKernel failed" ) );
+
+				repo.setclKernel( program, CLFFT_BACKWARD, kernel );
+			}
+
+			FFTKernelGenKeyParams params;
+			plan->GetKernelGenKey( params );	// NOTE(review): return status ignored here, unlike the checked call above
+			OPENCL_V( repo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );	// NOTE(review): hard-codes Stockham rather than using gen -- confirm
+		}
+	}
+
+	return CLFFT_SUCCESS;
+}
+#endif
+
+clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT,
+							void (CL_CALLBACK *pfn_notify)( clfftPlanHandle plHandle, void *user_data ), void* user_data )
+{
+	//	We do not currently support multi-GPU transforms
+	if( numQueues > 1 )
+		return CLFFT_NOTIMPLEMENTED;
+
+	//	Notification mechanism is not set up yet; BakePlan can be called recursively to decompose higher dimension FFT's into
+	//	arrays of 1d transforms, and this must be implemented to make only a single callback to the user.
+	if( pfn_notify != NULL )
+		return CLFFT_NOTIMPLEMENTED;
+
+	if( user_data != NULL )
+		return CLFFT_NOTIMPLEMENTED;
+
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftBakePlan" ) );
+
+	// if we have already baked the plan and nothing has changed since, we're done here
+	if( fftPlan->baked == true )
+	{
+		return CLFFT_SUCCESS;
+	}
+
+	//find product of lengths
+	size_t pLength = 1;
+	switch(fftPlan->dim)
+	{
+		case CLFFT_3D: pLength *= fftPlan->length[DimZ];
+		case CLFFT_2D: pLength *= fftPlan->length[DimY];
+		case CLFFT_1D: pLength *= fftPlan->length[DimX];
+	}
+
+	// upper bounds on transfrom lengths - address this in the next release
+	size_t SP_MAX_LEN = 1 << 24;
+	size_t DP_MAX_LEN = 1 << 22;
+	if((fftPlan->precision == CLFFT_SINGLE) && (pLength > SP_MAX_LEN)) return CLFFT_NOTIMPLEMENTED;
+	if((fftPlan->precision == CLFFT_DOUBLE) && (pLength > DP_MAX_LEN)) return CLFFT_NOTIMPLEMENTED;
+
+
+	// release buffers, as these will be created only in EnqueueTransform
+	if( NULL != fftPlan->intBuffer ) { OPENCL_V( clReleaseMemObject( fftPlan->intBuffer ), _T( "Failed to release internal temporary buffer" ) ); fftPlan->intBuffer = NULL; }
+	if( NULL != fftPlan->intBufferRC ) { OPENCL_V( clReleaseMemObject( fftPlan->intBufferRC ), _T( "Failed to release internal temporary buffer" ) ); fftPlan->intBufferRC = NULL; }
+	if( NULL != fftPlan->intBufferC2R ) { OPENCL_V( clReleaseMemObject( fftPlan->intBufferC2R ), _T( "Failed to release internal temporary buffer" ) ); fftPlan->intBufferC2R = NULL; }
+
+
+	if(fftPlan->dim == fftPlan->length.size() && fftPlan->gen != Transpose && fftPlan->gen != Copy) // confirm it is top-level plan (user plan)
+	{
+		if(fftPlan->placeness == CLFFT_INPLACE)
+		{
+			if( (fftPlan->inputLayout == CLFFT_HERMITIAN_PLANAR) || (fftPlan->outputLayout == CLFFT_HERMITIAN_PLANAR) )
+				return CLFFT_INVALID_PLAN;
+		}
+
+		// Make sure strides & distance are same for C-C transforms
+		if(fftPlan->placeness == CLFFT_INPLACE)
+		{
+			if( (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL) )
+			{
+				// check strides
+				for(size_t i=0; i<fftPlan->dim; i++)
+					if(fftPlan->inStride[i] != fftPlan->outStride[i])
+						return CLFFT_INVALID_PLAN;
+
+				// check distance
+				if(fftPlan->iDist != fftPlan->oDist)
+					return CLFFT_INVALID_PLAN;
+			}
+		}
+	}
+
+	if(fftPlan->gen == Copy)
+	{
+		OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
+		OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
+		fftPlan->baked		= true;
+		return	CLFFT_SUCCESS;
+	}
+
+//TODO caching kernel binaries for later reload
+#if 0
+	if( fftPlan->readFromFile == true )
+	{
+		OPENCL_V( LoadCompiledKernels( plHandle, fftPlan->gen, fftPlan ), _T( "LoadCompiledKernels() failed" ) );
+
+		// all of the plan compressing and subplan making should be done already,
+		// but we still need to make constant buffers
+		OPENCL_V( fftPlan->AllocateBuffers(), _T("AllocateBuffers() failed"));
+		fftPlan->ConstructAndEnqueueConstantBuffers( commQueueFFT );
+
+		if( fftPlan->planX )
+		{
+			OPENCL_V( clfftBakePlan( fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), "clfftBakePlan failed (planX)" );
+		}
+
+		if( fftPlan->planY )
+		{
+			OPENCL_V( clfftBakePlan( fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), "clfftBakePlan failed (planY)" );
+		}
+
+		if( fftPlan->planZ )
+		{
+			OPENCL_V( clfftBakePlan( fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), "clfftBakePlan failed (planZ)" );
+		}
+
+		fftPlan->baked = true;
+		return CLFFT_SUCCESS;
+	}
+#endif
+
+	bool rc = (fftPlan->inputLayout == CLFFT_REAL) || (fftPlan->outputLayout == CLFFT_REAL);
+
+	// Compress the plan by discarding length '1' dimensions
+	// decision to pick generator
+	if(fftPlan->dim == fftPlan->length.size() && fftPlan->gen != Transpose && !rc) // confirm it is top-level plan (user plan)
+	{
+		size_t dmnsn = fftPlan->dim;
+		bool pow2flag = true;
+
+		// switch case flows with no 'break' statements
+		switch(fftPlan->dim)
+		{
+		case CLFFT_3D:
+
+			if(fftPlan->length[DimZ] == 1)
+			{
+				dmnsn -= 1;
+				fftPlan-> inStride.erase(fftPlan-> inStride.begin() + 2);
+				fftPlan->outStride.erase(fftPlan->outStride.begin() + 2);
+				fftPlan->   length.erase(fftPlan->   length.begin() + 2);
+			}
+			else
+			{
+				if( !IsPo2(fftPlan->length[DimZ])) pow2flag=false;
+			}
+		case CLFFT_2D:
+
+			if(fftPlan->length[DimY] == 1)
+			{
+				dmnsn -= 1;
+				fftPlan-> inStride.erase(fftPlan-> inStride.begin() + 1);
+				fftPlan->outStride.erase(fftPlan->outStride.begin() + 1);
+				fftPlan->   length.erase(fftPlan->   length.begin() + 1);
+			}
+			else
+			{
+				if( !IsPo2(fftPlan->length[DimY])) pow2flag=false;
+			}
+
+		case CLFFT_1D:
+
+			if( (fftPlan->length[DimX] == 1) && (dmnsn > 1) )
+			{
+				dmnsn -= 1;
+				fftPlan-> inStride.erase(fftPlan-> inStride.begin());
+				fftPlan->outStride.erase(fftPlan->outStride.begin());
+				fftPlan->   length.erase(fftPlan->   length.begin());
+			}
+			else
+			{
+				if( !IsPo2(fftPlan->length[DimX])) pow2flag=false;
+			}
+		}
+
+		fftPlan->dim = (clfftDim)dmnsn;
+	}
+
+	// first time check transposed
+	if (fftPlan->transposed != CLFFT_NOTRANSPOSE && fftPlan->dim != CLFFT_2D &&
+		fftPlan->dim == fftPlan->length.size())
+		return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+
+	//	The largest vector we can transform in a single pass
+	//	depends on the GPU caps -- especially the amount of LDS
+	//	available
+	//
+	size_t Large1DThreshold = 0;
+
+	// If the LDS parameters have not been set up yet (uLdsFraction is still 0), pick defaults per dimension.
+	if (fftPlan->uLdsFraction == 0)
+	{
+		switch( fftPlan->dim )
+		{
+		case CLFFT_1D:
+			{
+				if (fftPlan->length[0] < 32768 || fftPlan->length[0] > 1048576)
+					fftPlan->uLdsFraction = 8;
+				else
+					fftPlan->uLdsFraction = 4;
+
+				if (fftPlan->length[0] < 1024 )
+					fftPlan->bLdsComplex = true;
+				else
+					fftPlan->bLdsComplex = false;
+			}
+			break;
+		case CLFFT_2D:
+			{
+				fftPlan->uLdsFraction = 4;
+				fftPlan->bLdsComplex = false;
+			}
+			break;
+		case CLFFT_3D:
+			{
+				//for case 128*128*128 and 1024*128*128, fraction = 8 is faster.
+				fftPlan->uLdsFraction = 4;
+				fftPlan->bLdsComplex = false;
+			}
+			break;
+		}
+	}
+	OPENCL_V(fftPlan->GetMax1DLength (&Large1DThreshold), _T("GetMax1DLength failed"));
+	BUG_CHECK (Large1DThreshold > 1);
+
+	//	Verify that the data passed to us is packed
+	switch( fftPlan->dim )
+	{
+	case CLFFT_1D:
+		{
+			if ( fftPlan->length[0] > Large1DThreshold )
+			{
+				size_t clLengths[] = { 1, 1, 0 };
+				size_t in_1d, in_x, count;
+
+				BUG_CHECK (IsPo2 (Large1DThreshold))
+					//ARG_CHECK (IsPo2 (fftPlan->length[0]))
+
+					// see whether large1D_Xfactor are fixed or not
+					if (fftPlan->large1D_Xfactor == 0 )
+					{
+						if( IsPo2(fftPlan->length[0]) )
+						{
+							in_1d = BitScanF (Large1DThreshold);	// this is log2(LARGE1D_THRESHOLD)
+							in_x  = BitScanF (fftPlan->length[0]);	// this is log2(length)
+							BUG_CHECK (in_1d > 0)
+								count = in_x/in_1d;
+							if (count*in_1d < in_x)
+							{
+								count++;
+								in_1d = in_x / count;
+								if (in_1d * count < in_x) in_1d++;
+							}
+							clLengths[1] = (size_t)1 << in_1d;
+
+						}
+						else
+						{
+							// This array must be kept sorted in the ascending order
+							size_t supported[] = {	1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40,
+													45, 48, 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135,
+													144, 150, 160, 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288,
+													300, 320, 324, 360, 375, 384, 400, 405, 432, 450, 480, 486, 500, 512, 540,
+													576, 600, 625, 640, 648, 675, 720, 729, 750, 768, 800, 810, 864, 900, 960,
+													972, 1000, 1024, 1080, 1125, 1152, 1200, 1215, 1250, 1280, 1296, 1350, 1440,
+													1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875, 1920, 1944, 2000, 2025, 2048,
+													2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592, 2700, 2880, 2916, 3000,
+													3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840, 3888, 4000, 4050, 4096 };
+
+							size_t lenSupported = sizeof(supported)/sizeof(supported[0]);
+							size_t maxFactoredLength = (supported[lenSupported-1] < Large1DThreshold) ? supported[lenSupported-1] : Large1DThreshold;
+
+							size_t halfPowerLength = (size_t)1 << ( (StockhamGenerator::CeilPo2(fftPlan->length[0]) + 1) / 2 );
+							size_t factoredLengthStart =  (halfPowerLength < maxFactoredLength) ? halfPowerLength : maxFactoredLength;
+
+							size_t indexStart = 0;
+							while(supported[indexStart] < factoredLengthStart) indexStart++;
+
+							for(size_t i = indexStart; i >= 1; i--)
+							{
+								if( fftPlan->length[0] % supported[i] == 0 )
+								{
+									clLengths[1] = supported[i];
+									break;
+								}
+							}
+						}
+
+						clLengths[0] = fftPlan->length[0]/clLengths[1];
+					}
+					else
+					{
+						//large1D_Xfactor will not pass to the second level of call
+						clLengths[0] = fftPlan->large1D_Xfactor;
+						clLengths[1] = fftPlan->length[0]/clLengths[0];
+						ARG_CHECK (fftPlan->length[0] == clLengths[0] * clLengths[1]);
+					}
+
+					while (1 && (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
+					{
+						if (!IsPo2(fftPlan->length[0])) break;
+						//if (fftPlan->precision != CLFFT_SINGLE) break;
+						//TBD, only one dimension?
+						if (fftPlan->length.size() > 1) break;
+						if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1) break;
+						//This length is good for using transpose
+						if (fftPlan->length[0] < 131072) break;
+
+						//the first version does not support huge 1D, TBD
+						if (clLengths[0] > Large1DThreshold) break;
+						ARG_CHECK(clLengths[0]>=32 && clLengths[1]>=32);
+
+						if (fftPlan->tmpBufSize==0 )
+						{
+							fftPlan->tmpBufSize = clLengths[0] * clLengths[1] *
+								fftPlan->batchsize * fftPlan->ElementSize();
+						}
+
+						//Transpose
+						//Input --> tmp buffer
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planTX, fftPlan->context, CLFFT_2D, clLengths ),
+							_T( "CreateDefaultPlan Large1d transpose 1 failed" ) );
+
+						FFTPlan* trans1Plan	= NULL;
+						lockRAII* trans1Lock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
+
+						trans1Plan->placeness     = CLFFT_OUTOFPLACE;
+						trans1Plan->precision     = fftPlan->precision;
+						trans1Plan->tmpBufSize    = 0;
+						trans1Plan->batchsize     = fftPlan->batchsize;
+						trans1Plan->envelope	  = fftPlan->envelope;
+						trans1Plan->inputLayout   = fftPlan->inputLayout;
+						trans1Plan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						trans1Plan->inStride[0]   = fftPlan->inStride[0];
+						trans1Plan->inStride[1]   = clLengths[0];
+						trans1Plan->outStride[0]  = 1;
+						trans1Plan->outStride[1]  = clLengths[1];
+						trans1Plan->iDist         = fftPlan->iDist;
+						trans1Plan->oDist         = fftPlan->length[0];
+						trans1Plan->gen           = Transpose;
+						trans1Plan->transflag     = true;
+
+						OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+							_T( "BakePlan large1d trans1 plan failed" ) );
+
+						//Row transform
+						//tmp->output
+						//size clLengths[1], batch clLengths[0], with length[0] twiddle factor multiplication
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &clLengths[1] ),
+							_T( "CreateDefaultPlan Large1d column failed" ) );
+
+						FFTPlan* row1Plan	= NULL;
+						lockRAII* row1Lock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planX, row1Plan, row1Lock ), _T( "fftRepo.getPlan failed" ) );
+
+						row1Plan->placeness     = CLFFT_OUTOFPLACE;
+						row1Plan->precision     = fftPlan->precision;
+						row1Plan->forwardScale  = 1.0f;
+						row1Plan->backwardScale = 1.0f;
+						row1Plan->tmpBufSize    = 0;
+						row1Plan->batchsize     = fftPlan->batchsize;
+						row1Plan->bLdsComplex   = fftPlan->bLdsComplex;
+						row1Plan->uLdsFraction  = fftPlan->uLdsFraction;
+						row1Plan->ldsPadding    = fftPlan->ldsPadding;
+						row1Plan->gen			= fftPlan->gen;
+						row1Plan->envelope		= fftPlan->envelope;
+
+						//Pass large1D flag to confirm we need multiply twiddle factor
+						row1Plan->large1D       = fftPlan->length[0];
+
+						row1Plan->length.push_back(clLengths[0]);
+						row1Plan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						row1Plan->outputLayout  = fftPlan->outputLayout;
+						row1Plan->inStride[0]   = 1;
+						row1Plan->outStride[0]  = fftPlan->outStride[0];
+						row1Plan->iDist         = fftPlan->length[0];
+						row1Plan->oDist         = fftPlan->oDist;
+						row1Plan->inStride.push_back(clLengths[1]);
+						row1Plan->outStride.push_back(clLengths[1]);
+
+						OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ),
+							_T( "BakePlan large1d first row plan failed" ) );
+
+						//Transpose 2
+						//Output --> tmp buffer
+						clLengths[2] = clLengths[0];
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planTY, fftPlan->context, CLFFT_2D, &clLengths[1] ),
+							_T( "CreateDefaultPlan Large1d transpose 1 failed" ) );
+
+						FFTPlan* trans2Plan	= NULL;
+						lockRAII* trans2Lock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
+
+						trans2Plan->placeness     = CLFFT_OUTOFPLACE;
+						trans2Plan->precision     = fftPlan->precision;
+						trans2Plan->tmpBufSize    = 0;
+						trans2Plan->batchsize     = fftPlan->batchsize;
+						trans2Plan->envelope	  = fftPlan->envelope;
+						trans2Plan->inputLayout   = fftPlan->outputLayout;
+						trans2Plan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						trans2Plan->inStride[0]   = fftPlan->outStride[0];
+						trans2Plan->inStride[1]   = clLengths[1];
+						trans2Plan->outStride[0]  = 1;
+						trans2Plan->outStride[1]  = clLengths[0];
+						trans2Plan->iDist         = fftPlan->oDist;
+						trans2Plan->oDist         = fftPlan->length[0];
+						trans2Plan->gen           = Transpose;
+						trans2Plan->transflag     = true;
+
+						OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
+							_T( "BakePlan large1d trans2 plan failed" ) );
+
+						//Row transform 2
+						//tmp->tmp
+						//size clLengths[0], batch clLengths[1]
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D, &clLengths[0] ),
+							_T( "CreateDefaultPlan Large1d column failed" ) );
+
+						FFTPlan* row2Plan	= NULL;
+						lockRAII* row2Lock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planY, row2Plan, row2Lock ), _T( "fftRepo.getPlan failed" ) );
+
+						row2Plan->placeness     = CLFFT_INPLACE;
+						row2Plan->precision     = fftPlan->precision;
+						row2Plan->forwardScale  = fftPlan->forwardScale;
+						row2Plan->backwardScale = fftPlan->backwardScale;
+						row2Plan->tmpBufSize    = 0;
+						row2Plan->batchsize     = fftPlan->batchsize;
+						row2Plan->bLdsComplex   = fftPlan->bLdsComplex;
+						row2Plan->uLdsFraction  = fftPlan->uLdsFraction;
+						row2Plan->ldsPadding    = fftPlan->ldsPadding;
+						row2Plan->gen			= fftPlan->gen;
+						row2Plan->envelope		= fftPlan->envelope;
+
+						//No twiddle factor is needed.
+						row2Plan->large1D       = 0;
+
+						row2Plan->length.push_back(clLengths[1]);
+						row2Plan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						row2Plan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						row2Plan->inStride[0]   = 1;
+						row2Plan->outStride[0]  = 1;
+						row2Plan->iDist         = fftPlan->length[0];
+						row2Plan->oDist         = fftPlan->length[0];
+						row2Plan->inStride.push_back(clLengths[0]);
+						row2Plan->outStride.push_back(clLengths[0]);
+
+						OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
+							_T( "BakePlan large1d first row plan failed" ) );
+
+						//Transpose 3
+						//tmp --> output
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planTZ, fftPlan->context, CLFFT_2D, clLengths ),
+							_T( "CreateDefaultPlan Large1d transpose 1 failed" ) );
+
+						FFTPlan* trans3Plan	= NULL;
+						lockRAII* trans3Lock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planTZ, trans3Plan, trans3Lock ), _T( "fftRepo.getPlan failed" ) );
+
+						trans3Plan->placeness     = CLFFT_OUTOFPLACE;
+						trans3Plan->precision     = fftPlan->precision;
+						trans3Plan->tmpBufSize    = 0;
+						trans3Plan->batchsize     = fftPlan->batchsize;
+						trans3Plan->envelope	  = fftPlan->envelope;
+						trans3Plan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						trans3Plan->outputLayout  = fftPlan->outputLayout;
+						trans3Plan->inStride[0]   = 1;
+						trans3Plan->inStride[1]   = clLengths[0];
+						trans3Plan->outStride[0]  = fftPlan->outStride[0];
+						trans3Plan->outStride[1]  = clLengths[1];
+						trans3Plan->iDist         = fftPlan->length[0];
+						trans3Plan->oDist         = fftPlan->oDist;
+						trans3Plan->gen           = Transpose;
+						trans3Plan->transflag     = true;
+
+						OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
+							_T( "BakePlan large1d trans3 plan failed" ) );
+
+						fftPlan->transflag = true;
+						fftPlan->baked = true;
+						return	CLFFT_SUCCESS;
+					}
+
+					size_t length0 = clLengths[0];
+					size_t length1 = clLengths[1];
+
+					if(fftPlan->inputLayout == CLFFT_REAL)
+					{
+						if (fftPlan->tmpBufSizeRC==0 )
+						{
+							fftPlan->tmpBufSizeRC = length0 * length1 *
+								fftPlan->batchsize * fftPlan->ElementSize();
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								fftPlan->tmpBufSizeRC *= fftPlan->length[index];
+							}
+						}
+
+						// column FFT, size clLengths[1], batch clLengths[0], with length[0] twiddle factor multiplication
+						// transposed output
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &clLengths[1] ),
+							_T( "CreateDefaultPlan Large1d column failed" ) );
+
+						FFTPlan* colTPlan	= NULL;
+						lockRAII* colLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planX, colTPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// current plan is to create intermediate buffer, packed and interleave
+						// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+						// elements in the original buffer. Like a transpose of the matrix
+						// we need to pass clLengths[0] and instride size to kernel, so kernel can tell the difference
+
+						//this part are common for both passes
+						colTPlan->placeness     = CLFFT_OUTOFPLACE;
+						colTPlan->precision     = fftPlan->precision;
+						colTPlan->forwardScale  = 1.0f;
+						colTPlan->backwardScale = 1.0f;
+						colTPlan->tmpBufSize    = 0;
+						colTPlan->batchsize     = fftPlan->batchsize;
+						colTPlan->bLdsComplex   = fftPlan->bLdsComplex;
+						colTPlan->uLdsFraction  = fftPlan->uLdsFraction;
+						colTPlan->ldsPadding    = fftPlan->ldsPadding;
+						colTPlan->gen			= fftPlan->gen;
+						colTPlan->envelope			= fftPlan->envelope;
+
+						//Pass large1D flag to confirm we need multiply twiddle factor
+						colTPlan->large1D       = fftPlan->length[0];
+						colTPlan->RCsimple		= true;
+
+						colTPlan->length.push_back(clLengths[0]);
+
+						// first Pass
+						colTPlan->inputLayout   = fftPlan->inputLayout;
+						colTPlan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						colTPlan->inStride[0]   = fftPlan->inStride[0] * clLengths[0];
+						colTPlan->outStride[0]  = 1;
+						colTPlan->iDist         = fftPlan->iDist;
+						colTPlan->oDist         = length0 * length1;//fftPlan->length[0];
+						colTPlan->inStride.push_back(fftPlan->inStride[0]);
+						colTPlan->outStride.push_back(length1);//clLengths[1]);
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							colTPlan->length.push_back(fftPlan->length[index]);
+							colTPlan->inStride.push_back(fftPlan->inStride[index]);
+							// tmp buffer is tightly packed
+							colTPlan->outStride.push_back(colTPlan->oDist);
+							colTPlan->oDist        *= fftPlan->length[index];
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d first column plan failed" ) );
+
+						//another column FFT, size clLengths[0], batch clLengths[1], output without transpose
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D,  &clLengths[0] ),
+							_T( "CreateDefaultPlan large1D row failed" ) );
+
+						FFTPlan* col2Plan	= NULL;
+						lockRAII* rowLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planY, col2Plan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// This is second column fft, intermediate buffer is packed and interleaved
+						// we need to pass clLengths[1] and instride size to kernel, so kernel can tell the difference
+
+						// common part for both passes
+						col2Plan->placeness     = CLFFT_INPLACE;
+						col2Plan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						col2Plan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+
+						col2Plan->precision     = fftPlan->precision;
+						col2Plan->forwardScale  = fftPlan->forwardScale;
+						col2Plan->backwardScale = fftPlan->backwardScale;
+						col2Plan->tmpBufSize    = 0;
+						col2Plan->batchsize     = fftPlan->batchsize;
+						col2Plan->bLdsComplex   = fftPlan->bLdsComplex;
+						col2Plan->uLdsFraction  = fftPlan->uLdsFraction;
+						col2Plan->ldsPadding    = fftPlan->ldsPadding;
+						col2Plan->gen			= fftPlan->gen;
+						col2Plan->envelope			= fftPlan->envelope;
+
+						col2Plan->length.push_back(length1);
+
+						col2Plan->inStride[0]  = length1;
+						col2Plan->inStride.push_back(1);
+						col2Plan->iDist        = length0 * length1;
+
+						col2Plan->outStride[0] = length1;
+						col2Plan->outStride.push_back(1);
+						col2Plan->oDist         = length0 * length1;
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							col2Plan->length.push_back(fftPlan->length[index]);
+							col2Plan->inStride.push_back(col2Plan->iDist);
+							col2Plan->outStride.push_back(col2Plan->oDist);
+							col2Plan->iDist   *= fftPlan->length[index];
+							col2Plan->oDist   *= fftPlan->length[index];
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d second column plan failed" ) );
+
+
+						// copy plan to get back to hermitian
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planRCcopy, fftPlan->context, CLFFT_1D,  &fftPlan->length[0] ),
+							_T( "CreateDefaultPlan RC copy failed" ) );
+
+						FFTPlan* copyPlan	= NULL;
+						lockRAII* copyLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planRCcopy, copyPlan, copyLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// The copy plan reads the packed, interleaved intermediate data (unit stride)
+						// and writes it out using the parent plan's output layout, stride and distance
+
+						// common part for both passes
+						copyPlan->placeness     = CLFFT_OUTOFPLACE;
+						copyPlan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						copyPlan->outputLayout  = fftPlan->outputLayout;
+
+						copyPlan->precision     = fftPlan->precision;
+						copyPlan->forwardScale  = 1.0f;
+						copyPlan->backwardScale = 1.0f;
+						copyPlan->tmpBufSize    = 0;
+						copyPlan->batchsize     = fftPlan->batchsize;
+						copyPlan->bLdsComplex   = fftPlan->bLdsComplex;
+						copyPlan->uLdsFraction  = fftPlan->uLdsFraction;
+						copyPlan->ldsPadding    = fftPlan->ldsPadding;
+						copyPlan->gen			= Copy;
+						copyPlan->envelope		= fftPlan->envelope;
+
+
+						copyPlan->inStride[0]  = 1;
+						copyPlan->iDist        = fftPlan->length[0];
+
+						copyPlan->outStride[0] = fftPlan->outStride[0];
+						copyPlan->oDist         = fftPlan->oDist;
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							copyPlan->length.push_back(fftPlan->length[index]);
+							copyPlan->inStride.push_back(copyPlan->inStride[index-1] * fftPlan->length[index-1]);
+							copyPlan->iDist   *= fftPlan->length[index];
+							copyPlan->outStride.push_back(fftPlan->outStride[index]);
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planRCcopy, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d RC copy plan failed" ) );
+
+					}
+					else if(fftPlan->outputLayout == CLFFT_REAL)
+					{
+						if (fftPlan->tmpBufSizeRC==0 )
+						{
+							fftPlan->tmpBufSizeRC = length0 * length1 *
+								fftPlan->batchsize * fftPlan->ElementSize();
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								fftPlan->tmpBufSizeRC *= fftPlan->length[index];
+							}
+						}
+
+						// copy plan to go from hermitian to full complex
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planRCcopy, fftPlan->context, CLFFT_1D,  &fftPlan->length[0] ),
+							_T( "CreateDefaultPlan RC copy failed" ) );
+
+						FFTPlan* copyPlan	= NULL;
+						lockRAII* copyLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planRCcopy, copyPlan, copyLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// The copy plan reads the input using the parent plan's input layout, stride and distance
+						// and writes packed, complex-interleaved data (unit stride)
+
+						// common part for both passes
+						copyPlan->placeness     = CLFFT_OUTOFPLACE;
+						copyPlan->inputLayout   = fftPlan->inputLayout;
+						copyPlan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+
+						copyPlan->precision     = fftPlan->precision;
+						copyPlan->forwardScale  = 1.0f;
+						copyPlan->backwardScale = 1.0f;
+						copyPlan->tmpBufSize    = 0;
+						copyPlan->batchsize     = fftPlan->batchsize;
+						copyPlan->bLdsComplex   = fftPlan->bLdsComplex;
+						copyPlan->uLdsFraction  = fftPlan->uLdsFraction;
+						copyPlan->ldsPadding    = fftPlan->ldsPadding;
+						copyPlan->gen			= Copy;
+						copyPlan->envelope		= fftPlan->envelope;
+
+						copyPlan->inStride[0]  = fftPlan->inStride[0];
+						copyPlan->iDist        = fftPlan->iDist;
+
+						copyPlan->outStride[0]  = 1;
+						copyPlan->oDist        = fftPlan->length[0];
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							copyPlan->length.push_back(fftPlan->length[index]);
+							copyPlan->outStride.push_back(copyPlan->outStride[index-1] * fftPlan->length[index-1]);
+							copyPlan->oDist   *= fftPlan->length[index];
+							copyPlan->inStride.push_back(fftPlan->inStride[index]);
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planRCcopy, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d RC copy plan failed" ) );
+
+						// column FFT, size clLengths[1], batch clLengths[0], with length[0] twiddle factor multiplication
+						// transposed output
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &clLengths[1] ),
+							_T( "CreateDefaultPlan Large1d column failed" ) );
+
+						FFTPlan* colTPlan	= NULL;
+						lockRAII* colLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planX, colTPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// current plan is to create intermediate buffer, packed and interleave
+						// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+						// elements in the original buffer. Like a transpose of the matrix
+						// we need to pass clLengths[0] and instride size to kernel, so kernel can tell the difference
+
+						//this part are common for both passes
+						colTPlan->placeness     = CLFFT_INPLACE;
+						colTPlan->precision     = fftPlan->precision;
+						colTPlan->forwardScale  = 1.0f;
+						colTPlan->backwardScale = 1.0f;
+						colTPlan->tmpBufSize    = 0;
+						colTPlan->batchsize     = fftPlan->batchsize;
+						colTPlan->bLdsComplex   = fftPlan->bLdsComplex;
+						colTPlan->uLdsFraction  = fftPlan->uLdsFraction;
+						colTPlan->ldsPadding    = fftPlan->ldsPadding;
+						colTPlan->gen			= fftPlan->gen;
+						colTPlan->envelope			= fftPlan->envelope;
+
+						//Pass large1D flag to confirm we need multiply twiddle factor
+						colTPlan->large1D       = fftPlan->length[0];
+
+						colTPlan->length.push_back(clLengths[0]);
+
+						// first Pass
+						colTPlan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						colTPlan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+
+
+						colTPlan->inStride[0]  = length0;
+						colTPlan->inStride.push_back(1);
+						colTPlan->iDist        = length0 * length1;
+
+						colTPlan->outStride[0] = length0;
+						colTPlan->outStride.push_back(1);
+						colTPlan->oDist         = length0 * length1;
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							colTPlan->length.push_back(fftPlan->length[index]);
+							colTPlan->inStride.push_back(colTPlan->iDist);
+							colTPlan->outStride.push_back(colTPlan->oDist);
+							colTPlan->iDist   *= fftPlan->length[index];
+							colTPlan->oDist   *= fftPlan->length[index];
+						}
+
+
+						OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d first column plan failed" ) );
+
+						//another column FFT, size clLengths[0], batch clLengths[1], output without transpose
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D,  &clLengths[0] ),
+							_T( "CreateDefaultPlan large1D row failed" ) );
+
+						FFTPlan* col2Plan	= NULL;
+						lockRAII* rowLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planY, col2Plan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// This is second column fft, intermediate buffer is packed and interleaved
+						// we need to pass clLengths[1] and instride size to kernel, so kernel can tell the difference
+
+						// common part for both passes
+						col2Plan->placeness     = CLFFT_OUTOFPLACE;
+						col2Plan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+						col2Plan->outputLayout  = fftPlan->outputLayout;
+
+						col2Plan->precision     = fftPlan->precision;
+						col2Plan->forwardScale  = fftPlan->forwardScale;
+						col2Plan->backwardScale = fftPlan->backwardScale;
+						col2Plan->tmpBufSize    = 0;
+						col2Plan->batchsize     = fftPlan->batchsize;
+						col2Plan->bLdsComplex   = fftPlan->bLdsComplex;
+						col2Plan->uLdsFraction  = fftPlan->uLdsFraction;
+						col2Plan->ldsPadding    = fftPlan->ldsPadding;
+						col2Plan->gen			= fftPlan->gen;
+						col2Plan->envelope			= fftPlan->envelope;
+
+						col2Plan->RCsimple = true;
+						col2Plan->length.push_back(length1);
+
+						col2Plan->inStride[0]  = 1;
+						col2Plan->inStride.push_back(length0);
+						col2Plan->iDist        = length0 * length1;
+
+						col2Plan->outStride[0] = length1 * fftPlan->outStride[0];
+						col2Plan->outStride.push_back(fftPlan->outStride[0]);
+						col2Plan->oDist         = fftPlan->oDist;
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							col2Plan->length.push_back(fftPlan->length[index]);
+							col2Plan->inStride.push_back(col2Plan->iDist);
+							col2Plan->iDist   *= fftPlan->length[index];
+							col2Plan->outStride.push_back(fftPlan->outStride[index]);
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d second column plan failed" ) );
+					}
+					else
+					{
+						if (fftPlan->cacheSize) {
+							length0 += fftPlan->cacheSize & 0xFF;
+							length1 += (fftPlan->cacheSize >> 8) & 0xFF;
+							if (length0 * length1 > 2 * fftPlan->length[0])
+							{
+								length0 = clLengths[0];
+								length1 = clLengths[1];
+							}
+						}
+						else
+						{
+							if (fftPlan->length[0] == 131072) length1 += 1;     //x0=0, y0=1 good for Cayman card
+							else if (fftPlan->length[0] == 65536) length1 += 8; //x0=0, y0=8 good for Cypress card
+						}
+
+						if (clLengths[0] > Large1DThreshold)
+						{//make no change for Huge 1D case
+							length0 = clLengths[0];
+							length1 = clLengths[1];
+						}
+
+						if (fftPlan->tmpBufSize==0 )
+						{
+							fftPlan->tmpBufSize = length0 * length1 *
+								fftPlan->batchsize * fftPlan->ElementSize();
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								fftPlan->tmpBufSize *= fftPlan->length[index];
+							}
+						}
+						else
+						{//make no change for cases passed from higher dimension
+							length0 = clLengths[0];
+							length1 = clLengths[1];
+						}
+
+						// column FFT, size clLengths[1], batch clLengths[0], with length[0] twiddle factor multiplication
+						// transposed output
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &clLengths[1] ),
+							_T( "CreateDefaultPlan Large1d column failed" ) );
+
+						FFTPlan* colTPlan	= NULL;
+						lockRAII* colLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planX, colTPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// current plan is to create intermediate buffer, packed and interleave
+						// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+						// elements in the original buffer. Like a transpose of the matrix
+						// we need to pass clLengths[0] and instride size to kernel, so kernel can tell the difference
+
+						//this part are common for both passes
+						colTPlan->placeness     = CLFFT_OUTOFPLACE;
+						colTPlan->precision     = fftPlan->precision;
+						colTPlan->forwardScale  = 1.0f;
+						colTPlan->backwardScale = 1.0f;
+						colTPlan->tmpBufSize    = 0;
+						colTPlan->batchsize     = fftPlan->batchsize;
+						colTPlan->bLdsComplex   = fftPlan->bLdsComplex;
+						colTPlan->uLdsFraction  = fftPlan->uLdsFraction;
+						colTPlan->ldsPadding    = fftPlan->ldsPadding;
+						colTPlan->gen			= fftPlan->gen;
+						colTPlan->envelope			= fftPlan->envelope;
+
+						//Pass large1D flag to confirm we need multiply twiddle factor
+						colTPlan->large1D       = fftPlan->length[0];
+
+						colTPlan->length.push_back(clLengths[0]);
+
+						if (fftPlan->large1D == 0)
+						{
+							// first Pass
+							colTPlan->inputLayout   = fftPlan->inputLayout;
+							colTPlan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+							colTPlan->inStride[0]   = fftPlan->inStride[0] * clLengths[0];
+							colTPlan->outStride[0]  = 1;
+							colTPlan->iDist         = fftPlan->iDist;
+							colTPlan->oDist         = length0 * length1;//fftPlan->length[0];
+							colTPlan->inStride.push_back(fftPlan->inStride[0]);
+							colTPlan->outStride.push_back(length1);//clLengths[1]);
+
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								colTPlan->length.push_back(fftPlan->length[index]);
+								colTPlan->inStride.push_back(fftPlan->inStride[index]);
+								// tmp buffer is tightly packed
+								colTPlan->outStride.push_back(colTPlan->oDist);
+								colTPlan->oDist        *= fftPlan->length[index];
+							}
+						}
+						else
+						{
+							// second pass for huge 1D
+							colTPlan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+							colTPlan->outputLayout  = fftPlan->outputLayout;
+							colTPlan->inStride[0]   = fftPlan->length[1]*clLengths[0];
+							colTPlan->outStride[0]  = fftPlan->outStride[0];
+							colTPlan->iDist         = fftPlan->length[0];
+							colTPlan->oDist         = fftPlan->oDist;
+							colTPlan->inStride.push_back(fftPlan->length[1]);
+							colTPlan->outStride.push_back(fftPlan->outStride[0]*clLengths[1]);
+
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								colTPlan->length.push_back(fftPlan->length[index]);
+								colTPlan->inStride.push_back(fftPlan->inStride[index]);
+								colTPlan->outStride.push_back(fftPlan->outStride[index]);
+								colTPlan->iDist        *= fftPlan->length[index];
+							}
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d first column plan failed" ) );
+
+						//another column FFT, size clLengths[0], batch clLengths[1], output without transpose
+						OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D,  &clLengths[0] ),
+							_T( "CreateDefaultPlan large1D row failed" ) );
+
+						FFTPlan* col2Plan	= NULL;
+						lockRAII* rowLock	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planY, col2Plan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+						// This is second column fft, intermediate buffer is packed and interleaved
+						// we need to pass clLengths[1] and instride size to kernel, so kernel can tell the difference
+
+						// common part for both passes
+						col2Plan->outputLayout  = fftPlan->outputLayout;
+						col2Plan->precision     = fftPlan->precision;
+						col2Plan->forwardScale  = fftPlan->forwardScale;
+						col2Plan->backwardScale = fftPlan->backwardScale;
+						col2Plan->tmpBufSize    = 0;
+						col2Plan->batchsize     = fftPlan->batchsize;
+						col2Plan->oDist         = fftPlan->oDist;
+						col2Plan->bLdsComplex   = fftPlan->bLdsComplex;
+						col2Plan->uLdsFraction  = fftPlan->uLdsFraction;
+						col2Plan->ldsPadding    = fftPlan->ldsPadding;
+						col2Plan->gen			= fftPlan->gen;
+						col2Plan->envelope			= fftPlan->envelope;
+
+						if (clLengths[0] > Large1DThreshold)
+							//prepare for huge 1D
+							col2Plan->large1D   = fftPlan->length[0];
+
+						col2Plan->length.push_back(clLengths[1]);
+						col2Plan->outStride.push_back(fftPlan->outStride[0]);
+
+						if (fftPlan->large1D == 0)
+						{
+							//first layer, large 1D from tmp buffer to output buffer
+							col2Plan->placeness    = CLFFT_OUTOFPLACE;
+							col2Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+							col2Plan->inStride[0]  = length1;//clLengths[1];
+							col2Plan->outStride[0] = fftPlan->outStride[0] * clLengths[1];
+							col2Plan->iDist        = length0 * length1; //fftPlan->length[0];
+							col2Plan->inStride.push_back(1);
+
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								col2Plan->length.push_back(fftPlan->length[index]);
+								col2Plan->inStride.push_back(col2Plan->iDist);
+								col2Plan->outStride.push_back(fftPlan->outStride[index]);
+								col2Plan->iDist   *= fftPlan->length[index];
+							}
+						}
+						else
+						{
+							//second layer, huge 1D from output buffer to output buffer
+							col2Plan->placeness    = CLFFT_INPLACE;
+							col2Plan->inputLayout  = fftPlan->outputLayout;
+							col2Plan->inStride[0]  = fftPlan->outStride[0] * clLengths[1];
+							col2Plan->outStride[0] = col2Plan->inStride[0];
+							col2Plan->iDist        = fftPlan->oDist;
+							col2Plan->inStride.push_back(fftPlan->outStride[0]);
+
+							for (size_t index=1; index < fftPlan->length.size(); index++)
+							{
+								col2Plan->length.push_back(fftPlan->length[index]);
+								col2Plan->inStride.push_back(fftPlan->outStride[index]);
+								col2Plan->outStride.push_back(fftPlan->outStride[index]);
+							}
+						}
+
+						OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d second column plan failed" ) );
+					}
+
+					fftPlan->baked = true;
+					return	CLFFT_SUCCESS;
+			}
+		}
+		break;
+	case CLFFT_2D:
+		{
+			size_t length0 = fftPlan->length[0];
+			size_t length1 = fftPlan->length[1];
+
+
+			if (fftPlan->cacheSize)
+			{
+				length0 += fftPlan->cacheSize & 0xFF;
+				length1 += (fftPlan->cacheSize >> 8) & 0xFF;
+				if (length0 * length1 > 2 * fftPlan->length[0] * fftPlan->length[1])
+				{
+					length0 = fftPlan->length[0];
+					length1 = fftPlan->length[1];
+				}
+			}
+			else
+			{
+				if (fftPlan->length[0]==256 && fftPlan->length[1]==256)
+				{
+					length0 += 8;
+					length1 += 1;
+				}
+				else if (fftPlan->length[0]==512 && fftPlan->length[1]==512)
+				{
+					length0 += 1;
+					length1 += 1;//length1 += 0;
+				}
+				else if (fftPlan->length[0]==1024 && fftPlan->length[1]==512)
+				{
+					length0 += 2;
+					length1 += 2;//length1 += 0;
+				}
+				else if (fftPlan->length[0]==1024 && fftPlan->length[1]==1024)
+				{
+					length0 += 1;
+					length1 += 1;//length1 += 0;
+				}
+			}
+
+			if (fftPlan->length[0] > Large1DThreshold ||
+				fftPlan->length[1] > Large1DThreshold)
+				fftPlan->large2D = true;
+
+			while (1 && (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
+			{
+				//break;
+				if (fftPlan->transflag) //Transpose for 2D
+				{
+					OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateTransposeProgram() failed" ) );
+					OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
+
+					fftPlan->baked		= true;
+					return	CLFFT_SUCCESS;
+				}
+
+				if (fftPlan->length.size() != 2) break;
+				if (!(IsPo2(fftPlan->length[0])) || !(IsPo2(fftPlan->length[1])))
+					break;
+				if (fftPlan->length[1] < 32) break;
+				//TBD: restrict the use of large2D in x!=y case because we will need two temp buffers
+				//     (1) for 2D usage (2) for 1D large usage
+				//if (fftPlan->large2D) break;
+				//Performance shows 512 is a good case with transpose
+				//if the user wants the result to be transposed, then we will.
+				if (fftPlan->length[0] < 512 && fftPlan->transposed == CLFFT_NOTRANSPOSE) break;
+				if (fftPlan->length[0] < 32) break;
+				//x!=y case, we need tmp buffer, currently temp buffer only supports interleaved format
+				//if (fftPlan->length[0] != fftPlan->length[1] && fftPlan->outputLayout == CLFFT_COMPLEX_PLANAR) break;
+				if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1 ||
+					fftPlan->inStride[1] != fftPlan->length[0] || fftPlan->outStride[1] != fftPlan->length[0])
+					break;
+				//if (fftPlan->placeness != CLFFT_INPLACE || fftPlan->inputLayout != CLFFT_COMPLEX_PLANAR)
+				//	break;
+				//if (fftPlan->batchsize != 1) break;
+				//if (fftPlan->precision != CLFFT_SINGLE) break;
+
+				fftPlan->transflag = true;
+
+				//create row plan,
+				// x=y & x!=y, In->In for inplace, In->out for outofplace
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
+					_T( "CreateDefaultPlan for planX failed" ) );
+
+				FFTPlan* rowPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+				rowPlan->inputLayout     = fftPlan->inputLayout;
+				rowPlan->outputLayout    = fftPlan->outputLayout;
+				rowPlan->placeness       = fftPlan->placeness;
+				rowPlan->outStride[0]    = fftPlan->outStride[0];
+				rowPlan->outStride.push_back(fftPlan->outStride[1]);
+				rowPlan->oDist           = fftPlan->oDist;
+				rowPlan->precision       = fftPlan->precision;
+				rowPlan->forwardScale    = 1.0f;
+				rowPlan->backwardScale   = 1.0f;
+				rowPlan->tmpBufSize      = 0;
+				rowPlan->bLdsComplex     = fftPlan->bLdsComplex;
+				rowPlan->uLdsFraction    = fftPlan->uLdsFraction;
+				rowPlan->ldsPadding      = fftPlan->ldsPadding;
+				rowPlan->gen			 = fftPlan->gen;
+				rowPlan->envelope		 = fftPlan->envelope;
+				rowPlan->batchsize       = fftPlan->batchsize;
+				rowPlan->inStride[0]     = fftPlan->inStride[0];
+				rowPlan->length.push_back(fftPlan->length[1]);
+				rowPlan->inStride.push_back(fftPlan->inStride[1]);
+				rowPlan->iDist           = fftPlan->iDist;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ),
+					_T( "BakePlan for planX failed" ) );
+
+				//Create transpose plan for first transpose
+				//x=y: inplace. x!=y inplace: in->tmp, outofplace out->tmp
+				size_t clLengths[] = { 1, 1, 0 };
+				clLengths[0] = fftPlan->length[0];
+				clLengths[1] = fftPlan->length[1];
+
+				bool xyflag = (clLengths[0]==clLengths[1]) ? false : true;
+				if (xyflag && fftPlan->tmpBufSize==0 && fftPlan->length.size()<=2)
+				{
+					// we need tmp buffer for x!=y case
+					// we assume the tmp buffer is packed interleaved
+					fftPlan->tmpBufSize = length0 * length1 *
+						fftPlan->batchsize * fftPlan->ElementSize();
+				}
+
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planTX, fftPlan->context, CLFFT_2D, clLengths ),
+					_T( "CreateDefaultPlan for planT failed" ) );
+
+				FFTPlan* transPlanX	= NULL;
+				lockRAII* transLockX	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planTX, transPlanX, transLockX ), _T( "fftRepo.getPlan failed" ) );
+
+				transPlanX->inputLayout     = fftPlan->outputLayout;
+				transPlanX->precision       = fftPlan->precision;
+				transPlanX->tmpBufSize      = 0;
+				transPlanX->gen			    = Transpose;
+				transPlanX->envelope		= fftPlan->envelope;
+				transPlanX->batchsize       = fftPlan->batchsize;
+				transPlanX->inStride[0]     = fftPlan->outStride[0];
+				transPlanX->inStride[1]     = fftPlan->outStride[1];
+				transPlanX->iDist           = fftPlan->oDist;
+				transPlanX->transflag       = true;
+
+				if (xyflag)
+				{
+					transPlanX->outputLayout    = CLFFT_COMPLEX_INTERLEAVED;
+					transPlanX->placeness       = CLFFT_OUTOFPLACE;
+					transPlanX->outStride[0]    = 1;
+					transPlanX->outStride[1]    = clLengths[0];
+					transPlanX->oDist           = clLengths[0] * clLengths[1];
+				}
+				else
+				{
+					transPlanX->outputLayout    = fftPlan->outputLayout;
+					transPlanX->placeness       = CLFFT_INPLACE;
+					transPlanX->outStride[0]    = fftPlan->outStride[0];
+					transPlanX->outStride[1]    = fftPlan->outStride[1];
+					transPlanX->oDist           = fftPlan->oDist;
+				}
+
+				OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+					_T( "BakePlan for planTX failed" ) );
+
+				//create second row plan
+				//x!=y: tmp->tmp, x=y case: In->In or Out->Out
+				//if Transposed result is a choice x!=y: tmp->In or out
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+					_T( "CreateDefaultPlan for planY failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				if (xyflag)
+				{
+					colPlan->inputLayout     = CLFFT_COMPLEX_INTERLEAVED;
+					colPlan->inStride[0]     = 1;
+					colPlan->inStride.push_back(clLengths[1]);
+					colPlan->iDist           = clLengths[0] * clLengths[1];
+
+					if (fftPlan->transposed == CLFFT_NOTRANSPOSE)
+					{
+						colPlan->outputLayout    = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->outStride[0]    = 1;
+						colPlan->outStride.push_back(clLengths[1]);
+						colPlan->oDist           = clLengths[0] * clLengths[1];
+						colPlan->placeness       = CLFFT_INPLACE;
+					}
+					else
+					{
+						colPlan->outputLayout    = fftPlan->outputLayout;
+						colPlan->outStride[0]    = fftPlan->outStride[0];
+						colPlan->outStride.push_back(clLengths[1] * fftPlan->outStride[0]);
+						colPlan->oDist           = fftPlan->oDist;
+						colPlan->placeness       = CLFFT_OUTOFPLACE;
+					}
+				}
+				else
+				{
+					colPlan->inputLayout     = fftPlan->outputLayout;
+					colPlan->outputLayout    = fftPlan->outputLayout;
+					colPlan->outStride[0]    = fftPlan->outStride[0];
+					colPlan->outStride.push_back(fftPlan->outStride[1]);
+					colPlan->oDist           = fftPlan->oDist;
+					colPlan->inStride[0]     = fftPlan->outStride[0];
+					colPlan->inStride.push_back(fftPlan->outStride[1]);
+					colPlan->iDist           = fftPlan->oDist;
+					colPlan->placeness       = CLFFT_INPLACE;
+				}
+
+				colPlan->precision       = fftPlan->precision;
+				colPlan->forwardScale    = fftPlan->forwardScale;
+				colPlan->backwardScale   = fftPlan->backwardScale;
+				colPlan->tmpBufSize      = 0;
+				colPlan->bLdsComplex     = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction    = fftPlan->uLdsFraction;
+				colPlan->ldsPadding      = fftPlan->ldsPadding;
+				colPlan->gen			 = fftPlan->gen;
+				colPlan->envelope		 = fftPlan->envelope;
+				colPlan->batchsize       = fftPlan->batchsize;
+				colPlan->length.push_back(fftPlan->length[0]);
+
+				OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
+					_T( "BakePlan for planY failed" ) );
+
+				if (fftPlan->transposed == CLFFT_TRANSPOSED)
+				{
+					fftPlan->baked = true;
+					return	CLFFT_SUCCESS;
+				}
+
+				//Create transpose plan for second transpose
+				//x!=y case tmp->In or Out, x=y case In->In or Out->out
+				clLengths[0] = fftPlan->length[1];
+				clLengths[1] = fftPlan->length[0];
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planTY, fftPlan->context, CLFFT_2D, clLengths ),
+					_T( "CreateDefaultPlan for planTY failed" ) );
+
+				FFTPlan* transPlanY	= NULL;
+				lockRAII* transLockY	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planTY, transPlanY, transLockY ), _T( "fftRepo.getPlan failed" ) );
+
+				if (xyflag)
+				{
+					transPlanY->inputLayout     = CLFFT_COMPLEX_INTERLEAVED;
+					transPlanY->placeness       = CLFFT_OUTOFPLACE;
+					transPlanY->inStride[0]     = 1;
+					transPlanY->inStride[1]     = clLengths[0];
+					transPlanY->iDist           = clLengths[0] * clLengths[1];
+				}
+				else
+				{
+					transPlanY->inputLayout     = fftPlan->outputLayout;
+					transPlanY->placeness       = CLFFT_INPLACE;
+					transPlanY->inStride[0]     = fftPlan->outStride[0];
+					transPlanY->inStride[1]     = fftPlan->outStride[1];
+					transPlanY->iDist           = fftPlan->oDist;
+				}
+				transPlanY->outputLayout    = fftPlan->outputLayout;
+				transPlanY->outStride[0]    = fftPlan->outStride[0];
+				transPlanY->outStride[1]    = fftPlan->outStride[1];
+				transPlanY->oDist           = fftPlan->oDist;
+				transPlanY->precision       = fftPlan->precision;
+				transPlanY->tmpBufSize      = 0;
+				transPlanY->gen			    = Transpose;
+				transPlanY->envelope		= fftPlan->envelope;
+				transPlanY->batchsize       = fftPlan->batchsize;
+				transPlanY->transflag       = true;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
+					_T( "BakePlan for planTY failed" ) );
+
+				fftPlan->baked = true;
+				return	CLFFT_SUCCESS;
+			}
+
+			//check transposed
+			if (fftPlan->transposed != CLFFT_NOTRANSPOSE)
+				return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+
+
+			if(fftPlan->inputLayout == CLFFT_REAL)
+			{
+				length0 = fftPlan->length[0];
+				length1 = fftPlan->length[1];
+
+				size_t Nt = (1 + length0/2);
+				if (fftPlan->tmpBufSize==0)
+				{
+					fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
+					if(fftPlan->length.size() > 2) fftPlan->tmpBufSize *= fftPlan->length[2];
+				}
+
+				// create row plan
+				// real to hermitian
+
+				//create row plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
+					_T( "CreateDefaultPlan for planX failed" ) );
+
+				FFTPlan* rowPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+
+				rowPlan->outputLayout  = fftPlan->outputLayout;
+				rowPlan->inputLayout  = fftPlan->inputLayout;
+				rowPlan->placeness     = fftPlan->placeness;
+				rowPlan->length.push_back(length1);
+
+				rowPlan->inStride[0]  = fftPlan->inStride[0];
+				rowPlan->inStride.push_back(fftPlan->inStride[1]);
+				rowPlan->iDist         = fftPlan->iDist;
+
+				rowPlan->precision     = fftPlan->precision;
+				rowPlan->forwardScale  = 1.0f;
+				rowPlan->backwardScale = 1.0f;
+				rowPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				rowPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				rowPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				rowPlan->ldsPadding    = fftPlan->ldsPadding;
+				rowPlan->gen			= fftPlan->gen;
+				rowPlan->envelope			= fftPlan->envelope;
+
+				rowPlan->batchsize    = fftPlan->batchsize;
+
+				rowPlan->outStride[0]  = fftPlan->outStride[0];
+				rowPlan->outStride.push_back(fftPlan->outStride[1]);
+				rowPlan->oDist         = fftPlan->oDist;
+
+				//this 2d is decomposed from 3d
+				if (fftPlan->length.size()>2)
+				{
+					rowPlan->length.push_back(fftPlan->length[2]);
+					rowPlan->inStride.push_back(fftPlan->inStride[2]);
+					rowPlan->outStride.push_back(fftPlan->outStride[2]);
+				}
+
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
+
+				// create col plan
+				// complex to complex
+
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+					_T( "CreateDefaultPlan for planY failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				switch(fftPlan->outputLayout)
+				{
+				case CLFFT_HERMITIAN_INTERLEAVED:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					}
+					break;
+				case CLFFT_HERMITIAN_PLANAR:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
+						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+					}
+					break;
+				default: assert(false);
+				}
+
+				colPlan->placeness     = CLFFT_INPLACE;
+				colPlan->length.push_back(Nt);
+
+				colPlan->outStride[0]  = fftPlan->outStride[1];
+				colPlan->outStride.push_back(fftPlan->outStride[0]);
+				colPlan->oDist         = fftPlan->oDist;
+
+
+				colPlan->precision     = fftPlan->precision;
+				colPlan->forwardScale  = fftPlan->forwardScale;
+				colPlan->backwardScale = fftPlan->backwardScale;
+				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				colPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				colPlan->ldsPadding    = fftPlan->ldsPadding;
+				colPlan->gen			= fftPlan->gen;
+				colPlan->envelope			= fftPlan->envelope;
+
+				colPlan->batchsize = fftPlan->batchsize;
+
+				colPlan->inStride[0]  = rowPlan->outStride[1];
+				colPlan->inStride.push_back(rowPlan->outStride[0]);
+				colPlan->iDist         = rowPlan->oDist;
+
+				//this 2d is decomposed from 3d
+				if (fftPlan->length.size()>2)
+				{
+					colPlan->length.push_back(fftPlan->length[2]);
+					colPlan->outStride.push_back(fftPlan->outStride[2]);
+					colPlan->inStride.push_back(rowPlan->outStride[2]);
+				}
+
+				OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+
+			}
+			else if(fftPlan->outputLayout == CLFFT_REAL)
+			{
+				length0 = fftPlan->length[0];
+				length1 = fftPlan->length[1];
+
+				size_t Nt = (1 + length0/2);
+				if (fftPlan->tmpBufSize==0)
+				{
+					fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
+					if(fftPlan->length.size() > 2) fftPlan->tmpBufSize *= fftPlan->length[2];
+				}
+
+				// create col plan
+				// complex to complex
+
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+					_T( "CreateDefaultPlan for planY failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				colPlan->length.push_back(Nt);
+
+				colPlan->inStride[0]  = fftPlan->inStride[1];
+				colPlan->inStride.push_back(fftPlan->inStride[0]);
+				colPlan->iDist         = fftPlan->iDist;
+
+
+				//this 2d is decomposed from 3d
+				if (fftPlan->length.size()>2)
+				{
+					colPlan->placeness = CLFFT_INPLACE;
+
+					colPlan->length.push_back(fftPlan->length[2]);
+					colPlan->inStride.push_back(fftPlan->inStride[2]);
+					colPlan->outStride[0]  = colPlan->inStride[0];
+					colPlan->outStride.push_back(colPlan->inStride[1]);
+					colPlan->outStride.push_back(colPlan->inStride[2]);
+					colPlan->oDist         = fftPlan->iDist;
+				}
+				else
+				{
+					colPlan->placeness = CLFFT_OUTOFPLACE;
+
+					colPlan->outStride[0]  = Nt;
+					colPlan->outStride.push_back(1);
+					colPlan->oDist         = Nt*length1;
+				}
+
+
+				switch(fftPlan->inputLayout)
+				{
+				case CLFFT_HERMITIAN_INTERLEAVED:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					}
+					break;
+				case CLFFT_HERMITIAN_PLANAR:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+					}
+					break;
+				default: assert(false);
+				}
+
+
+				colPlan->precision     = fftPlan->precision;
+				colPlan->forwardScale  = 1.0f;
+				colPlan->backwardScale = 1.0f;
+				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				colPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				colPlan->ldsPadding    = fftPlan->ldsPadding;
+				colPlan->gen			= fftPlan->gen;
+				colPlan->envelope			= fftPlan->envelope;
+
+				colPlan->batchsize = fftPlan->batchsize;
+
+				if ((fftPlan->tmpBufSizeC2R==0) && (length1 > Large1DThreshold) && (fftPlan->length.size()<=2))
+				{
+					fftPlan->tmpBufSizeC2R = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
+					if(fftPlan->length.size() > 2) fftPlan->tmpBufSizeC2R *= fftPlan->length[2];
+				}
+
+				OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+
+				// create row plan
+				// hermitian to real
+
+				//create row plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
+					_T( "CreateDefaultPlan for planX failed" ) );
+
+				FFTPlan* rowPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+
+				rowPlan->outputLayout  = fftPlan->outputLayout;
+				rowPlan->inputLayout   = CLFFT_HERMITIAN_INTERLEAVED;
+				rowPlan->placeness     = CLFFT_OUTOFPLACE;
+				rowPlan->length.push_back(length1);
+
+				rowPlan->inStride[0]   = 1;
+				rowPlan->inStride.push_back(Nt);
+				rowPlan->iDist         = colPlan->oDist;
+
+				rowPlan->precision     = fftPlan->precision;
+				rowPlan->forwardScale  = fftPlan->forwardScale;
+				rowPlan->backwardScale = fftPlan->backwardScale;
+				rowPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				rowPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				rowPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				rowPlan->ldsPadding    = fftPlan->ldsPadding;
+				rowPlan->gen			= fftPlan->gen;
+				rowPlan->envelope			= fftPlan->envelope;
+
+				rowPlan->batchsize    = fftPlan->batchsize;
+
+				rowPlan->outStride[0]  = fftPlan->outStride[0];
+				rowPlan->outStride.push_back(fftPlan->outStride[1]);
+				rowPlan->oDist         = fftPlan->oDist;
+
+				//this 2d is decomposed from 3d
+				if (fftPlan->length.size()>2)
+				{
+					rowPlan->length.push_back(fftPlan->length[2]);
+					rowPlan->inStride.push_back(Nt*length1);
+					rowPlan->outStride.push_back(fftPlan->outStride[2]);
+				}
+
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
+			}
+			else
+			{
+				if (fftPlan->tmpBufSize==0 && fftPlan->length.size()<=2)
+				{
+					fftPlan->tmpBufSize = length0 * length1 *
+						fftPlan->batchsize * fftPlan->ElementSize();
+				}
+
+				//create row plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
+					_T( "CreateDefaultPlan for planX failed" ) );
+
+				FFTPlan* rowPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+				rowPlan->inputLayout   = fftPlan->inputLayout;
+				if (fftPlan->large2D || fftPlan->length.size()>2)
+				{
+					rowPlan->outputLayout  = fftPlan->outputLayout;
+					rowPlan->placeness     = fftPlan->placeness;
+					rowPlan->outStride[0]  = fftPlan->outStride[0];
+					rowPlan->outStride.push_back(fftPlan->outStride[1]);
+					rowPlan->oDist         = fftPlan->oDist;
+				}
+				else
+				{
+					rowPlan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					rowPlan->placeness     = CLFFT_OUTOFPLACE;
+					rowPlan->outStride[0]  = length1;//1;
+					rowPlan->outStride.push_back(1);//length0);
+					rowPlan->oDist         = length0 * length1;
+				}
+				rowPlan->precision     = fftPlan->precision;
+				rowPlan->forwardScale  = 1.0f;
+				rowPlan->backwardScale = 1.0f;
+				rowPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				rowPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				rowPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				rowPlan->ldsPadding    = fftPlan->ldsPadding;
+				rowPlan->gen			= fftPlan->gen;
+				rowPlan->envelope			= fftPlan->envelope;
+
+				// This is the row fft, the first elements distance between the first two FFTs is the distance of the first elements
+				// of the first two rows in the original buffer.
+				rowPlan->batchsize    = fftPlan->batchsize;
+				rowPlan->inStride[0]  = fftPlan->inStride[0];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				rowPlan->length.push_back(fftPlan->length[1]);
+				rowPlan->inStride.push_back(fftPlan->inStride[1]);
+
+				//this 2d is decomposed from 3d
+				if (fftPlan->length.size()>2)
+				{
+					rowPlan->length.push_back(fftPlan->length[2]);
+					rowPlan->inStride.push_back(fftPlan->inStride[2]);
+					rowPlan->outStride.push_back(fftPlan->outStride[2]);
+				}
+
+				rowPlan->iDist    = fftPlan->iDist;
+
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
+
+				//create col plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+					_T( "CreateDefaultPlan for planY failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				if (fftPlan->large2D || fftPlan->length.size()>2)
+				{
+					colPlan->inputLayout   = fftPlan->outputLayout;
+					colPlan->placeness     = CLFFT_INPLACE;
+					colPlan->inStride[0]   = fftPlan->outStride[1];
+					colPlan->inStride.push_back(fftPlan->outStride[0]);
+					colPlan->iDist         = fftPlan->oDist;
+				}
+				else
+				{
+					colPlan->inputLayout   = CLFFT_COMPLEX_INTERLEAVED;
+					colPlan->placeness     = CLFFT_OUTOFPLACE;
+					colPlan->inStride[0]   = 1;//length0;
+					colPlan->inStride.push_back(length1);//1);
+					colPlan->iDist         = length0 * length1;
+				}
+
+				colPlan->outputLayout  = fftPlan->outputLayout;
+				colPlan->precision     = fftPlan->precision;
+				colPlan->forwardScale  = fftPlan->forwardScale;
+				colPlan->backwardScale = fftPlan->backwardScale;
+				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				colPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				colPlan->ldsPadding    = fftPlan->ldsPadding;
+				colPlan->gen			= fftPlan->gen;
+				colPlan->envelope			= fftPlan->envelope;
+
+				// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+				// elements in the original buffer. Like a transpose of the matrix
+				colPlan->batchsize = fftPlan->batchsize;
+				colPlan->outStride[0] = fftPlan->outStride[1];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				colPlan->length.push_back(fftPlan->length[0]);
+				colPlan->outStride.push_back(fftPlan->outStride[0]);
+				colPlan->oDist    = fftPlan->oDist;
+
+				//this 2d is decomposed from 3d
+				if (fftPlan->length.size()>2)
+				{
+					//assert(fftPlan->large2D);
+					colPlan->length.push_back(fftPlan->length[2]);
+					colPlan->inStride.push_back(fftPlan->outStride[2]);
+					colPlan->outStride.push_back(fftPlan->outStride[2]);
+				}
+
+				OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+			}
+
+			fftPlan->baked = true;
+			return	CLFFT_SUCCESS;
+		}
+	case CLFFT_3D:
+		{
+			if(fftPlan->inputLayout == CLFFT_REAL)
+			{
+				size_t clLengths[] = { 1, 1, 0 };
+				clLengths[0] = fftPlan->length[ DimX ];
+				clLengths[1] = fftPlan->length[ DimY ];
+
+				//create 2D xy plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+					_T( "CreateDefaultPlan 2D planX failed" ) );
+
+				FFTPlan* xyPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+				xyPlan->inputLayout   = fftPlan->inputLayout;
+				xyPlan->outputLayout  = fftPlan->outputLayout;
+				xyPlan->placeness     = fftPlan->placeness;
+				xyPlan->precision     = fftPlan->precision;
+				xyPlan->forwardScale  = 1.0f;
+				xyPlan->backwardScale = 1.0f;
+				xyPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				xyPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				xyPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				xyPlan->ldsPadding    = fftPlan->ldsPadding;
+				xyPlan->gen			 = fftPlan->gen;
+				xyPlan->envelope			 = fftPlan->envelope;
+
+				// This is the xy fft, the first elements distance between the first two FFTs is the distance of the first elements
+				// of the first two rows in the original buffer.
+				xyPlan->batchsize    = fftPlan->batchsize;
+				xyPlan->inStride[0]  = fftPlan->inStride[0];
+				xyPlan->inStride[1]  = fftPlan->inStride[1];
+				xyPlan->outStride[0] = fftPlan->outStride[0];
+				xyPlan->outStride[1] = fftPlan->outStride[1];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				xyPlan->length.push_back(fftPlan->length[2]);
+				xyPlan->inStride.push_back(fftPlan->inStride[2]);
+				xyPlan->outStride.push_back(fftPlan->outStride[2]);
+				xyPlan->iDist    = fftPlan->iDist;
+				xyPlan->oDist    = fftPlan->oDist;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+
+				clLengths[0] = fftPlan->length[ DimZ ];
+				clLengths[1] = clLengths[2] = 0;
+				//create 1D col plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+					_T( "CreateDefaultPlan for planZ failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				switch(fftPlan->outputLayout)
+				{
+				case CLFFT_HERMITIAN_INTERLEAVED:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					}
+					break;
+				case CLFFT_HERMITIAN_PLANAR:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
+						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+					}
+					break;
+				default: assert(false);
+				}
+
+				colPlan->placeness     = CLFFT_INPLACE;
+				colPlan->precision     = fftPlan->precision;
+				colPlan->forwardScale  = fftPlan->forwardScale;
+				colPlan->backwardScale = fftPlan->backwardScale;
+				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				colPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				colPlan->ldsPadding    = fftPlan->ldsPadding;
+				colPlan->gen			 = fftPlan->gen;
+				colPlan->envelope			 = fftPlan->envelope;
+
+				// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+				// elements in the original buffer. Like a transpose of the matrix
+				colPlan->batchsize = fftPlan->batchsize;
+				colPlan->inStride[0] = fftPlan->outStride[2];
+				colPlan->outStride[0] = fftPlan->outStride[2];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				colPlan->length.push_back(1 + fftPlan->length[0]/2);
+				colPlan->length.push_back(fftPlan->length[1]);
+				colPlan->inStride.push_back(fftPlan->outStride[0]);
+				colPlan->inStride.push_back(fftPlan->outStride[1]);
+				colPlan->outStride.push_back(fftPlan->outStride[0]);
+				colPlan->outStride.push_back(fftPlan->outStride[1]);
+				colPlan->iDist    = fftPlan->oDist;
+				colPlan->oDist    = fftPlan->oDist;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+			}
+			else if(fftPlan->outputLayout == CLFFT_REAL)
+			{
+				if (fftPlan->tmpBufSize == 0)
+				{
+					fftPlan->tmpBufSize = fftPlan->length[2] * fftPlan->length[1] * (1 + fftPlan->length[0]/2);
+					fftPlan->tmpBufSize *= fftPlan->batchsize * fftPlan->ElementSize();
+				}
+
+				size_t clLengths[] = { 1, 1, 0 };
+
+				clLengths[0] = fftPlan->length[ DimZ ];
+				clLengths[1] = clLengths[2] = 0;
+
+				//create 1D col plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+					_T( "CreateDefaultPlan for planZ failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				switch(fftPlan->inputLayout)
+				{
+				case CLFFT_HERMITIAN_INTERLEAVED:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					}
+					break;
+				case CLFFT_HERMITIAN_PLANAR:
+					{
+						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+					}
+					break;
+				default: assert(false);
+				}
+
+				colPlan->placeness     = CLFFT_OUTOFPLACE;
+
+				colPlan->precision     = fftPlan->precision;
+				colPlan->forwardScale  = 1.0f;
+				colPlan->backwardScale = 1.0f;
+				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				colPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				colPlan->ldsPadding    = fftPlan->ldsPadding;
+				colPlan->gen			 = fftPlan->gen;
+				colPlan->envelope			 = fftPlan->envelope;
+
+				// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+				// elements in the original buffer. Like a transpose of the matrix
+				colPlan->batchsize = fftPlan->batchsize;
+				colPlan->inStride[0] = fftPlan->inStride[2];
+				colPlan->outStride[0] = fftPlan->length[1] * (1 + fftPlan->length[0]/2);
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				colPlan->length.push_back(1 + fftPlan->length[0]/2);
+				colPlan->length.push_back(fftPlan->length[1]);
+				colPlan->inStride.push_back(fftPlan->inStride[0]);
+				colPlan->inStride.push_back(fftPlan->inStride[1]);
+				colPlan->outStride.push_back(1);
+				colPlan->outStride.push_back(1 + fftPlan->length[0]/2);
+				colPlan->iDist    = fftPlan->iDist;
+				colPlan->oDist    = fftPlan->length[2] * fftPlan->length[1] * (1 + fftPlan->length[0]/2);
+
+				if ((fftPlan->tmpBufSizeC2R==0) && ((fftPlan->length[2] > Large1DThreshold) || (fftPlan->length[1] > Large1DThreshold)))
+				{
+					fftPlan->tmpBufSizeC2R = (1 + fftPlan->length[0]/2) * (fftPlan->length[1]) * (fftPlan->length[2]) *
+						fftPlan->batchsize * fftPlan->ElementSize();
+				}
+
+				OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+
+				clLengths[0] = fftPlan->length[ DimX ];
+				clLengths[1] = fftPlan->length[ DimY ];
+
+				//create 2D xy plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+					_T( "CreateDefaultPlan 2D planX failed" ) );
+
+				FFTPlan* xyPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+				xyPlan->inputLayout   = CLFFT_HERMITIAN_INTERLEAVED;
+				xyPlan->outputLayout  = fftPlan->outputLayout;
+
+				xyPlan->placeness     = CLFFT_OUTOFPLACE;
+
+				xyPlan->precision     = fftPlan->precision;
+				xyPlan->forwardScale  = fftPlan->forwardScale;
+				xyPlan->backwardScale = fftPlan->backwardScale;
+				xyPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				xyPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				xyPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				xyPlan->ldsPadding    = fftPlan->ldsPadding;
+				xyPlan->gen			 = fftPlan->gen;
+				xyPlan->envelope			 = fftPlan->envelope;
+
+				// This is the xy fft, the first elements distance between the first two FFTs is the distance of the first elements
+				// of the first two rows in the original buffer.
+				xyPlan->batchsize    = fftPlan->batchsize;
+				xyPlan->inStride[0]  = 1;
+				xyPlan->inStride[1]  = (1 + fftPlan->length[0]/2);
+				xyPlan->outStride[0] = fftPlan->outStride[0];
+				xyPlan->outStride[1] = fftPlan->outStride[1];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				xyPlan->length.push_back(fftPlan->length[2]);
+				xyPlan->inStride.push_back(fftPlan->length[1] * (1 + fftPlan->length[0]/2));
+				xyPlan->outStride.push_back(fftPlan->outStride[2]);
+				xyPlan->iDist    = colPlan->oDist;
+				xyPlan->oDist    = fftPlan->oDist;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+			}
+			else
+			{
+				if (fftPlan->tmpBufSize==0 && (
+					fftPlan->length[0] > Large1DThreshold ||
+					fftPlan->length[1] > Large1DThreshold ||
+					fftPlan->length[2] > Large1DThreshold
+					))
+				{
+					fftPlan->tmpBufSize = fftPlan->length[0] * fftPlan->length[1] * fftPlan->length[2] *
+						fftPlan->batchsize * fftPlan->ElementSize();
+				}
+
+				size_t clLengths[] = { 1, 1, 0 };
+				clLengths[0] = fftPlan->length[ DimX ];
+				clLengths[1] = fftPlan->length[ DimY ];
+
+				//create 2D xy plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+					_T( "CreateDefaultPlan 2D planX failed" ) );
+
+				FFTPlan* xyPlan	= NULL;
+				lockRAII* rowLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+				xyPlan->inputLayout   = fftPlan->inputLayout;
+				xyPlan->outputLayout  = fftPlan->outputLayout;
+				xyPlan->placeness     = fftPlan->placeness;
+				xyPlan->precision     = fftPlan->precision;
+				xyPlan->forwardScale  = 1.0f;
+				xyPlan->backwardScale = 1.0f;
+				xyPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				xyPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				xyPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				xyPlan->ldsPadding    = fftPlan->ldsPadding;
+				xyPlan->gen			 = fftPlan->gen;
+				xyPlan->envelope			 = fftPlan->envelope;
+
+				// This is the xy fft, the first elements distance between the first two FFTs is the distance of the first elements
+				// of the first two rows in the original buffer.
+				xyPlan->batchsize    = fftPlan->batchsize;
+				xyPlan->inStride[0]  = fftPlan->inStride[0];
+				xyPlan->inStride[1]  = fftPlan->inStride[1];
+				xyPlan->outStride[0] = fftPlan->outStride[0];
+				xyPlan->outStride[1] = fftPlan->outStride[1];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				xyPlan->length.push_back(fftPlan->length[2]);
+				xyPlan->inStride.push_back(fftPlan->inStride[2]);
+				xyPlan->outStride.push_back(fftPlan->outStride[2]);
+				xyPlan->iDist    = fftPlan->iDist;
+				xyPlan->oDist    = fftPlan->oDist;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+
+				clLengths[0] = fftPlan->length[ DimZ ];
+				clLengths[1] = clLengths[2] = 0;
+				//create 1D col plan
+				OPENCL_V(clfftCreateDefaultPlan( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+					_T( "CreateDefaultPlan for planZ failed" ) );
+
+				FFTPlan* colPlan	= NULL;
+				lockRAII* colLock	= NULL;
+				OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+				colPlan->inputLayout   = fftPlan->outputLayout;
+				colPlan->outputLayout  = fftPlan->outputLayout;
+				colPlan->placeness     = CLFFT_INPLACE;
+				colPlan->precision     = fftPlan->precision;
+				colPlan->forwardScale  = fftPlan->forwardScale;
+				colPlan->backwardScale = fftPlan->backwardScale;
+				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+				colPlan->bLdsComplex   = fftPlan->bLdsComplex;
+				colPlan->uLdsFraction  = fftPlan->uLdsFraction;
+				colPlan->ldsPadding    = fftPlan->ldsPadding;
+				colPlan->gen			 = fftPlan->gen;
+				colPlan->envelope			 = fftPlan->envelope;
+
+				// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+				// elements in the original buffer. Like a transpose of the matrix
+				colPlan->batchsize = fftPlan->batchsize;
+				colPlan->inStride[0] = fftPlan->outStride[2];
+				colPlan->outStride[0] = fftPlan->outStride[2];
+
+				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+				colPlan->length.push_back(fftPlan->length[0]);
+				colPlan->length.push_back(fftPlan->length[1]);
+				colPlan->inStride.push_back(fftPlan->outStride[0]);
+				colPlan->inStride.push_back(fftPlan->outStride[1]);
+				colPlan->outStride.push_back(fftPlan->outStride[0]);
+				colPlan->outStride.push_back(fftPlan->outStride[1]);
+				colPlan->iDist    = fftPlan->oDist;
+				colPlan->oDist    = fftPlan->oDist;
+
+				OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+			}
+
+			fftPlan->baked = true;
+			return	CLFFT_SUCCESS;
+		}
+	}
+
+	//	For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
+	OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
+
+	//	For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
+	OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
+
+	//	Allocate resources
+	OPENCL_V( fftPlan->AllocateBuffers (), _T("AllocateBuffers() failed"));
+
+	fftPlan->ConstructAndEnqueueConstantBuffers( commQueueFFT );
+
+	//	Record that we baked the plan
+	fftPlan->baked		= true;
+
+	return	CLFFT_SUCCESS;
+}
+
+clfftStatus clfftCopyPlan( clfftPlanHandle* out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle )
+{
+	//	Creates a new, unbaked plan in new_context and copies the configuration fields listed below from in_plHandle
+	FFTRepo& repo		= FFTRepo::getInstance( );
+	FFTPlan* srcPlan	= NULL;
+	lockRAII* srcLock	= NULL;
+	OPENCL_V( repo.getPlan( in_plHandle, srcPlan, srcLock ), _T( "fftRepo.getPlan failed" ) );
+	OPENCL_V( clfftCreateDefaultPlan( out_plHandle, new_context, srcPlan->dim, &srcPlan->length[ 0 ] ),
+		_T( "clfftCreateDefaultPlan failed" ) );
+
+	FFTPlan* dstPlan	= NULL;
+	lockRAII* dstLock	= NULL;
+	OPENCL_V( repo.getPlan( *out_plHandle, dstPlan, dstLock ), _T( "fftRepo.getPlan failed" ) );
+
+	//	Take the source plan's lock so that other operations on it complete before its fields are read
+	scopedLock sLock( *srcLock, _T( "clfftCopyPlan" ) );
+
+	dstPlan->baked			= false;
+	dstPlan->dim			= srcPlan->dim;
+	dstPlan->length			= srcPlan->length;
+	dstPlan->batchsize		= srcPlan->batchsize;
+	dstPlan->inStride		= srcPlan->inStride;
+	dstPlan->outStride		= srcPlan->outStride;
+	dstPlan->iDist			= srcPlan->iDist;
+	dstPlan->oDist			= srcPlan->oDist;
+	dstPlan->inputLayout	= srcPlan->inputLayout;
+	dstPlan->outputLayout	= srcPlan->outputLayout;
+	dstPlan->placeness		= srcPlan->placeness;
+	dstPlan->precision		= srcPlan->precision;
+	dstPlan->forwardScale	= srcPlan->forwardScale;
+	dstPlan->backwardScale	= srcPlan->backwardScale;
+	dstPlan->gen			= srcPlan->gen;
+	dstPlan->envelope		= srcPlan->envelope;
+	return	CLFFT_SUCCESS;
+}
+
+clfftStatus FFTPlan::ConstructAndEnqueueConstantBuffers( cl_command_queue* commQueueFFT )
+{
+	//	Pack this plan's extents, strides and distances into a zeroed parameter block,
+	//	then upload the block to const_buffer through *commQueueFFT
+	cb_t cbParams[ CLFFT_CB_SIZE ];
+	memset( cbParams, 0, sizeof( cbParams ) );
+
+	cl_uint extentY = 1;
+	cl_uint extentZ = 0;
+	cl_uint extentW = 0;
+	cl_uint extent5 = 0;
+
+	switch( length.size( ) )	// the slot after the last plan dimension carries the batch count (at least 1)
+	{
+	case 1:
+		extentY = std::max<cl_uint>( 1, static_cast<cl_uint>( batchsize ) );
+		break;
+
+	case 2:
+		extentY = static_cast<cl_uint>( length[ DimY ] );
+		extentZ = std::max<cl_uint>( 1, static_cast<cl_uint>( batchsize ) );
+		break;
+
+	case 3:
+		extentY = static_cast<cl_uint>( length[ DimY ] );
+		extentZ = static_cast<cl_uint>( length[ DimZ ] );
+		extentW = std::max<cl_uint>( 1, static_cast<cl_uint>( batchsize ) );
+		break;
+
+	case 4:
+		extentY = static_cast<cl_uint>( length[ DimY ] );
+		extentZ = static_cast<cl_uint>( length[ DimZ ] );
+		extentW = static_cast<cl_uint>( length[ DimW ] );
+		extent5 = std::max<cl_uint>( 1, static_cast<cl_uint>( batchsize ) );
+		break;
+	}
+	cbParams[ CLFFT_CB_NY ].u = extentY;
+	cbParams[ CLFFT_CB_NZ ].u = extentZ;
+	cbParams[ CLFFT_CB_NW ].u = extentW;
+	cbParams[ CLFFT_CB_N5 ].u = extent5;
+
+	assert( inStride.size( ) == outStride.size( ) );
+
+	switch( inStride.size( ) ) {	// input strides fill ISX.. in order; the following slot carries iDist
+	case 1:
+		cbParams[ CLFFT_CB_ISX ].u = static_cast<cl_uint>( inStride[ 0 ] );
+		cbParams[ CLFFT_CB_ISY ].u = static_cast<cl_uint>( iDist );
+		break;
+
+	case 2:
+		cbParams[ CLFFT_CB_ISX ].u = static_cast<cl_uint>( inStride[ 0 ] );
+		cbParams[ CLFFT_CB_ISY ].u = static_cast<cl_uint>( inStride[ 1 ] );
+		cbParams[ CLFFT_CB_ISZ ].u = static_cast<cl_uint>( iDist );
+		break;
+
+	case 3:
+		cbParams[ CLFFT_CB_ISX ].u = static_cast<cl_uint>( inStride[ 0 ] );
+		cbParams[ CLFFT_CB_ISY ].u = static_cast<cl_uint>( inStride[ 1 ] );
+		cbParams[ CLFFT_CB_ISZ ].u = static_cast<cl_uint>( inStride[ 2 ] );
+		cbParams[ CLFFT_CB_ISW ].u = static_cast<cl_uint>( iDist );
+		break;
+
+	case 4:
+		cbParams[ CLFFT_CB_ISX ].u = static_cast<cl_uint>( inStride[ 0 ] );
+		cbParams[ CLFFT_CB_ISY ].u = static_cast<cl_uint>( inStride[ 1 ] );
+		cbParams[ CLFFT_CB_ISZ ].u = static_cast<cl_uint>( inStride[ 2 ] );
+		cbParams[ CLFFT_CB_ISW ].u = static_cast<cl_uint>( inStride[ 3 ] );
+		cbParams[ CLFFT_CB_IS5 ].u = static_cast<cl_uint>( iDist );
+		break;
+	}
+
+	switch( outStride.size( ) ) {	// output strides fill OSX.. in order; the following slot carries oDist
+	case 1:
+		cbParams[ CLFFT_CB_OSX ].u = static_cast<cl_uint>( outStride[ 0 ] );
+		cbParams[ CLFFT_CB_OSY ].u = static_cast<cl_uint>( oDist );
+		break;
+
+	case 2:
+		cbParams[ CLFFT_CB_OSX ].u = static_cast<cl_uint>( outStride[ 0 ] );
+		cbParams[ CLFFT_CB_OSY ].u = static_cast<cl_uint>( outStride[ 1 ] );
+		cbParams[ CLFFT_CB_OSZ ].u = static_cast<cl_uint>( oDist );
+		break;
+
+	case 3:
+		cbParams[ CLFFT_CB_OSX ].u = static_cast<cl_uint>( outStride[ 0 ] );
+		cbParams[ CLFFT_CB_OSY ].u = static_cast<cl_uint>( outStride[ 1 ] );
+		cbParams[ CLFFT_CB_OSZ ].u = static_cast<cl_uint>( outStride[ 2 ] );
+		cbParams[ CLFFT_CB_OSW ].u = static_cast<cl_uint>( oDist );
+		break;
+
+	case 4:
+		cbParams[ CLFFT_CB_OSX ].u = static_cast<cl_uint>( outStride[ 0 ] );
+		cbParams[ CLFFT_CB_OSY ].u = static_cast<cl_uint>( outStride[ 1 ] );
+		cbParams[ CLFFT_CB_OSZ ].u = static_cast<cl_uint>( outStride[ 2 ] );
+		cbParams[ CLFFT_CB_OSW ].u = static_cast<cl_uint>( outStride[ 3 ] );
+		cbParams[ CLFFT_CB_OS5 ].u = static_cast<cl_uint>( oDist );
+		break;
+	}
+
+	OPENCL_V( clEnqueueWriteBuffer( *commQueueFFT,
+		const_buffer,
+		CL_TRUE,	// blocking write; TODO? non-blocking write?
+		0,	// byte offset into const_buffer
+		sizeof( cbParams ),
+		cbParams,
+		0,
+		NULL,
+		NULL ), _T("clEnqueueWriteBuffer failed") );
+
+	return CLFFT_SUCCESS;
+}
+
+//TODO caching kernel binaries for later reload
+#if 0
+typedef std::pair<std::string, clfftPlanHandle> plan_tree_node_t;
+typedef std::vector< std::pair<std::string, clfftPlanHandle> > plan_tree_t;
+
+void make_plan_tree( plan_tree_t & tree, std::string name, clfftPlanHandle handle )
+{
+	//	Appends (name, handle) to tree, then recurses into whichever of planX, planY and planZ
+	//	are set, naming each child after its parent plus the letter X, Y or Z.
+	//	Throws a string literal if the repository lookup of handle fails.
+	tree.push_back( plan_tree_node_t(name, handle) );
+
+	FFTPlan* node = NULL;
+	lockRAII* nodeLock = NULL;
+	FFTRepo& planRepo = FFTRepo::getInstance();
+	const clfftStatus lookup = planRepo.getPlan( handle, node, nodeLock );
+	if( lookup != CLFFT_SUCCESS )
+	{
+		throw( "make_plan_tree failure: repo.getPlan" );
+	}
+
+	//	X sub-plan
+	if( node->planX )
+	{
+		make_plan_tree( tree, name + "X", node->planX );
+	}
+
+	//	Y sub-plan
+	if( node->planY )
+	{
+		make_plan_tree( tree, name + "Y", node->planY );
+	}
+
+	//	Z sub-plan
+	if( node->planZ )
+	{
+		make_plan_tree( tree, name + "Z", node->planZ );
+	}
+}
+
+clfftStatus clfftWritePlanToDisk( clfftPlanHandle plan_handle, const char* filename )
+{	// Writes plan_handle and its planX/planY/planZ descendants to filename as "keyword value..." text plus raw device binaries.
+	plan_tree_t plan_tree;
+	make_plan_tree( plan_tree, "plan", plan_handle );
+	// NOTE(review): the stream state is never checked after open(), so an unwritable file is not reported here
+	std::ofstream planfile;
+	planfile.open(filename, std::ios::binary);
+
+	while( !plan_tree.empty() )
+	{
+		plan_tree_node_t node( *plan_tree.begin() );	// take nodes from the front, i.e. in the order make_plan_tree added them
+		plan_tree.erase( plan_tree.begin() );
+
+		FFTPlan* plan = NULL;
+		FFTRepo& repo = FFTRepo::getInstance();
+		lockRAII* lock = NULL;
+		OPENCL_V(repo.getPlan( node.second, plan, lock ), _T("getPlan failure"));
+
+		// print the name of the node (plan, planX, planXX, planY, planXY, etc)
+		planfile << node.first << " ";
+
+		planfile << "dimensions " << plan->dim << " " << plan->length.size();
+		// dimensions must be listed first because clfftReadPlanFromDisk
+		// will need to use dimensions for reading in strides and such
+
+		for( int i = 0; i < plan->length.size(); ++i )
+		{
+			planfile << " " << plan->length[i];
+		}
+
+		planfile << " batch " << plan->batchsize;
+
+		planfile << " instride " << plan->inStride.size();	// count first, then that many stride values
+		for( int i = 0; i < plan->inStride.size(); ++i )
+		{
+			planfile << " " << plan->inStride[i];
+		}
+
+		planfile << " outstride " << plan->outStride.size();	// count first, then that many stride values
+		for( int i = 0; i < plan->outStride.size(); ++i )
+		{
+			planfile << " " << plan->outStride[i];
+		}
+
+		planfile << " in-out-distances " << plan->iDist << " " << plan->oDist;
+		planfile << " in-out-layouts " << plan->inputLayout << " " << plan->outputLayout;
+		planfile << " resultlocation " << plan->placeness;
+		planfile << " precision " << plan->precision;
+		planfile << " forwardscale " << float_as_hex<double>(plan->forwardScale);
+		planfile << " backwardscale " << float_as_hex<double>(plan->backwardScale);
+		// We stash the scales as hex so that no roundoff error is introduced;
+		// clfftReadPlanFromDisk will read the hex back in as float
+
+		planfile << " gen " << plan->gen;
+		planfile << " bLdsComplex " << plan->bLdsComplex;
+		planfile << " ldsPadding " << plan->ldsPadding;
+		planfile << " uLdsFraction " << plan->uLdsFraction;
+		planfile << " large1D_Xfactor " << plan->large1D_Xfactor;
+		planfile << " cacheSize " << plan->cacheSize;
+		planfile << " tmpBufSize " << plan->tmpBufSize;
+		planfile << " large1D " << plan->large1D;
+		planfile << " large2D " << plan->large2D;
+
+		if( plan->baked == true )	// the device count and binaries are only written for baked plans
+		{
+			planfile << " number-of-devices " << plan->number_of_devices;
+
+			if( plan->number_of_devices > 0 )
+			{
+				planfile << " binary-sizes";
+				for( int i = 0; i < plan->number_of_devices; i++ )
+				{
+					planfile << " " << *(plan->binary_sizes.get() + i);
+				}
+
+				planfile << " binaries ";
+				for( int i = 0; i < plan->number_of_devices; i++ )
+				{
+					planfile << beginning_of_binary;	// each device binary is wrapped in its own begin/end marker pair
+					planfile.write( plan->binaries[i].get(), plan->binary_sizes[i] );
+					planfile << end_of_binary;
+				}
+			}
+			else
+			{
+				planfile << " ";
+			}
+		}
+	}
+
+	planfile << " " << end_of_file;	// sentinel keyword that clfftReadPlanFromDisk treats as the end of input
+	planfile.close();
+	return CLFFT_SUCCESS;
+}
+
+void FFTPlan::ResetBinarySizes()
+{	//	Swap in a fresh size array with one (uninitialized) slot per device
+	binary_sizes.reset( new size_t[ this->number_of_devices ] );
+}
+
+void FFTPlan::ResetBinaries()
+{
+	//	Drop any binaries held so far, then allocate one uninitialized buffer per device,
+	//	each sized by the matching binary_sizes entry
+	binaries.clear();
+	for( int dev = 0; dev < number_of_devices; ++dev )
+		binaries.push_back( std::unique_ptr<char[]>( new char[ binary_sizes[ dev ] ] ) );
+}
+
+std::string pop_next_word( std::string & str )
+{
+	// Removes the first space-delimited word from the front of str (along with the space that
+	// follows it, if any) and returns that word. With no space left, the whole string is the word.
+	std::string next_word( str.substr( 0, str.find_first_of(' ') ) );
+	// Erase by word length: the old npos+1 count wrapped to 0 and left the last word in str forever
+	str.erase( 0, next_word.size() + 1 ); // +1 munches the space; erase() clamps the count at the end
+	return next_word;
+}
+
+int my_string_to_int( std::string str )
+{	// Parses the leading integer of str; returns 0 when str holds nothing to extract.
+	int i = 0;	// pre-set: a failed extraction (e.g. empty input) can leave the target untouched
+	std::stringstream string_to_int( str );
+	string_to_int >> i;
+	return i;
+}
+
+bool start_of_a_plan( std::string word )
+{
+	//	True when word begins with the four characters "plan"; a shorter word never matches
+	//	because the comparison is against the whole prefix
+	const std::string prefix( "plan" );
+	return word.compare( 0, prefix.size(), prefix ) == 0;
+}
+
+clfftStatus clfftReadPlanFromDisk( clfftPlanHandle plan_handle, const char* filename )
+{	// Populates plan_handle (creating child plans as needed) from the keyword/value stream that clfftWritePlanToDisk emits.
+	plan_tree_t tree;	// (name, handle) of every plan encountered so far, used to find a child's parent
+
+	FFTPlan* plan = NULL;
+	FFTRepo& repo = FFTRepo::getInstance();
+	lockRAII* lock = NULL;
+	OPENCL_V(repo.getPlan( plan_handle, plan, lock ), _T("getPlan failure"));
+	// NOTE(review): the open is not checked; if it fails the loop below never runs and CLFFT_SUCCESS is returned
+	std::ifstream planfile;
+	planfile.open(filename, std::ios::in | std::ios::binary);
+
+	unsigned int dimensions = 0;
+	std::string next_word;
+	// Each whitespace-delimited keyword selects how the values that follow it are parsed
+	while( planfile >> next_word )
+	{
+		if( start_of_a_plan( next_word ) )
+		{
+			if( next_word.length() > 4 )
+			// if true, this is not a base plan
+			{
+				clfftDim temp_dimension = CLFFT_1D;
+				size_t temp_lengths[3] = {1,1,1};
+
+				// let's create the plan to represent the child plan
+				clfftPlanHandle child_plan;
+				OPENCL_V(clfftCreateDefaultPlan( &child_plan, plan->context, temp_dimension, temp_lengths ),
+					"clfftReadPlanFromDisk(): error calling clfftCreateDefaultPlan()");
+
+				tree.push_back( plan_tree_node_t( next_word, child_plan ) );
+
+				// we need to update the planX, Y, or Z pointer to point at the child plan
+				char child_plan_name = next_word.rbegin()[0]; // this tells us if this is planX, Y, or Z
+				next_word.erase( next_word.end()-1 ); // this tells us the parent plan
+				std::string parent_plan_name = next_word;
+
+				clfftPlanHandle parent_plan = 0;	// stays 0 when no node named parent_plan_name has been seen
+
+				for( int i = 0; i < tree.size(); i++ )
+				{
+					if( tree[i].first == parent_plan_name )
+					{
+						parent_plan = tree[i].second;
+					}
+				}
+
+				plan = NULL;
+				OPENCL_V(repo.getPlan( parent_plan, plan, lock ), _T("getPlan failure"));
+
+				if( child_plan_name == 'X' )
+					plan->planX = child_plan;
+				else if( child_plan_name == 'Y' )
+					plan->planY = child_plan;
+				else if( child_plan_name == 'Z' )
+					plan->planZ = child_plan;
+				else
+					OPENCL_V(CLFFT_INVALID_PLAN, "clfftReadPlanFromDisk(): could not identify child plan" );
+
+				// our child plan is now the active plan
+				plan = NULL;
+				OPENCL_V(repo.getPlan( child_plan, plan, lock ), _T("getPlan failure"));
+				plan_handle = child_plan;	// the clfftSet* calls in the branches below now target the child
+			}
+			else
+			// if this is a base plan, we don't need to do anything fancy.
+			// just add the node to the tree
+			{
+				tree.push_back( plan_tree_node_t( next_word, plan_handle ) );
+			}
+
+			plan->readFromFile = true;
+		}
+		else if( next_word == "dimensions" )
+		{
+			size_t lengths[3];	// NOTE(review): indexed by i < number_of_lengths (read from the file) with no bound check against 3
+
+			// read number of dimensions
+			planfile >> dimensions;
+
+			// number of length values that follow (subplans have some really strange things going on,
+			// so this might not always match the dimension of the transform)
+			size_t number_of_lengths = 0;
+			planfile >> number_of_lengths;
+
+			OPENCL_V( clfftSetPlanDim(plan_handle, static_cast<clfftDim>(dimensions)), _T("clfftReadPlanFromDisk: clfftSetPlanDim") );
+
+			for( unsigned int i = 0; i < number_of_lengths; ++i )
+			{
+				planfile >> lengths[i]; // read one dimension
+
+				// plan->length is written directly rather than through clfftSetPlanLength:
+				// the file may carry more length values than the plan's dimension,
+				// and every one of them has to end up in plan->length
+				if( i >= plan->length.size() ) plan->length.push_back(1);
+				plan->length[i] = lengths[i];
+			}
+		}
+		else if( next_word == "batch" )
+		{
+			unsigned int batch;
+			planfile >> batch;
+
+			OPENCL_V( clfftSetPlanBatchSize(plan_handle, batch), _T("clfftReadPlanFromDisk: clfftSetPlanBatchSize") );
+		}
+		else if( next_word == "instride" )
+		{
+			size_t strides[3];	// NOTE(review): indexed by i < number_of_strides (read from the file) with no bound check against 3
+
+			// number of stride values that follow (subplans have some really strange things going on,
+			// so this might not always match the dimension of the transform)
+			size_t number_of_strides = 0;
+			planfile >> number_of_strides;
+
+			for( unsigned int i = 0; i < number_of_strides; ++i )
+			{
+				planfile >> strides[i]; // read one dimension
+
+				// plan->inStride is written directly rather than through clfftSetPlanInStride:
+				// the file may carry more stride values than the plan's dimension,
+				// and every one of them has to end up in plan->inStride
+				if( i >= plan->inStride.size() ) plan->inStride.push_back(1);
+				plan->inStride[i] = strides[i];
+			}
+		}
+		else if( next_word == "outstride" )
+		{
+			size_t strides[3];	// NOTE(review): indexed by i < number_of_strides (read from the file) with no bound check against 3
+
+			// number of stride values that follow (subplans have some really strange things going on,
+			// so this might not always match the dimension of the transform)
+			size_t number_of_strides = 0;
+			planfile >> number_of_strides;
+
+			for( unsigned int i = 0; i < number_of_strides; ++i )
+			{
+				planfile >> strides[i]; // read one dimension
+
+				// plan->outStride is written directly rather than through clfftSetPlanOutStride:
+				// the file may carry more stride values than the plan's dimension,
+				// and every one of them has to end up in plan->outStride
+				if( i >= plan->outStride.size() ) plan->outStride.push_back(1);
+				plan->outStride[i] = strides[i];
+			}
+		}
+		else if( next_word == "in-out-distances" )
+		{
+			size_t indistance, outdistance;
+			planfile >> indistance >> outdistance;
+
+			OPENCL_V( clfftSetPlanDistance( plan_handle, indistance, outdistance ), _T("clfftReadPlanFromDisk: clfftSetPlanDistance" ) );
+		}
+		else if( next_word == "in-out-layouts" )
+		{
+			size_t inlayout, outlayout;
+			planfile >> inlayout >> outlayout;
+
+			OPENCL_V( clfftSetLayout( plan_handle, static_cast<clfftLayout>(inlayout), static_cast<clfftLayout>(outlayout) ), _T("clfftReadPlanFromDisk: clfftSetLayout") );
+		}
+		else if( next_word == "resultlocation" )
+		{
+			size_t location;
+			planfile >> location;
+
+			OPENCL_V( clfftSetResultLocation( plan_handle, static_cast<clfftResultLocation>(location) ), _T("clfftReadPlanFromDisk: clfftSetResultLocation") );
+		}
+		else if( next_word == "precision" )
+		{
+			size_t precision;
+			planfile >> precision;
+
+			OPENCL_V( clfftSetPlanPrecision( plan_handle, static_cast<clfftPrecision>(precision) ), _T("clfftReadPlanFromDisk: clfftSetPlanPrecision") );
+		}
+		else if( next_word == "forwardscale" || next_word == "backwardscale" )
+		{
+			size_t scale;	// NOTE(review): the writer emits float_as_hex<double>(...); confirm this integer read and the <float> conversion below round-trip it
+			planfile >> scale;
+
+			if( next_word == "forwardscale" )
+			{
+				OPENCL_V( clfftSetPlanScale( plan_handle, CLFFT_FORWARD, hex_as_float<float>((unsigned int)scale) ), _T("clfftReadPlanFromDisk: clfftSetPlanScale") );
+			}
+			else
+			{
+				OPENCL_V( clfftSetPlanScale( plan_handle, CLFFT_BACKWARD, hex_as_float<float>((unsigned int)scale) ), _T("clfftReadPlanFromDisk: clfftSetPlanScale") );
+			}
+		}
+		else if( next_word == "gen" )
+		{
+			int gen_read;
+			planfile >> gen_read;
+			plan->gen = static_cast<clfftGenerators>(gen_read);
+		}
+		else if( next_word == "bLdsComplex" )
+		{
+			planfile >> plan->bLdsComplex;
+		}
+		else if( next_word == "ldsPadding" )
+		{
+			planfile >> plan->ldsPadding;
+		}
+		else if( next_word == "uLdsFraction" )
+		{
+			planfile >> plan->uLdsFraction;
+		}
+		else if( next_word == "large1D_Xfactor" )
+		{
+			planfile >> plan->large1D_Xfactor;
+		}
+		else if( next_word == "cacheSize" )
+		{
+			planfile >> plan->cacheSize;
+		}
+		else if( next_word == "tmpBufSize" )
+		{
+			planfile >> plan->tmpBufSize;
+		}
+		else if( next_word == "large1D" )
+		{
+			planfile >> plan->large1D;
+		}
+		else if( next_word == "large2D" )
+		{
+			planfile >> plan->large2D;
+		}
+		else if( next_word == "number-of-devices" )
+		{
+			planfile >> plan->number_of_devices;
+		}
+		else if( next_word == "binary-sizes" )
+		{
+			plan->ResetBinarySizes();
+			for( int i = 0; i < plan->number_of_devices; i++ )
+			{
+				planfile >> plan->binary_sizes[i];
+			}
+		}
+		else if( next_word == "binaries" )
+		{
+			plan->ResetBinaries();
+
+			size_t number_of_devices = plan->number_of_devices;	// not used below
+
+			while( static_cast<char>(planfile.peek()) == ' ' )
+				planfile.ignore();
+
+			// consume the beginning of binary message. the binary will begin with the character immediately following
+			std::unique_ptr<char[]> beginning_message( new char[beginning_of_binary.size()] );
+			planfile.read( beginning_message.get(), beginning_of_binary.size() );
+			// NOTE(review): one begin/end marker pair is consumed for all devices, but clfftWritePlanToDisk wraps each binary in its own pair
+			for( int i = 0; i < plan->number_of_devices; i++ )
+			{
+				planfile.read( plan->binaries[i].get(), plan->binary_sizes[i] );
+			}
+
+			std::unique_ptr<char[]> end_message( new char[end_of_binary.size()] );
+			planfile.read( end_message.get(), end_of_binary.size() );
+		}
+		else if( next_word == end_of_file )
+		{
+			// we're at the end of the file
+		}
+		else
+		{
+			std::cout << next_word << std::endl;
+			OPENCL_V( CLFFT_INVALID_ARG_VALUE, _T("clfftReadPlanFromDisk: unrecognized parameter") );
+		}
+	}
+
+	return CLFFT_SUCCESS;
+}
+#endif
+
+clfftStatus	clfftDestroyPlan( clfftPlanHandle* plHandle )
+{
+	//	Destroys the plan *plHandle together with every sub-plan it references. A NULL plHandle is
+	//	rejected with CLFFT_INVALID_PLAN instead of being dereferenced by the lookup below.
+	if( plHandle == NULL )
+		return CLFFT_INVALID_PLAN;
+
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	OPENCL_V( fftRepo.getPlan( *plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+
+	//	Recursively destroy subplans, that are used for higher dimensional FFT's.
+	//	Their return codes are deliberately not checked, so one failure does not stop the rest of the teardown.
+	if( fftPlan->planX )		clfftDestroyPlan( &fftPlan->planX );
+	if( fftPlan->planY )		clfftDestroyPlan( &fftPlan->planY );
+	if( fftPlan->planZ )		clfftDestroyPlan( &fftPlan->planZ );
+	if( fftPlan->planTX )		clfftDestroyPlan( &fftPlan->planTX );
+	if( fftPlan->planTY )		clfftDestroyPlan( &fftPlan->planTY );
+	if( fftPlan->planTZ )		clfftDestroyPlan( &fftPlan->planTZ );
+	if( fftPlan->planRCcopy )	clfftDestroyPlan( &fftPlan->planRCcopy );
+
+	//	With the sub-plans gone, remove this plan itself from the repository
+	fftRepo.deletePlan( plHandle );
+
+	return	CLFFT_SUCCESS;
+}
+
+//	This routine will query the OpenCL context for its devices
+//	and their hardware limitations, which we synthesize into a
+//	hardware "envelope".
+//	We only query the devices the first time we're called after
+//	the object's context is set.  On 2nd and subsequent calls,
+//	we just return the pointer.
+//
+clfftStatus FFTPlan::SetEnvelope ()
+{
+	//	Fills in 'envelope' with the tightest limits found across the context's devices; a no-op once filled.
+	// TODO  The caller has already acquired the lock on *this
+	//	However, we shouldn't depend on it.
+
+	if (0 == envelope.limit_LocalMemSize) do {
+		//	Nothing stored yet: query OpenCL for the device info.  The limits are collected in a local
+		//	copy and stored only once complete, so an early error return cannot cache a partial envelope.
+		memset (&envelope, 0, sizeof(envelope));
+		FFTEnvelope limits;
+
+		//	Get the size needed for the device list
+		size_t deviceListSize = 0;
+		OPENCL_V( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize ),
+			_T("Getting device array size ( ::clGetContextInfo() )" ));
+		cl_uint n = cl_uint (deviceListSize / sizeof(cl_device_id));
+		if (n == 0) break;
+
+		std::vector< cl_device_id > devices( n+1 );	//	local list; hides the member of the same name
+		//	Get the device list
+		OPENCL_V( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, &devices[ 0 ], NULL ),
+			_T("Getting device array ( ::clGetContextInfo() )") );
+
+		//	Decide how many of the devices to inspect: only the first one when it reports
+		//	"OpenCL 1.0", otherwise the count given by CL_CONTEXT_NUM_DEVICES.
+		cl_uint cContextDevices	= 0;
+
+		size_t deviceVersionSize	= 0;
+		OPENCL_V( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ),
+			_T("Getting CL_DEVICE_VERSION Info string size ( ::clGetDeviceInfo() )" ));
+
+		std::vector< char > szDeviceVersion( deviceVersionSize + 1 );	//	+1: never empty, always NUL-terminated
+		OPENCL_V( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ),
+			_T("Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" ));
+
+		char openclstr[11]="OpenCL 1.0";
+
+		if (!strncmp((const char*)&szDeviceVersion[ 0 ], openclstr, 10))
+		{
+			cContextDevices	= 1;
+		}
+		else
+		{
+			OPENCL_V( ::clGetContextInfo( context, CL_CONTEXT_NUM_DEVICES, sizeof( cContextDevices ), &cContextDevices, NULL ),
+				_T("Getting number of context devices ( ::clGetContextInfo() )" ));
+		}
+
+		cContextDevices = std::min<cl_uint> (cContextDevices, n);
+		if (0 == cContextDevices)
+			break;
+
+		limits.limit_LocalMemSize  = ~0;
+		limits.limit_WorkGroupSize = ~0;
+		limits.limit_Dimensions    = countOf (limits.limit_Size);
+		for (size_t u = 0; u < countOf (limits.limit_Size); ++u) {
+			limits.limit_Size[u] = ~0;
+		}
+
+		for( cl_uint i = 0; i < cContextDevices; ++i )
+		{
+			cl_device_id devId = devices[i];
+
+			cl_ulong memsize = 0;
+			unsigned int maxdim = 0;
+			size_t temp[countOf (limits.limit_Size)];
+			memset (&temp, 0, sizeof(temp));
+
+			OPENCL_V( ::clGetDeviceInfo( devId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( cl_ulong ), &memsize, NULL ),
+				_T("Getting CL_DEVICE_LOCAL_MEM_SIZE device info ( ::clGetDeviceInfo() )") );
+			limits.limit_LocalMemSize = std::min<cl_ulong> (limits.limit_LocalMemSize, memsize);
+
+			OPENCL_V( ::clGetDeviceInfo( devId, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( unsigned int ), &maxdim, NULL ),
+				_T("Getting CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS device info ( ::clGetDeviceInfo() )") );
+			BUG_CHECK (countOf (limits.limit_Size) >= maxdim);
+			limits.limit_Dimensions = std::min<size_t> (limits.limit_Dimensions, maxdim);
+
+			OPENCL_V( ::clGetDeviceInfo( devId, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size_t ), &temp[0], NULL ),
+				_T("Getting CL_DEVICE_MAX_WORK_GROUP_SIZE device info ( ::clGetDeviceInfo() )") );
+			limits.limit_WorkGroupSize = std::min<size_t> (limits.limit_WorkGroupSize, temp[0]);
+
+			OPENCL_V( ::clGetDeviceInfo( devId, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( temp ), &temp[0], NULL ),
+				_T("Getting CL_DEVICE_MAX_WORK_ITEM_SIZES device info ( ::clGetDeviceInfo() )") );
+			for (size_t u = 0; u < limits.limit_Dimensions; ++u) {
+				BUG_CHECK (temp[u] > 0)
+				limits.limit_Size[u] = std::min<size_t> (limits.limit_Size[u], temp[u]);
+			}
+		}
+
+		BUG_CHECK (limits.limit_LocalMemSize >= 1024)
+		envelope = limits;	//	complete and validated: store it so later calls skip the query
+	} while (0);
+
+	return CLFFT_SUCCESS;
+}
+
+clfftStatus FFTPlan::AllocateBuffers ()
+{
+	//	Creates the plan's constant buffer (CLFFT_CB_SIZE ints, CL_MEM_READ_ONLY) after
+	//	releasing whatever buffers the plan still holds.
+	//	Returns the status set by clCreateBuffer, converted to clfftStatus.
+
+	assert( NULL == const_buffer );
+	ReleaseBuffers( );
+
+	//	The byte count below is derived from sizeof(int), which is expected to be 4.
+	assert( 4 == sizeof( int ) );
+
+	cl_int createStatus = CL_SUCCESS;
+	const size_t bufferBytes = CLFFT_CB_SIZE * sizeof( int );
+
+	//	No host pointer is supplied (the fourth argument is 0).
+	const_buffer = clCreateBuffer( context, CL_MEM_READ_ONLY, bufferBytes, 0, &createStatus );
+
+	//	A single allocation is made, so its status is the overall result.
+	return static_cast< clfftStatus >( createStatus );
+}
+
+clfftStatus FFTPlan::ReleaseBuffers ()
+{
+	//	Releases every OpenCL buffer owned by the plan and resets the handles to NULL,
+	//	so that calling this again (the destructor does) is harmless.
+	//
+	//	Returns CLFFT_SUCCESS when every release succeeded, otherwise the status of the
+	//	first release that failed.  A failure does not stop the remaining releases.
+	clfftStatus result = CLFFT_SUCCESS;
+
+	//	intBufferC2R used to be skipped here, which leaked it when the plan was destroyed.
+	cl_mem* const ownedBuffers[] =
+	{
+		&const_buffer,
+		&intBuffer,
+		&intBufferRC,
+		&intBufferC2R,
+	};
+
+	for( size_t i = 0; i < countOf( ownedBuffers ); ++i )
+	{
+		cl_mem& buffer = *ownedBuffers[ i ];
+		if( NULL == buffer )
+			continue;
+
+		const clfftStatus tmp = static_cast< clfftStatus >( clReleaseMemObject( buffer ) );
+		buffer = NULL;
+		if( CLFFT_SUCCESS == result )
+			result = tmp;
+	}
+	return	result;
+}
+
+clfftStatus  FFTPlan::GetWorkSizes (std::vector<size_t> & globalws, std::vector<size_t> & localws) const
+{
+	//	Forward to the work-size routine specialized for this plan's generator.
+	if( Stockham == gen )		return GetWorkSizesPvt<Stockham>( globalws, localws );
+	if( Transpose == gen )		return GetWorkSizesPvt<Transpose>( globalws, localws );
+	if( Copy == gen )			return GetWorkSizesPvt<Copy>( globalws, localws );
+	//	Any other generator value is unsupported.
+	assert( false );
+	return CLFFT_NOTIMPLEMENTED;
+}
+
+clfftStatus  FFTPlan::GetKernelGenKey (FFTKernelGenKeyParams & params) const
+{
+	//	Forward to the key builder specialized for this plan's generator.
+	if( Stockham == gen )		return GetKernelGenKeyPvt<Stockham>( params );
+	if( Transpose == gen )		return GetKernelGenKeyPvt<Transpose>( params );
+	if( Copy == gen )			return GetKernelGenKeyPvt<Copy>( params );
+	//	Any other generator value is unsupported.
+	assert( false );
+	return CLFFT_NOTIMPLEMENTED;
+}
+
+clfftStatus  FFTPlan::GenerateKernel (FFTRepo & fftRepo) const
+{
+	//	Forward to the kernel generator specialized for this plan's generator type.
+	if( Stockham == gen )		return GenerateKernelPvt<Stockham>( fftRepo );
+	if( Transpose == gen )		return GenerateKernelPvt<Transpose>( fftRepo );
+	if( Copy == gen )			return GenerateKernelPvt<Copy>( fftRepo );
+	//	Any other generator value is unsupported.
+	assert( false );
+	return CLFFT_NOTIMPLEMENTED;
+}
+
+clfftStatus FFTPlan::GetMax1DLength (size_t *longest ) const
+{
+	//	Stores in *longest the limit for this plan's generator: computed by
+	//	GetMax1DLengthPvt for Stockham, a fixed 4096 for Transpose and Copy.
+	if( Stockham == gen )
+		return GetMax1DLengthPvt<Stockham>( longest );
+	//	No restriction for transpose kernel
+	if( (Transpose == gen) || (Copy == gen) ) { *longest = 4096; return CLFFT_SUCCESS; }
+	assert( false );	//	unknown generator
+	return CLFFT_NOTIMPLEMENTED;
+}
+
+clfftStatus FFTPlan::GetEnvelope (const FFTEnvelope ** ppEnvelope) const
+{	//	Stores the address of the plan's envelope in *ppEnvelope; a NULL ppEnvelope yields CLFFT_INVALID_ARG_VALUE.
+	ARG_CHECK( NULL != ppEnvelope );	//	replaces the old "&envelope == NULL" test, which could never be true
+	*ppEnvelope = &envelope;
+	return CLFFT_SUCCESS;
+}
+
+size_t FFTPlan::ElementSize() const
+{	//	Bytes per complex element: std::complex<double> for CLFFT_DOUBLE / CLFFT_DOUBLE_FAST, std::complex<float> otherwise.
+	return ( (precision != CLFFT_DOUBLE) && (precision != CLFFT_DOUBLE_FAST) ) ? sizeof( std::complex<float> ) : sizeof( std::complex<double> );
+}
+
diff --git a/src/library/plan.h b/src/library/plan.h
new file mode 100644
index 00000000..ec96fadb
--- /dev/null
+++ b/src/library/plan.h
@@ -0,0 +1,360 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( AMD_CLFFT_plan_H )
+#define AMD_CLFFT_plan_H
+#include <cstring>
+#include "private.h"
+#include "lock.h"
+#include "generator.h"
+
+namespace ARBITRARY {
+	// TODO:  These arbitrary parameters should be tuned for the type of GPU
+	//	being used.  These values are probably OK for Radeon 58xx and 68xx.
+	enum {
+		MAX_DIMS  = 3,
+			//  The clEnqueueNDRangeKernel accepts a multi-dimensional domain array.
+			//  The # of dimensions is arbitrary, but limited by the OpenCL implementation
+			//  usually to 3 dimensions (CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS).
+			//  The kernel generator also assumes a limit on the # of dimensions.
+
+		SIMD_WIDTH = 64,
+			//  Workgroup size.  This is the # of work items that share
+			//  local data storage (LDS).  This # is best for Evergreen gpus,
+			//  but might change in the future.
+
+		LDS_BANK_BITS = 5,
+		LDS_BANK_SIZE = (1 << LDS_BANK_BITS),
+		LDS_PADDING   = false,//true,
+			//  On AMD hardware, the low-order bits of the local_id enumerate
+			//  the work items that access LDS in parallel.  Ideally, we will
+			//  pad our LDS arrays so that these work items access different banks
+			//  of the LDS.
+			//  2 ** LDS_BANK_BITS is the number of LDS banks.
+			//  If LDS_PADDING is non-zero, the kernel generator should pad the
+			//  LDS arrays to reduce or eliminate bank conflicts.
+
+		LDS_FRACTION_IDEAL = 6,    // i.e., 1/6th
+		LDS_FRACTION_MAX   = 4,    // i.e., 1/4
+			//  For best performance, each workgroup should use 1/IDEAL'th the amount of LDS
+			//  revealed by clGetDeviceInfo (.. CL_DEVICE_LOCAL_MEM_SIZE, ...)
+			//  However, we can use up to 1/MAX'th of LDS per workgroup when necessary to
+			//  perform the FFT in a single pass instead of multiple passes.
+			//  This tuning parameter is a good value for Evergreen gpus,
+			//  but might change in the future.
+
+		LDS_COMPLEX = false,
+			//  This is the default value for FFTKernelGenKeyParams::fft_LdsComplex.
+			//  The generated kernels require so many bytes of LDS for each single precision
+			//  complex number in the vector.
+			//  If LDS_COMPLEX, then we declare an LDS array of complex numbers (8 bytes each)
+			//  and swap data between workitems with a single barrier.
+			//  If ! LDS_COMPLEX, then we declare an LDS array of scalar numbers (4 bytes each)
+			//  and swap data between workitems in two phases, with extra barriers.
+			//  The former approach uses fewer instructions and barriers;
+			//  The latter uses half as much LDS space, so twice as many wavefronts can be run
+			//  in parallel.
+
+		TWIDDLE_DEE = 4,
+			//  4 bits per row of matrix.
+	};
+};
+
+enum eConstantBuffer {
+	/*	Layout of a constant buffer passed to the generated kernel
+	 *	This needs to be known by the kernel generator and by the
+	 *	framework code that creates the buffer and fills it at execution time.
+	*/
+
+	//	 [0] uint  NY   This is the batchsize for a 1D Array,
+	//                    or the 2nd (Y dimension) for a 2D.
+	//	 [1] uint  NZ   This is the batchsize for a 2D Array,
+	//                    or the 3rd (Z dimension) for a 3D.
+	//	 [2] uint  NW   This is the batchsize for a 3D Array,
+	//                    or the 4th (W dimension) for a 4D.
+	//	 [3] uint  N5   This is the batchsize for a 4D Array.
+	//
+	CLFFT_CB_NY = 0,
+	CLFFT_CB_NZ,
+	CLFFT_CB_NW,
+	CLFFT_CB_N5,
+
+	//	 [4] uint  ISX  Input data X stride (== 1 for row-major compact data)
+	//	 [5] uint  ISY  Input data Y stride (== X for row-major compact data)
+	//	 [6] uint  ISZ  Input data Z stride (== X*Y for row-major compact data)
+	//	 [7] uint  ISW  Input data W stride (== X*Y*Z for row-major compact data)
+	//	 [8] uint  IS5  Input data 5th stride
+	//
+	CLFFT_CB_ISX,
+	CLFFT_CB_ISY,
+	CLFFT_CB_ISZ,
+	CLFFT_CB_ISW,
+	CLFFT_CB_IS5,
+
+	//	 [9] uint  OSX  Output data X stride
+	//	[10] uint  OSY  Output data Y stride
+	//	[11] uint  OSZ  Output data Z stride
+	//	[12] uint  OSW  Output data W stride
+	//	[13] uint  OS5  Output data 5th stride
+	//
+	CLFFT_CB_OSX,
+	CLFFT_CB_OSY,
+	CLFFT_CB_OSZ,
+	CLFFT_CB_OSW,
+	CLFFT_CB_OS5,
+
+	CLFFT_CB_SIZE  = 32,	//	element count of the buffer (FFTPlan::AllocateBuffers allocates CLFFT_CB_SIZE ints); more than the 14 slots named above
+};
+
+struct FFTKernelGenKeyParams {
+	/*
+	 *	This structure distills a subset of the fftPlan data,
+	 *	including all information that is used to generate the OpenCL kernel.
+	 *	This structure can be used as a key to reusing kernels that have already
+	 *	been compiled.
+	 */
+	size_t                   fft_DataDim;       // Dimensionality of the data
+	size_t                   fft_N[5];          // [0] is FFT size, e.g. 1024
+	                                            // This must be <= size of LDS!
+	size_t                   fft_inStride [5];  // input strides
+	size_t                   fft_outStride[5];  // output strides
+
+	clfftResultLocation   fft_placeness;
+	clfftLayout           fft_inputLayout;
+	clfftLayout           fft_outputLayout;
+	clfftPrecision        fft_precision;
+	double                   fft_fwdScale;
+	double                   fft_backScale;
+
+	size_t                   fft_SIMD;          // Assume this SIMD/workgroup size
+	size_t                   fft_LDSsize;       // Limit the use of LDS to this many bytes.
+	size_t                   fft_R;             // # of complex values to keep in working registers
+	                                            // SIMD size * R must be <= size of LDS!
+	size_t                   fft_MaxRadix;      // Limit the radix to this value.
+	size_t					 fft_MaxWorkGroupSize; // Limit for work group size
+	bool                     fft_LdsComplex;    // If true, store complex values in LDS memory
+	                                            // If false, store scalar values in LDS.
+	                                            // Generally, false will provide more efficient kernels,
+	                                            // but not always.
+	                                            // see FFTPlan::bLdsComplex and ARBITRARY::LDS_COMPLEX
+	bool                     fft_ldsPadding;    // default padding is false
+	bool                     fft_3StepTwiddle;  // This is one pass of the "3-step" algorithm;
+	                                            // so extra twiddles are applied on output.
+	bool                     fft_UseFMA;        // *** TODO
+	bool                     fft_RCsimple;      // NOTE(review): presumably mirrors FFTPlan::RCsimple -- confirm
+};
+
+
+//	Sorting operator for struct FFTKernelGenKeyParams, such that it can be used in a map
+bool operator<( const FFTKernelGenKeyParams& lhs, const FFTKernelGenKeyParams& rhs);
+
+//	The "envelope" is a set of limits imposed by the hardware
+//	This will depend on the GPU(s) in the OpenCL context.
+//	If there are multiple devices, this should be the least
+//	common denominators.
+//
+struct FFTEnvelope {
+	//  Smallest CL_DEVICE_LOCAL_MEM_SIZE over the devices; 0 while the envelope has not been filled in
+	cl_ulong   limit_LocalMemSize;
+	//  Smallest CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS over the devices
+	size_t     limit_Dimensions;
+	//  Per-dimension minima of CL_DEVICE_MAX_WORK_ITEM_SIZES[0..n]
+	size_t     limit_Size[8];
+	//  Smallest CL_DEVICE_MAX_WORK_GROUP_SIZE over the devices
+	size_t     limit_WorkGroupSize;
+
+	// ??  CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
+	//  A freshly constructed envelope has every limit set to zero.
+	FFTEnvelope ()
+	:	limit_LocalMemSize (0)
+	,	limit_Dimensions (0)
+	,	limit_WorkGroupSize (0)
+	{
+		for (size_t u = 0; u < sizeof (limit_Size) / sizeof (limit_Size[0]); ++u) limit_Size[u] = 0;
+	}
+};
+
+class FFTRepo;
+
+//	This class contains objects that are specific to a particular FFT transform, and the data herein is useful
+//	for us to know ahead of transform time such that we can optimize for these settings
+class	FFTPlan
+{	//	Per-generator implementations; the public members of the same name (minus "Pvt") select one through 'gen'.
+	template <clfftGenerators G>
+	clfftStatus GetWorkSizesPvt (std::vector<size_t> & globalws, std::vector<size_t> & localws) const;
+
+	template <clfftGenerators G>
+	clfftStatus GetKernelGenKeyPvt (FFTKernelGenKeyParams & params) const;
+
+	template <clfftGenerators G>
+	clfftStatus GenerateKernelPvt (FFTRepo& fftRepo) const;
+
+	template <clfftGenerators G>
+	clfftStatus GetMax1DLengthPvt (size_t *longest ) const;
+
+public:
+	bool baked;
+	bool readFromFile;
+
+	//	Properties provided by the user.
+	clfftDim             dim;
+	clfftLayout          inputLayout;
+	clfftLayout          outputLayout;
+	clfftResultLocation  placeness;
+	clfftResultTransposed transposed;
+	clfftPrecision       precision;
+	cl_context              context;
+	double                  forwardScale, backwardScale;
+	size_t                  iDist, oDist;
+	size_t                  batchsize;
+
+	//	Devices that the user specified in the context passed to the create function
+	std::vector< cl_device_id > devices;
+
+	//	Length of the FFT in each dimension
+	std::vector< size_t >	length;
+
+	//	Stride of the FFT in each dimension
+	std::vector< size_t >	inStride, outStride;
+
+	//	Hardware Limits
+	FFTEnvelope                 envelope;
+
+	//	Performance Tuning parameters
+	bool                    bLdsComplex;	// see ARBITRARY::LDS_COMPLEX
+	bool                    ldsPadding;     // see ARBITRARY::LDS_PADDING
+	unsigned                uLdsFraction;	// see ARBITRARY::LDS_FRACTION_IDEAL
+
+	// Reserved copy for large 1d, 2d, and 3d plan
+	size_t tmpBufSize;
+	cl_mem intBuffer;
+
+	// for RC copies
+	size_t	tmpBufSizeRC;
+	cl_mem	intBufferRC;
+
+	// for C-to-R transforms with largeness in Y or Z dimension
+	size_t  tmpBufSizeC2R;
+	cl_mem  intBufferC2R;
+
+	//extra cache size for 2d and 3d
+	size_t  cacheSize;
+	size_t  large1D;
+	bool    large2D;
+	size_t  large1D_Xfactor;
+	clfftPlanHandle planX;
+	clfftPlanHandle planY;
+	clfftPlanHandle planZ;
+
+	bool transflag;
+	clfftPlanHandle planTX;
+	clfftPlanHandle planTY;
+	clfftPlanHandle planTZ; //reserve for 3D transpose
+
+	clfftPlanHandle planRCcopy;
+
+	// Plan resources
+	//
+	cl_mem const_buffer;
+
+	// Generator type
+	clfftGenerators gen;
+
+	// stored binaries
+	size_t number_of_devices;
+
+//TODO caching kernel binaries for later reload
+#if 0
+	std::unique_ptr<size_t[]> binary_sizes;
+	std::vector< std::unique_ptr<char[]> > binaries;
+#endif
+
+	// Real-Complex simple flag
+	// if this is set we do real to-and-from full complex using simple algorithm
+	// where imaginary of input is set to zero in forward and imaginary not written in backward
+	bool RCsimple;
+
+	FFTPlan ()	//	initializers are listed in member declaration order, the order in which C++ runs them
+	:	baked (false)
+	,	readFromFile (false)
+	,	dim (CLFFT_1D)
+	,	inputLayout (CLFFT_COMPLEX_INTERLEAVED)
+	,	outputLayout (CLFFT_COMPLEX_INTERLEAVED)
+	,	placeness (CLFFT_INPLACE)
+	,	transposed (CLFFT_NOTRANSPOSE)
+	,	precision (CLFFT_SINGLE)
+	,	context (NULL)
+	,	forwardScale (1.0)
+	,	backwardScale (1.0)
+	,	iDist( 1 ), oDist( 1 )
+	,	batchsize (1)
+	,	bLdsComplex (ARBITRARY::LDS_COMPLEX)
+	,	ldsPadding (ARBITRARY::LDS_PADDING)
+	,	uLdsFraction (0/*ARBITRARY::LDS_FRACTION_IDEAL*/)
+	,	tmpBufSize (0)
+	,	intBuffer (NULL)
+	,	tmpBufSizeRC (0)
+	,	intBufferRC (NULL)
+	,	tmpBufSizeC2R (0)
+	,	intBufferC2R (NULL)
+	,	cacheSize (0)
+	,	large1D (0)
+	,	large2D (false)
+	,	large1D_Xfactor (0)
+	,	planX (0)
+	,	planY (0)
+	,	planZ (0)
+	,	transflag (false)
+	,	planTX (0)
+	,	planTY (0)
+	,	planTZ (0)
+	,	planRCcopy (0)
+	,	const_buffer (NULL)
+	,	gen (Stockham)
+	,	number_of_devices (0)
+	,	RCsimple (false)
+	{}
+
+	//	Size in bytes of one complex element at the plan's precision
+	size_t ElementSize() const;
+
+	clfftStatus AllocateBuffers ();
+	clfftStatus ReleaseBuffers ();
+
+	clfftStatus GetWorkSizes (std::vector<size_t> & globalws, std::vector<size_t> & localws) const;
+	clfftStatus GetKernelGenKey (FFTKernelGenKeyParams & params) const;
+	clfftStatus GenerateKernel (FFTRepo & fftRepo) const;
+	clfftStatus GetMax1DLength (size_t *longest ) const;
+
+	void ResetBinarySizes();
+	void ResetBinaries();
+
+	clfftStatus CompressPlan();
+	clfftStatus ConstructAndEnqueueConstantBuffers( cl_command_queue* commQueueFFT );
+
+	clfftStatus GetEnvelope (const FFTEnvelope **) const;
+	clfftStatus SetEnvelope ();
+
+	~FFTPlan ()	//	NOTE(review): releases the cl_mem handles, yet copying is not disabled; a copied plan would release them twice
+	{
+		ReleaseBuffers ();
+	}
+};
+
+#endif // AMD_CLFFT_plan_H
+
diff --git a/src/library/private.h b/src/library/private.h
new file mode 100644
index 00000000..fe3770d7
--- /dev/null
+++ b/src/library/private.h
@@ -0,0 +1,342 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_private_H )
+#define CLFFT_private_H
+
+#include <vector>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <cassert>
+#include "../include/clFFT.h"
+#include "../include/unicode.compatibility.h"
+
+#if defined(_MSC_VER)
+	//	Microsoft Visual C++ compiler
+	//	SPRINTF( buffer, count, format, ... ): formats into buffer, passing _TRUNCATE to _snprintf_s.
+#define SPRINTF(_buffer, _count, _format,...)                        \
+	_snprintf_s (_buffer, _count, _TRUNCATE, _format, __VA_ARGS__)
+#elif defined( __GNUC__ )
+	//	Gnu G++
+	//	Expands to a braced block (a statement, not an expression); needs _count >= 1 and at least one argument after _format.
+#define SPRINTF(_buffer, _count, _format,...)                   \
+	{	size_t len = (_count)-1;                                \
+		snprintf (_buffer, len, _format,__VA_ARGS__);           \
+		_buffer[len] = 0;                                       \
+	}
+#else
+#error Unknown/unsupported C++ compiler.
+#endif
+
+//	Creating a portable definition of countof: the number of elements of an array (the sizeof form is wrong for a pointer)
+#if defined( _WIN32 )
+	#define countOf _countof
+#else
+	#define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) )
+#endif
+
+#if defined( _WIN32 )
+	#include <intrin.h>
+	//	BSF stores in *index the position of the lowest set bit of mask (mask must be non-zero).
+	#if defined( _WIN64 )
+		inline void BSF( unsigned long* index, size_t& mask )
+		{
+			_BitScanForward64( index, mask );
+		}
+		//	AtomicAdd atomically adds op to *value and returns the value held before the addition.
+		inline size_t AtomicAdd( volatile size_t* value, size_t op )
+		{
+			return _InterlockedExchangeAdd64( reinterpret_cast< volatile __int64* >( value ), op );
+		}
+	#else
+		inline void BSF( unsigned long* index, size_t& mask )
+		{
+			_BitScanForward( index, mask );
+		}
+
+		inline size_t AtomicAdd( volatile size_t* value, size_t op )
+		{
+			return _InterlockedExchangeAdd( reinterpret_cast< volatile long* >( value ), op );
+		}
+	#endif
+#elif defined( __GNUC__ )
+	inline void BSF( unsigned long * index, size_t & mask )
+	{	//	BSF stores in *index the position of the lowest set bit of mask (mask must be non-zero).
+		*index = __builtin_ctzll( mask );	//	64-bit variant: __builtin_ctz takes an unsigned int and would drop the upper half of a 64-bit size_t
+	}
+	//	AtomicAdd atomically adds op to *value and returns the value held before the addition.
+	inline size_t AtomicAdd( volatile size_t* value, size_t op )
+	{
+		return __sync_fetch_and_add( value, op );
+	}
+#endif
+
+//	This header file is not visible to clients, and contains internal structures and functions for use
+//	by the FFT library.  Since this header is private to this implementation, there is no need to keep
+//	strict C compliance.
+
+//	Enum to help provide descriptive names to array indices, when indexing into our various vectors
+enum clfftDim_Index
+{
+	DimX,				///< 1st dimension (index 0)
+	DimY,				///< 2nd dimension (index 1)
+	DimZ,				///< 3rd dimension (index 2)
+	DimW,				///< 4th dimension (index 3)
+	ENDDIMINDEX			///< This value will always be last, and marks the length of clfftDim_Index
+};
+
+template< typename FileStreamType, typename StringType >
+class tofstreamRAII
+{
+	//	Wraps a file stream that is opened on construction and closed on destruction.
+	FileStreamType	outFile;
+	StringType		fileName;
+
+	public:
+		//	Stores a copy of 'name' and opens the stream on it.
+		tofstreamRAII( const StringType& name ): fileName( name )
+		{
+			outFile.open( fileName.c_str( ) );
+		}
+
+		//	Closes the stream.
+		~tofstreamRAII( )
+		{
+			outFile.close( );
+		}
+
+		//	Gives access to the stored name.
+		StringType& getName( )
+		{	return fileName;	}
+
+		//	Replaces the stored name only; the stream is not reopened.
+		void setName( const StringType& name )
+		{	fileName = name;	}
+
+		//	Gives access to the underlying stream.
+		FileStreamType& get( )
+		{	return outFile;	}
+};
+
+//	(currently) true if length is a product of powers of 2, 3 and 5 only (1 counts; 0 is never supported)
+inline bool IsASupportedLength( size_t length )
+{
+	//	0 has no factorization; it used to skip the loop and be reported as supported.
+	if( length == 0 )
+		return false;
+
+	//	Strip each supported prime factor; whatever is left must be 1.
+	static const size_t radices[] = { 2, 3, 5 };
+	for( size_t r = 0; r < sizeof( radices ) / sizeof( radices[ 0 ] ); ++r )
+	{
+		while( length % radices[ r ] == 0 )
+			length /= radices[ r ];
+	}
+	return length == 1;
+}
+
+inline tstring clfftErrorStatusAsString( const cl_int& status )
+{
+	//	Maps a status code to the spelling of its CLFFT_* enumerator.
+	//	Each code is compared in turn; the text returned is the enumerator's
+	//	own name, and unknown codes get a fixed fallback message.
+	if( CLFFT_INVALID_GLOBAL_WORK_SIZE == status )
+		return _T( "CLFFT_INVALID_GLOBAL_WORK_SIZE" );
+	if( CLFFT_INVALID_MIP_LEVEL == status )
+		return _T( "CLFFT_INVALID_MIP_LEVEL" );
+	if( CLFFT_INVALID_BUFFER_SIZE == status )
+		return _T( "CLFFT_INVALID_BUFFER_SIZE" );
+	if( CLFFT_INVALID_GL_OBJECT == status )
+		return _T( "CLFFT_INVALID_GL_OBJECT" );
+	if( CLFFT_INVALID_OPERATION == status )
+		return _T( "CLFFT_INVALID_OPERATION" );
+	if( CLFFT_INVALID_EVENT == status )
+		return _T( "CLFFT_INVALID_EVENT" );
+	if( CLFFT_INVALID_EVENT_WAIT_LIST == status )
+		return _T( "CLFFT_INVALID_EVENT_WAIT_LIST" );
+	if( CLFFT_INVALID_GLOBAL_OFFSET == status )
+		return _T( "CLFFT_INVALID_GLOBAL_OFFSET" );
+	if( CLFFT_INVALID_WORK_ITEM_SIZE == status )
+		return _T( "CLFFT_INVALID_WORK_ITEM_SIZE" );
+	if( CLFFT_INVALID_WORK_GROUP_SIZE == status )
+		return _T( "CLFFT_INVALID_WORK_GROUP_SIZE" );
+	if( CLFFT_INVALID_WORK_DIMENSION == status )
+		return _T( "CLFFT_INVALID_WORK_DIMENSION" );
+	if( CLFFT_INVALID_KERNEL_ARGS == status )
+		return _T( "CLFFT_INVALID_KERNEL_ARGS" );
+	if( CLFFT_INVALID_ARG_SIZE == status )
+		return _T( "CLFFT_INVALID_ARG_SIZE" );
+	if( CLFFT_INVALID_ARG_VALUE == status )
+		return _T( "CLFFT_INVALID_ARG_VALUE" );
+	if( CLFFT_INVALID_ARG_INDEX == status )
+		return _T( "CLFFT_INVALID_ARG_INDEX" );
+	if( CLFFT_INVALID_KERNEL == status )
+		return _T( "CLFFT_INVALID_KERNEL" );
+	if( CLFFT_INVALID_KERNEL_DEFINITION == status )
+		return _T( "CLFFT_INVALID_KERNEL_DEFINITION" );
+	if( CLFFT_INVALID_KERNEL_NAME == status )
+		return _T( "CLFFT_INVALID_KERNEL_NAME" );
+	if( CLFFT_INVALID_PROGRAM_EXECUTABLE == status )
+		return _T( "CLFFT_INVALID_PROGRAM_EXECUTABLE" );
+	if( CLFFT_INVALID_PROGRAM == status )
+		return _T( "CLFFT_INVALID_PROGRAM" );
+	if( CLFFT_INVALID_BUILD_OPTIONS == status )
+		return _T( "CLFFT_INVALID_BUILD_OPTIONS" );
+	if( CLFFT_INVALID_BINARY == status )
+		return _T( "CLFFT_INVALID_BINARY" );
+	if( CLFFT_INVALID_SAMPLER == status )
+		return _T( "CLFFT_INVALID_SAMPLER" );
+	if( CLFFT_INVALID_IMAGE_SIZE == status )
+		return _T( "CLFFT_INVALID_IMAGE_SIZE" );
+	if( CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR == status )
+		return _T( "CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR" );
+	if( CLFFT_INVALID_MEM_OBJECT == status )
+		return _T( "CLFFT_INVALID_MEM_OBJECT" );
+	if( CLFFT_INVALID_HOST_PTR == status )
+		return _T( "CLFFT_INVALID_HOST_PTR" );
+	if( CLFFT_INVALID_COMMAND_QUEUE == status )
+		return _T( "CLFFT_INVALID_COMMAND_QUEUE" );
+	if( CLFFT_INVALID_QUEUE_PROPERTIES == status )
+		return _T( "CLFFT_INVALID_QUEUE_PROPERTIES" );
+	if( CLFFT_INVALID_CONTEXT == status )
+		return _T( "CLFFT_INVALID_CONTEXT" );
+	if( CLFFT_INVALID_DEVICE == status )
+		return _T( "CLFFT_INVALID_DEVICE" );
+	if( CLFFT_INVALID_PLATFORM == status )
+		return _T( "CLFFT_INVALID_PLATFORM" );
+	if( CLFFT_INVALID_DEVICE_TYPE == status )
+		return _T( "CLFFT_INVALID_DEVICE_TYPE" );
+	if( CLFFT_INVALID_VALUE == status )
+		return _T( "CLFFT_INVALID_VALUE" );
+	if( CLFFT_MAP_FAILURE == status )
+		return _T( "CLFFT_MAP_FAILURE" );
+	if( CLFFT_BUILD_PROGRAM_FAILURE == status )
+		return _T( "CLFFT_BUILD_PROGRAM_FAILURE" );
+	if( CLFFT_IMAGE_FORMAT_NOT_SUPPORTED == status )
+		return _T( "CLFFT_IMAGE_FORMAT_NOT_SUPPORTED" );
+	if( CLFFT_IMAGE_FORMAT_MISMATCH == status )
+		return _T( "CLFFT_IMAGE_FORMAT_MISMATCH" );
+	if( CLFFT_MEM_COPY_OVERLAP == status )
+		return _T( "CLFFT_MEM_COPY_OVERLAP" );
+	if( CLFFT_PROFILING_INFO_NOT_AVAILABLE == status )
+		return _T( "CLFFT_PROFILING_INFO_NOT_AVAILABLE" );
+	if( CLFFT_OUT_OF_HOST_MEMORY == status )
+		return _T( "CLFFT_OUT_OF_HOST_MEMORY" );
+	if( CLFFT_OUT_OF_RESOURCES == status )
+		return _T( "CLFFT_OUT_OF_RESOURCES" );
+	if( CLFFT_MEM_OBJECT_ALLOCATION_FAILURE == status )
+		return _T( "CLFFT_MEM_OBJECT_ALLOCATION_FAILURE" );
+	if( CLFFT_COMPILER_NOT_AVAILABLE == status )
+		return _T( "CLFFT_COMPILER_NOT_AVAILABLE" );
+	if( CLFFT_DEVICE_NOT_AVAILABLE == status )
+		return _T( "CLFFT_DEVICE_NOT_AVAILABLE" );
+	if( CLFFT_DEVICE_NOT_FOUND == status )
+		return _T( "CLFFT_DEVICE_NOT_FOUND" );
+	if( CLFFT_SUCCESS == status )
+		return _T( "CLFFT_SUCCESS" );
+	if( CLFFT_NOTIMPLEMENTED == status )
+		return _T( "CLFFT_NOTIMPLEMENTED" );
+	if( CLFFT_FILE_NOT_FOUND == status )
+		return _T( "CLFFT_FILE_NOT_FOUND" );
+	if( CLFFT_FILE_CREATE_FAILURE == status )
+		return _T( "CLFFT_FILE_CREATE_FAILURE" );
+	if( CLFFT_VERSION_MISMATCH == status )
+		return _T( "CLFFT_VERSION_MISMATCH" );
+	if( CLFFT_INVALID_PLAN == status )
+		return _T( "CLFFT_INVALID_PLAN" );
+
+	//	None of the known codes matched.
+	return _T( "Error code not defined" );
+}
+
+//	This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition.
+//	If an error occurs, we write it (with line number and msg) to terr and issue a return statement, returning the status as clfftStatus, to exit the calling function.
+#define OPENCL_V( fn, msg ) \
+{ \
+	clfftStatus vclStatus = static_cast< clfftStatus >( fn ); \
+	switch( vclStatus ) \
+	{ \
+		case	CL_SUCCESS:		/**< No error */ \
+			break; \
+		default: \
+		{ \
+			terr << _T( "OPENCL_V< " ); \
+			terr << clfftErrorStatusAsString( vclStatus ); \
+			terr << _T( " > (" )<< static_cast<unsigned>( __LINE__ ) << _T( "): " ); \
+			terr << msg << std::endl; \
+			return	vclStatus; \
+		} \
+	} \
+}
+
+static inline bool IsPo2 (size_t u) {	//	true when u is a power of two, i.e. has exactly one bit set; false for 0
+	return (0 == u) ? false : (0 == (u & (u - 1)));
+}
+
+template<typename T>
+static inline T DivRoundingUp (T a, T b) {	//	a / b rounded up for positive integers; a + (b-1) must not overflow T
+	return ((b - 1) + a) / b;
+}
+
+static inline size_t BitScanF (size_t n) {	//	index of the lowest set bit of n, obtained from BSF; n must be non-zero
+	unsigned long bitIndex = 0;
+	assert (0 != n);
+	BSF (& bitIndex, n);
+	return static_cast< size_t >( bitIndex );
+}
+
+#define ARG_CHECK(_proposition)	\
+{ bool btmp = (_proposition);	assert (btmp); if (! btmp)	return CLFFT_INVALID_ARG_VALUE; }
+//	ARG_CHECK (above) and BUG_CHECK (below) assert the proposition and, when it is false, return from the enclosing function; both expand to a braced block.
+#define BUG_CHECK(_proposition)	\
+	{ bool btmp = (_proposition);	assert (btmp); if (! btmp)	return CLFFT_BUGCHECK; }
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CLFFTAPI clfftStatus clfftLocalMemSize( const clfftPlanHandle plHandle, cl_ulong* local_mem_size );
+
+/*! @brief Save to disk a file containing the contents of a baked plan.
+*  @details A plan is a repository of state for calculating FFT's. Saves the details for a plan to allow the user
+*	to easily recreate a plan and execute it without having to first build the kernel.
+*  @param[in] plHandle Handle to the plan to be written to disk
+*  @param[in] filename The desired name of the output file for the stored plan
+*  @return Enum describing error condition; superset of OpenCL error codes
+*/
+CLFFTAPI clfftStatus	clfftWritePlanToDisk( clfftPlanHandle plHandle, const char* filename );
+
+/*! @brief Read from disk a file containing the contents of a baked plan.
+*  @details A plan is a repository of state for calculating FFT's. Reads the details for a plan from a file on disk and duplicates
+*	the plan in the provided plan handle.
+*  @param[out] plHandle Handle to the plan to be set to details from the file
+*  @param[in] filename The name of the file containing the stored plan
+*  @return Enum describing error condition; superset of OpenCL error codes
+*/
+CLFFTAPI clfftStatus	clfftReadPlanFromDisk( clfftPlanHandle plHandle, const char* filename );
+
+/* internal api to set up some plan parameters */
+CLFFTAPI clfftStatus clfftSetInternal( const clfftPlanHandle plHandle, void* dataInternal );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/library/repo.cpp b/src/library/repo.cpp
new file mode 100644
index 00000000..018dff5c
--- /dev/null
+++ b/src/library/repo.cpp
@@ -0,0 +1,320 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// repo.cpp : Implements FFTRepo, the repository of generated kernels, OpenCL programs and FFT plans shared across calls.
+//
+
+#include "stdafx.h"
+#include "repo.h"
+
+using std::map;
+using std::string;
+
+//	Static initialization of the repo lock variable
+lockRAII FFTRepo::lockRepo( _T( "FFTRepo" ) );
+
+//	Static initialization of the plan count variable; numbering starts at 1, and deletePlan( ) uses 0 to mark a destroyed handle
+size_t FFTRepo::planCount	= 1;
+
+//	Handle/Address of the dynamic module that contains the timer, that we discover and load during runtime
+void* FFTRepo::timerHandle	= NULL;
+GpuStatTimer* FFTRepo::pStatTimer	= NULL;	//	starts out NULL; clfftEnqueueTransform( ) only records samples when this is non-NULL
+
+//	Releases every cached OpenCL kernel and program, frees any plans (and their locks) that the client
+//	never destroyed, empties all three repo maps and restarts plan numbering.  Always returns CLFFT_SUCCESS.
+clfftStatus FFTRepo::releaseResources( )
+{
+	scopedLock sLock( lockRepo, _T( "releaseResources" ) );
+
+	//	Release all handles to Kernels; each slot is nulled before its handle is released
+	//
+	for(Kernel_iterator iKern = mapKernels.begin( ); iKern != mapKernels.end( ); ++iKern )
+	{
+		cl_kernel k = iKern->second.kernel_fwd;
+		iKern->second.kernel_fwd = NULL;
+		if (NULL != k)
+			clReleaseKernel( k );
+		k = iKern->second.kernel_back;
+		iKern->second.kernel_back = NULL;
+		if (NULL != k)
+			clReleaseKernel( k );
+	}
+	mapKernels.clear( );
+
+	//	Release all handles to programs; the entries themselves are dropped by mapFFTs.clear( ) below
+	//
+	for (fftRepo_iterator iProg = mapFFTs.begin( ); iProg != mapFFTs.end( ); ++iProg )
+	{
+		cl_program p = iProg->second.clProgram;
+		iProg->second.clProgram = NULL;
+		if (NULL != p)
+			clReleaseProgram (p);
+	}
+
+	//	Free all memory allocated in the repoPlans; represents cached plans that were not destroyed by the client
+	//	(deleting a NULL pointer is a no-op, so no guards are needed)
+	for( repoPlansType::iterator iter = repoPlans.begin( ); iter != repoPlans.end( ); ++iter )
+	{
+		FFTPlan* plan	= iter->second.first;
+		lockRAII* lock	= iter->second.second;
+		delete plan;
+		delete lock;
+	}
+
+	//	Drop the now-dangling entries.  Without this, handles issued before the teardown would still resolve
+	//	through getPlan( ) to freed memory, and a second releaseResources( ) would delete the same pointers again.
+	repoPlans.clear( );
+
+	//	Restart handle numbering at 1 because we are guaranteed to have destroyed all plans
+	planCount	= 1;
+
+	//	Release all strings
+	mapFFTs.clear( );
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Stores generated kernel source for (gen, fftParam) behind a copyright banner, creating or overwriting the entry.
+clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel )
+{
+	scopedLock sLock( lockRepo, _T( "setProgramCode" ) );
+
+	//	Banner placed ahead of every generated kernel: two line breaks, the notice, two more line breaks
+	std::string source( "\n\n" );
+	source += "// Copyright (C) 2010-2013 Advanced Micro Devices, Inc. All Rights Reserved.";
+	source += "\n\n";
+	source += kernel;
+
+	//	operator[] default-constructs a missing entry, so no separate insert step is required
+	mapFFTs[ fftRepoKey( gen, fftParam ) ].ProgramString = source;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Copies the stored source for (gen, fftParam) into 'kernel'; CLFFT_FILE_NOT_FOUND ('kernel' untouched) when no entry exists.
+clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel )
+{
+	scopedLock sLock( lockRepo, _T( "getProgramCode" ) );
+
+	const fftRepo_iterator entry = mapFFTs.find( fftRepoKey( gen, fftParam ) );
+	if( mapFFTs.end( ) != entry )
+	{
+		kernel = entry->second.ProgramString;
+		return	CLFFT_SUCCESS;
+	}
+	return	CLFFT_FILE_NOT_FOUND;
+}
+
+//	Records the forward/backward entry-point names for (gen, fftParam), creating the repo entry if it does not exist yet.
+clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
+	const char * kernel_fwd, const char * kernel_back )
+{
+	scopedLock sLock( lockRepo, _T( "setProgramEntryPoints" ) );
+
+	//	Both names are copied into std::string members of the entry
+	fftRepoValue& entry	= mapFFTs[ fftRepoKey( gen, fftParam ) ];
+	entry.EntryPoint_fwd.assign( kernel_fwd );
+	entry.EntryPoint_back.assign( kernel_back );
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Looks up the entry-point name for 'dir' under (gen, fftParam).  CLFFT_FILE_NOT_FOUND when the key is unknown
+//	or the stored name is empty; CLFFT_INVALID_ARG_VALUE (after asserting) for any other direction value.
+clfftStatus FFTRepo::getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
+			clfftDirection dir, std::string& kernel )
+{
+	scopedLock sLock( lockRepo, _T( "getProgramEntryPoint" ) );
+
+	const fftRepo_iterator entry = mapFFTs.find( fftRepoKey( gen, fftParam ) );
+	if( mapFFTs.end( ) == entry )
+		return	CLFFT_FILE_NOT_FOUND;
+
+	if( CLFFT_FORWARD == dir )
+	{
+		kernel = entry->second.EntryPoint_fwd;
+	}
+	else if( CLFFT_BACKWARD == dir )
+	{
+		kernel = entry->second.EntryPoint_back;
+	}
+	else
+	{
+		assert (false);
+		return CLFFT_INVALID_ARG_VALUE;
+	}
+
+	//	An entry whose name is empty is reported the same way as a missing entry
+	return	kernel.empty( ) ? CLFFT_FILE_NOT_FOUND : CLFFT_SUCCESS;
+}
+
+//	Caches the cl_program for (gen, fftParam), creating the repo entry on demand.
+//	Always returns CLFFT_SUCCESS.
+clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const cl_program& prog )
+{
+	scopedLock sLock( lockRepo, _T( "setclProgram" ) );
+
+	//	operator[] default-constructs a missing entry with clProgram == NULL, so new and existing keys share one path
+	cl_program& slot	= mapFFTs[ fftRepoKey( gen, fftParam ) ].clProgram;
+
+	//	Replacing a live program is unexpected: debug builds assert, otherwise the old handle is released first
+	assert (NULL == slot);
+	if (NULL != slot)
+	{
+		clReleaseProgram (slot);
+	}
+	slot = prog;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Fetches the cached cl_program for (gen, fftParam).  CLFFT_INVALID_PROGRAM when there is no entry ('prog' untouched)
+//	or when the entry holds a NULL program (in which case 'prog' has been set to NULL).
+clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog )
+{
+	scopedLock sLock( lockRepo, _T( "getclProgram" ) );
+
+	const fftRepo_iterator entry = mapFFTs.find( fftRepoKey( gen, fftParam ) );
+	if( mapFFTs.end( ) == entry )
+	{
+		return	CLFFT_INVALID_PROGRAM;
+	}
+
+	prog = entry->second.clProgram;
+	return	( NULL != prog ) ? CLFFT_SUCCESS : CLFFT_INVALID_PROGRAM;
+}
+
+//	Caches 'kernel' as the forward or backward kernel of 'prog'.  The map entry for 'prog' is created
+//	before 'dir' is validated, so an invalid direction still leaves an (empty) entry behind.
+clfftStatus FFTRepo::setclKernel( cl_program prog, clfftDirection dir, const cl_kernel& kernel )
+{
+	scopedLock sLock( lockRepo, _T( "setclKernel" ) );
+
+	fftKernels& kernels	= mapKernels[ prog ];
+	cl_kernel* slot	= NULL;
+	if( CLFFT_FORWARD == dir )
+		slot = &kernels.kernel_fwd;
+	else if( CLFFT_BACKWARD == dir )
+		slot = &kernels.kernel_back;
+
+	if( NULL == slot )
+	{
+		assert (false);
+		return CLFFT_INVALID_ARG_VALUE;
+	}
+
+	//	Overwriting a live kernel is unexpected: debug builds assert, otherwise the old handle is released first
+	assert (NULL == *slot);
+	if (NULL != *slot)
+		clReleaseKernel( *slot );
+	*slot = kernel;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Retrieves the cached kernel of 'prog' for 'dir'; CLFFT_INVALID_KERNEL if 'prog' is unknown or that slot is NULL.
+clfftStatus FFTRepo::getclKernel( cl_program prog, clfftDirection dir, cl_kernel& kernel )
+{
+	scopedLock sLock( lockRepo, _T( "getclKernel" ) );
+
+	const Kernel_iterator entry = mapKernels.find( prog );
+	if( mapKernels.end( ) == entry )
+		return	CLFFT_INVALID_KERNEL;
+
+	if( CLFFT_FORWARD == dir )
+	{
+		kernel = entry->second.kernel_fwd;
+	}
+	else if( CLFFT_BACKWARD == dir )
+	{
+		kernel = entry->second.kernel_back;
+	}
+	else
+	{
+		assert (false);
+		return CLFFT_INVALID_ARG_VALUE;
+	}
+
+	return	( NULL != kernel ) ? CLFFT_SUCCESS : CLFFT_INVALID_KERNEL;
+}
+
+//	Allocates a new FFTPlan together with its per-plan lock, registers both under the next handle value,
+//	and reports that handle and the plan pointer to the caller.
+clfftStatus FFTRepo::createPlan( clfftPlanHandle* plHandle, FFTPlan*& fftPlan )
+{
+	scopedLock sLock( lockRepo, _T( "insertPlan" ) );
+
+	//	The repo tracks the plan so releaseResources( ) can reclaim it if the client never destroys it
+	fftPlan	= new FFTPlan;
+
+	//	One lock per plan, living exactly as long as the plan; deletePlan( ) frees both
+	lockRAII* const planLock	= new lockRAII;
+
+	//	planCount is the unique identifier handed out; advance it once this plan is registered
+	const clfftPlanHandle handle	= planCount;
+	repoPlans[ handle ]	= repoPlansValue( fftPlan, planLock );
+	++planCount;
+	*plHandle	= handle;
+
+	return	CLFFT_SUCCESS;
+}
+
+//	Resolves a client handle to its FFTPlan and per-plan lock.  Unknown handles yield CLFFT_INVALID_PLAN
+//	and leave both output references untouched.
+clfftStatus FFTRepo::getPlan( clfftPlanHandle plHandle, FFTPlan*& fftPlan, lockRAII*& planLock )
+{
+	scopedLock sLock( lockRepo, _T( "getPlan" ) );
+
+	const repoPlansType::iterator entry	= repoPlans.find( plHandle );
+	if( repoPlans.end( ) != entry )
+	{
+		fftPlan		= entry->second.first;
+		planLock	= entry->second.second;
+		return	CLFFT_SUCCESS;
+	}
+	return	CLFFT_INVALID_PLAN;
+}
+
+//	Destroys the plan behind *plHandle: releases the plan's OpenCL context, deletes the plan and its lock,
+//	removes the repo entry and zeroes the client's handle.  Unknown handles yield CLFFT_INVALID_PLAN.
+clfftStatus FFTRepo::deletePlan( clfftPlanHandle* plHandle )
+{
+	scopedLock sLock( lockRepo, _T( "deletePlan" ) );
+
+	const repoPlansType::iterator entry	= repoPlans.find( *plHandle );
+	if( repoPlans.end( ) == entry )
+		return	CLFFT_INVALID_PLAN;
+
+	FFTPlan* const doomedPlan	= entry->second.first;
+	lockRAII* const doomedLock	= entry->second.second;
+
+	//	Hold the plan's own lock for the duration of its teardown
+	{
+		scopedLock planGuard( *doomedLock, _T( "clfftDestroyPlan" ) );
+		clReleaseContext( doomedPlan->context );
+		delete doomedPlan;
+	}
+
+	//	The guard above has gone out of scope, so the lock object itself can now be freed
+	delete doomedLock;
+	repoPlans.erase( entry );
+
+	//	Clear the client's handle to signify that the plan is gone
+	*plHandle = 0;
+
+	return	CLFFT_SUCCESS;
+}
diff --git a/src/library/repo.h b/src/library/repo.h
new file mode 100644
index 00000000..f68242de
--- /dev/null
+++ b/src/library/repo.h
@@ -0,0 +1,158 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_repo_H )
+#define CLFFT_repo_H
+#include <map>
+#include "private.h"
+#include "plan.h"
+#include "lock.h"
+#include "../statTimer/statisticalTimer.GPU.h"
+
+
+
+//	This class contains objects that we wish to retain between individual calls into the FFT interface.
+//	These objects will be shared across different individual FFT plans, and we wish to keep only one
+//	copy of these programs, objects and events.  When the client decides that they either want to reset
+//	the library or release all resources, this Repo will release all acquired resources and clean itself
+//	up as much as it can.  It is implemented as a Singleton object.
+class	FFTRepo
+{
+	//	Structure containing all the data we need to remember for a specific invocation of a kernel
+	//	generator
+	struct fftRepoValue {
+		std::string ProgramString;	//	generated kernel source text
+		std::string EntryPoint_fwd;	//	name of the forward-transform entry point
+		std::string EntryPoint_back;	//	name of the backward-transform entry point
+		cl_program  clProgram;	//	cached program handle; NULL until one is stored
+
+		fftRepoValue ()
+		:	clProgram (NULL)
+		{}
+	};
+
+	//	Map structure to map parameters that a generator uses to a specific set of kernels that the generator
+	//	has created
+	typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey;
+	typedef std::map< fftRepoKey, fftRepoValue > fftRepoType;
+	typedef fftRepoType::iterator fftRepo_iterator;
+
+	fftRepoType	mapFFTs;
+
+	struct fftKernels {	//	the forward/backward kernel pair cached for one cl_program
+		cl_kernel kernel_fwd;
+		cl_kernel kernel_back;
+
+		fftKernels ()
+		:	kernel_fwd (NULL)
+		,	kernel_back (NULL)
+		{}
+	};
+
+	typedef std::map< cl_program, fftKernels > mapKernelType;
+	typedef mapKernelType::iterator Kernel_iterator;
+	mapKernelType mapKernels;
+
+	//	All plans that the user creates over the course of using the library are stored here.
+	//	Plans can be arbitrarily created and destroyed at anytime by the user, in arbitrary order, so vector
+	//	does not seem appropriate, so a map was chosen because of the O(log N) search properties
+	//	A lock object is created for each plan, such that any getter/setter can lock the 'plan' object before
+	//	reading/writing its values.  The lock object is kept separate from the plan object so that the lock
+	//	object can be held the entire time a plan is getting destroyed in clfftDestroyPlan.
+	typedef std::pair< FFTPlan*, lockRAII* > repoPlansValue;
+	typedef std::map< clfftPlanHandle, repoPlansValue > repoPlansType;
+	repoPlansType repoPlans;
+
+	//	Next plan handle to hand out; incremented by createPlan( ) and set back to 1 by releaseResources( ).
+	//	This is used as a unique identifier for plans
+	static size_t planCount;
+
+	// Private constructor to stop explicit instantiation
+	FFTRepo( )
+	{}
+
+	// Private copy constructor to stop implicit instantiation
+	FFTRepo( const FFTRepo& );
+
+	// Private operator= to assure only 1 copy of singleton
+	FFTRepo& operator=( const FFTRepo& );
+
+	~FFTRepo( )
+	{
+		//	NOTE:  We can't release resources in our destructor because as a static object, the order of destruction of static objects
+		//	is not guaranteed, and openCL might already have cleaned itself up.  When clFFT tries to free its resources, an access
+		//	violation could occur.
+		//releaseResources( );
+
+		//	We should at least print out a warning message to the user if we are in our destructor and we still have resources
+		//	bound.  This should give the user a clue to remember to call clfftTeardown( )
+		if( (!mapKernels.empty( )) || (!mapFFTs.empty( )) )	//	only the kernel and program maps are checked; repoPlans is not
+		{
+			terr << _T( "Warning:  Program terminating, but clFFT resources not freed." ) << std::endl;
+			terr << _T( "Please consider explicitly calling clfftTeardown( )." ) << std::endl;
+		}
+	};
+
+public:
+	//	Used to make the FFTRepo struct thread safe; STL is not thread safe by default
+	//	Optimally, we could use a lock object per STL struct, as two different STL structures
+	//	can be modified at the same time, but a single lock object is easier and performance should
+	//	still be good
+	static lockRAII lockRepo;
+
+	//	Our runtime library can instrument kernel timings with a GPU timer available in a shared module
+	//	Handle/Address of the dynamic module that contains timers
+	static void* timerHandle;
+
+	//	Pointer to the timer class queried from the timer shared library
+	static GpuStatTimer* pStatTimer;
+
+	//	Global debug flags that the FFT engine can check to control various debug logic
+	clfftSetupData setupData;
+
+	//	Everybody who wants to access the Repo calls this function to get a repo reference
+	static FFTRepo& getInstance( )
+	{
+		static FFTRepo fftRepo;	//	function-local static: the single instance, constructed on first call
+		return	fftRepo;
+	};
+
+	clfftStatus releaseResources( );
+	//	Generated kernel source text, keyed by (generator, key parameters)
+	clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel );
+	clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel );
+	//	Names of the forward/backward entry points stored under the same key
+	clfftStatus setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
+		const char * kernel_fwd, const char * kernel_back );
+	clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel );
+	//	cl_program stored under the same key (the last parameter, although named 'kernel', is a cl_program)
+	clfftStatus setclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const cl_program& kernel );
+	clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel );
+	//	Forward/backward cl_kernel objects, keyed by the cl_program they belong to
+	clfftStatus setclKernel ( cl_program prog, clfftDirection dir, const cl_kernel& kernel );
+	clfftStatus getclKernel ( cl_program prog, clfftDirection dir, cl_kernel& kernel );
+	//	Plan bookkeeping: create a plan and its lock, look both up by handle, destroy them
+	clfftStatus createPlan( clfftPlanHandle* plHandle, FFTPlan*& fftPlan );
+	clfftStatus getPlan( clfftPlanHandle plHandle, FFTPlan*& fftPlan, lockRAII*& planLock );
+	clfftStatus deletePlan( clfftPlanHandle* plHandle );
+
+};
+
+
+#endif
+
diff --git a/src/library/stdafx.cpp b/src/library/stdafx.cpp
new file mode 100644
index 00000000..d87a55d8
--- /dev/null
+++ b/src/library/stdafx.cpp
@@ -0,0 +1,24 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// stdafx.cpp : source file that includes just the standard includes
+// clfft.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// Reference any additional headers you need in STDAFX.H and not in this file
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
new file mode 100644
index 00000000..e53e830f
--- /dev/null
+++ b/src/library/transform.cpp
@@ -0,0 +1,1221 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// transform.cpp : Implements clfftEnqueueTransform( ), which enqueues a plan for execution (baking it first if needed).
+//
+
+#include "stdafx.h"
+#include "private.h"
+#include "repo.h"
+#include "plan.h"
+
+//#define DEBUGGING
+
+using std::vector;
+
+clfftStatus clfftEnqueueTransform(
+											clfftPlanHandle plHandle,
+											clfftDirection dir,
+											cl_uint numQueuesAndEvents,
+											cl_command_queue* commQueues,
+											cl_uint numWaitEvents,
+											const cl_event* waitEvents,
+											cl_event* outEvents,
+											cl_mem* clInputBuffers,
+											cl_mem* clOutputBuffers,
+											cl_mem clTmpBuffers
+											)
+{
+	cl_int status = CLFFT_SUCCESS;
+
+	//	We do not currently support multiple command queues, which is necessary to support multi-gpu operations
+	if( numQueuesAndEvents > 1 )
+	{
+		return CLFFT_NOTIMPLEMENTED;
+	}
+
+	FFTRepo& fftRepo	= FFTRepo::getInstance( );
+	FFTPlan* fftPlan	= NULL;
+	lockRAII* planLock	= NULL;
+
+	//	At this point, the user wants to enqueue a plan to execute.  We lock the plan down now, such that
+	//	after we finish baking the plan (if the user did not do that explicitly before), the plan cannot
+	//	change again through the action of other thread before we enqueue this plan for execution.
+	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
+	scopedLock sLock( *planLock, _T( "clfftGetPlanBatchSize" ) );
+
+	if( fftPlan->baked == false )
+	{
+		OPENCL_V( clfftBakePlan( plHandle, numQueuesAndEvents, commQueues, NULL, NULL ), _T( "Failed to bake plan" ) );
+	}
+
+	if		(fftPlan->inputLayout == CLFFT_REAL)	dir = CLFFT_FORWARD;
+	else if	(fftPlan->outputLayout == CLFFT_REAL)	dir = CLFFT_BACKWARD;
+
+
+	// we do not check the user provided buffer at this release
+	cl_mem localIntBuffer = clTmpBuffers;
+
+	if( clTmpBuffers == NULL && fftPlan->tmpBufSize > 0 && fftPlan->intBuffer == NULL)
+	{
+		// create the intermediate buffers
+		// The intermediate buffer is always interleave and packed
+		// For outofplace operation, we have the choice not to create intermediate buffer
+		// input ->(col+Transpose) output ->(col) output
+		fftPlan->intBuffer = clCreateBuffer( fftPlan->context, CL_MEM_READ_WRITE,
+			fftPlan->tmpBufSize, 0, &status);
+		OPENCL_V( status, _T("Creating the intermediate buffer for large1D Failed") );
+
+#if defined(DEBUGGING)
+		std::cout << "One intermediate buffer is created" << std::endl;
+#endif
+	}
+
+	if( localIntBuffer == NULL && fftPlan->intBuffer != NULL )
+		localIntBuffer = fftPlan->intBuffer;
+
+	if( fftPlan->intBufferRC == NULL && fftPlan->tmpBufSizeRC > 0 )
+	{
+		fftPlan->intBufferRC = clCreateBuffer( fftPlan->context, CL_MEM_READ_WRITE, fftPlan->tmpBufSizeRC, 0, &status);
+		OPENCL_V( status, _T("Creating the intermediate buffer for large1D RC Failed") );
+	}
+
+	if( fftPlan->intBufferC2R == NULL && fftPlan->tmpBufSizeC2R > 0 )
+	{
+		fftPlan->intBufferC2R = clCreateBuffer( fftPlan->context, CL_MEM_READ_WRITE, fftPlan->tmpBufSizeC2R, 0, &status);
+		OPENCL_V( status, _T("Creating the intermediate buffer for large1D YZ C2R Failed") );
+	}
+
+	//	The largest vector we can transform in a single pass
+	//	depends on the GPU caps -- especially the amount of LDS
+	//	available
+	//
+	size_t Large1DThreshold = 0;
+	OPENCL_V(fftPlan->GetMax1DLength (&Large1DThreshold), _T("GetMax1DLength failed"));
+	BUG_CHECK (Large1DThreshold > 1);
+
+	//Large1DThreshold = 128;
+
+	if(fftPlan->gen != Copy)
+	switch( fftPlan->dim )
+	{
+		case CLFFT_1D:
+		{
+			if (fftPlan->length[0] <= Large1DThreshold)
+				break;
+
+			if( fftPlan->inputLayout == CLFFT_REAL )
+			{
+				cl_event colOutEvents = NULL;
+				cl_event copyInEvents = NULL;
+
+				// First pass
+				// column with twiddle first, OUTOFPLACE, + transpose
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_FORWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+					waitEvents, &colOutEvents, clInputBuffers, &(fftPlan->intBufferRC), localIntBuffer),
+					_T("clfftEnqueueTransform large1D col pass failed"));
+
+
+				// another column FFT output, INPLACE
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+					&copyInEvents, &(fftPlan->intBufferRC), &(fftPlan->intBufferRC), localIntBuffer ),
+					_T("clfftEnqueueTransform large1D second column failed"));
+				clReleaseEvent(colOutEvents);
+
+				cl_mem *out_local;
+				out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+
+				// copy from full complex to hermitian
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planRCcopy, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &copyInEvents,
+					outEvents, &(fftPlan->intBufferRC), out_local, localIntBuffer ),
+					_T("clfftEnqueueTransform large1D RC copy failed"));
+				clReleaseEvent(copyInEvents);
+
+				return	CLFFT_SUCCESS;
+
+			}
+			else if( fftPlan->outputLayout == CLFFT_REAL )
+			{
+				cl_event colOutEvents = NULL;
+				cl_event copyOutEvents = NULL;
+
+				// copy from hermitian to full complex
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planRCcopy, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+					waitEvents, &copyOutEvents, clInputBuffers, &(fftPlan->intBufferRC), localIntBuffer ),
+					_T("clfftEnqueueTransform large1D RC copy failed"));
+
+				// First pass
+				// column with twiddle first, INPLACE,
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1,
+					&copyOutEvents, &colOutEvents, &(fftPlan->intBufferRC), &(fftPlan->intBufferRC), localIntBuffer),
+					_T("clfftEnqueueTransform large1D col pass failed"));
+				clReleaseEvent(copyOutEvents);
+
+				cl_mem *out_local;
+				out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+
+				// another column FFT output, OUTOFPLACE + transpose
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+					outEvents, &(fftPlan->intBufferRC), out_local, localIntBuffer ),
+					_T("clfftEnqueueTransform large1D second column failed"));
+				clReleaseEvent(colOutEvents);
+
+
+				return	CLFFT_SUCCESS;
+			}
+			else
+			{
+#if defined(DEBUGGING)
+				// For debugging interleave data only, initialize the intermediate buffer
+				// to a data pattern.  This will show which data in the buffer
+				// are being written by the kernel
+				//
+				size_t buffSizeBytes_complex = fftPlan->tmpBufSize;
+				size_t buffersize = buffSizeBytes_complex/sizeof( std::complex< float > );
+				std::vector<std::complex< float> > temp(buffersize);
+
+				for (size_t u = 0; u < buffersize; ++u) {
+					temp[u] = std::complex<float> (float(u+1), float(buffersize-u));
+				}
+
+				if (fftPlan->large1D == 0)
+				{
+					//First time usage, we can initialize tmp buffer
+					OPENCL_V(clEnqueueWriteBuffer( *commQueues,
+						localIntBuffer,
+						1,		// blocking write
+						0,
+						buffSizeBytes_complex,
+						&temp[0],
+						0,
+						NULL,
+						NULL), _T("clEnqueueWriteBuffer failed") );
+				}
+#endif
+
+				if (fftPlan->transflag)
+				{
+					//First transpose
+					// Input->tmp
+					cl_event transTXOutEvents = NULL;
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &transTXOutEvents, clInputBuffers, &localIntBuffer, NULL ),
+						_T("clfftEnqueueTransform for large1D transTX failed"));
+
+					cl_mem *mybuffers;
+					if (fftPlan->placeness==CLFFT_INPLACE)
+						mybuffers = clInputBuffers;
+					else
+						mybuffers = clOutputBuffers;
+
+					//First Row
+					//tmp->output
+					cl_event rowXOutEvents = NULL;
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1,
+						&transTXOutEvents, &rowXOutEvents, &localIntBuffer, mybuffers, NULL ),
+						_T("clfftEnqueueTransform for large1D rowX failed"));
+					clReleaseEvent(transTXOutEvents);
+
+					//Second Transpose
+					// output->tmp
+					cl_event transTYOutEvents = NULL;
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1,
+						&rowXOutEvents, &transTYOutEvents, mybuffers, &localIntBuffer, NULL ),
+						_T("clfftEnqueueTransform for large1D transTY failed"));
+					clReleaseEvent(rowXOutEvents);
+
+					//Second Row
+					//tmp->tmp, inplace
+					cl_event rowYOutEvents = NULL;
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1,
+						&transTYOutEvents, &rowYOutEvents, &localIntBuffer, NULL, NULL ),
+						_T("clfftEnqueueTransform for large1D rowY failed"));
+					clReleaseEvent(transTYOutEvents);
+
+					//Third Transpose
+					// tmp->output
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTZ, dir, numQueuesAndEvents, commQueues, 1,
+						&rowYOutEvents, outEvents, &localIntBuffer, mybuffers, NULL ),
+						_T("clfftEnqueueTransform for large1D transTZ failed"));
+					clReleaseEvent(rowYOutEvents);
+
+					if( fftRepo.pStatTimer )
+					{
+						fftRepo.pStatTimer->AddSample( plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >( ) );
+					}
+
+					return	CLFFT_SUCCESS;
+				}
+
+				cl_event colOutEvents = NULL;
+				if (fftPlan->large1D == 0)
+				{
+					// First pass
+					// column with twiddle first, OUTOFPLACE, + transpose
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &colOutEvents, clInputBuffers, &localIntBuffer, localIntBuffer),
+						_T("clfftEnqueueTransform large1D col pass failed"));
+
+#if defined(DEBUGGING)
+					// debug purpose, interleave input <-> interleave output
+					// read the intermediate buffer and print part of it.
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes_complex, &temp[ 0 ], 1,
+						&colOutEvents, NULL ),
+						_T("Reading the result buffer failed") );
+					{
+						FFTPlan* fftPlanX	= NULL;
+						lockRAII* planLockX	= NULL;
+						OPENCL_V( fftRepo.getPlan( fftPlan->planX, fftPlanX, planLockX ), _T( "fftRepo.getPlan failed" ) );
+
+						size_t rows = fftPlanX->length[0];
+						size_t cols = fftPlanX->batchsize;
+						BUG_CHECK (rows * cols <= temp.size())
+						size_t print_cols = std::min<size_t> (4, cols);
+						size_t print_rows = std::min<size_t> (4, rows);
+						//std::cout << std::endl << "Intermediate buffer:" << std::endl;
+						//for (size_t jrow = 0; jrow < print_rows; ++jrow) {
+						//	for (size_t icol = 0; icol < print_cols; ++icol) {
+						//		size_t index = jrow *cols + icol;
+						//		std::complex<float> data = temp[index];
+						//		std::cout << data;
+						//	}
+						//	std::cout << std::endl;
+						//}
+					}
+#endif
+
+					//another column FFT output, OUTOFPLACE
+					if (fftPlan->placeness == CLFFT_INPLACE)
+					{
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+							outEvents, &localIntBuffer, clInputBuffers, localIntBuffer ),
+							_T("clfftEnqueueTransform large1D second column failed"));
+
+#if defined(DEBUGGING)
+						//  For debugging interleave data only,
+						//  read the input buffer back into memory.
+						OPENCL_V( clEnqueueReadBuffer( *commQueues, clInputBuffers[0], CL_TRUE, 0, buffSizeBytes_complex, &temp[ 0 ], 1,
+							outEvents, NULL ),
+							_T("Reading the result buffer failed") );
+#endif
+					}
+					else
+					{
+#if defined(DEBUGGING)
+					// debug purpose, interleave input <-> interleave output
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes_complex, &temp[ 0 ], 1,
+						&colOutEvents, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+							outEvents, &localIntBuffer, clOutputBuffers, localIntBuffer ),
+							_T("clfftEnqueueTransform large1D second column failed"));
+
+#if defined(DEBUGGING)
+						//  For debugging interleave data only, read back the output buffer
+						//
+						OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes_complex, &temp[ 0 ], 1,
+							outEvents, NULL ),
+							_T("Reading the result buffer failed") );
+#endif
+					}
+				}
+				else
+				{
+					// second pass for huge 1D
+					// column with twiddle first, OUTOFPLACE, + transpose
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &colOutEvents, &localIntBuffer, clOutputBuffers, localIntBuffer),
+						_T("clfftEnqueueTransform Huge1D col pass failed"));
+#if defined(DEBUGGING)
+					// debug purpose, interleave input <-> interleave output
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes_complex, &temp[ 0 ], 1,
+						&colOutEvents, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+						outEvents, clOutputBuffers, clOutputBuffers, localIntBuffer ),
+						_T("clfftEnqueueTransform large1D second column failed"));
+
+				}
+
+				clReleaseEvent(colOutEvents);
+
+				if( fftRepo.pStatTimer )
+				{
+					fftRepo.pStatTimer->AddSample( plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >( ) );
+				}
+
+				return	CLFFT_SUCCESS;
+			}
+			break;
+		}
+		case CLFFT_2D:
+		{
+			// if transpose kernel, we will fall below
+			if (fftPlan->transflag && !(fftPlan->planTX)) break;
+
+			cl_event rowOutEvents = NULL;
+
+#if defined(DEBUGGING)
+			size_t buffersize = fftPlan->length[0] * fftPlan->length[1] * fftPlan->batchsize;
+			if (fftPlan->length.size() > 2) buffersize *= fftPlan->length[2];
+			//size_t buffSizeBytes=sizeof( std::complex< float > )*buffersize;
+			//std::vector< std::complex< float > > output2( buffersize );
+			size_t buffSizeBytes=sizeof( float) * buffersize;
+			std::vector<float> output2(buffersize*2);
+#endif
+#if defined(DEBUGGING)
+			OPENCL_V( clEnqueueReadBuffer( *commQueues, clInputBuffers[0], CL_TRUE, 0, buffSizeBytes, &output2[ 0 ], 0,
+				NULL, NULL ),
+				_T("Reading the result buffer failed") );
+
+			if (fftPlan->placeness == CLFFT_OUTOFPLACE)
+			{
+				OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes, &output2[ 0 ], 0,
+					NULL, NULL ),
+					_T("Reading the result buffer failed") );
+			}
+#endif
+			if (fftPlan->transflag)
+			{//first time set up transpose kernel for 2D
+				//First row
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+					waitEvents, &rowOutEvents, clInputBuffers, clOutputBuffers, NULL ),
+					_T("clfftEnqueueTransform for row failed"));
+
+				cl_mem *mybuffers;
+
+				if (fftPlan->placeness==CLFFT_INPLACE)
+					mybuffers = clInputBuffers;
+				else
+					mybuffers = clOutputBuffers;
+
+#if defined(DEBUGGING)
+				OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[ 0 ], 0,
+					NULL, NULL ),
+					_T("Reading the result buffer failed") );
+#endif
+
+				cl_event transXOutEvents = NULL;
+				cl_event colOutEvents = NULL;
+				bool xyflag = (fftPlan->length[0] == fftPlan->length[1]) ? false : true;
+
+				if (xyflag)
+				{
+					//First transpose
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						&transXOutEvents, mybuffers, &localIntBuffer, NULL ),
+						_T("clfftEnqueueTransform for first transpose failed"));
+					clReleaseEvent(rowOutEvents);
+
+#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+
+					if (fftPlan->transposed == CLFFT_NOTRANSPOSE)
+					{
+						//Second Row transform
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+							&colOutEvents, &localIntBuffer, NULL, NULL ),
+							_T("clfftEnqueueTransform for second row failed"));
+						clReleaseEvent(transXOutEvents);
+
+#if defined(DEBUGGING)
+						OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+							NULL, NULL ),
+							_T("Reading the result buffer failed") );
+#endif
+
+						//Second transpose
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+							outEvents, &localIntBuffer, mybuffers, NULL ),
+							_T("clfftEnqueueTransform for second transpose failed"));
+						clReleaseEvent(colOutEvents);
+
+#if defined(DEBUGGING)
+						OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+							NULL, NULL ),
+							_T("Reading the result buffer failed") );
+#endif
+					}
+					else
+					{
+						//Second Row transform
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+							outEvents, &localIntBuffer, mybuffers, NULL ),
+							_T("clfftEnqueueTransform for second row failed"));
+						clReleaseEvent(transXOutEvents);
+					}
+				}
+				else
+				{
+					// First Transpose
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						&transXOutEvents, mybuffers, NULL, NULL ),
+						_T("clfftEnqueueTransform for first transpose failed"));
+					clReleaseEvent(rowOutEvents);
+
+					if (fftPlan->transposed == CLFFT_NOTRANSPOSE)
+					{
+						//Second Row transform
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+							&colOutEvents, mybuffers, NULL, NULL ),
+							_T("clfftEnqueueTransform for Second Row failed"));
+						clReleaseEvent(transXOutEvents);
+
+						//Second transpose
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+							outEvents, mybuffers, NULL, NULL ),
+							_T("clfftEnqueueTransform for second transpose failed"));
+						clReleaseEvent(colOutEvents);
+					}
+					else
+					{
+						//Second Row transform
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+							outEvents, mybuffers, NULL, NULL ),
+							_T("clfftEnqueueTransform for second row failed"));
+						clReleaseEvent(transXOutEvents);
+					}
+
+				}
+
+				if( fftRepo.pStatTimer )
+				{
+					fftRepo.pStatTimer->AddSample( plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >( ) );
+				}
+
+				return CLFFT_SUCCESS;
+			}
+
+			if ( (fftPlan->large2D || fftPlan->length.size()>2) &&
+				(fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
+			{
+				if (fftPlan->placeness==CLFFT_INPLACE)
+				{
+					//deal with row first
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, NULL, localIntBuffer ),
+						_T("clfftEnqueueTransform for row failed"));
+
+					//deal with column
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						outEvents, clInputBuffers, NULL, localIntBuffer ),
+						_T("clfftEnqueueTransform for column failed"));
+				}
+				else
+				{
+					//deal with row first
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, clOutputBuffers, localIntBuffer ),
+						_T("clfftEnqueueTransform for row failed"));
+
+					//deal with column
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						outEvents, clOutputBuffers, NULL, localIntBuffer ),
+						_T("clfftEnqueueTransform for column failed"));
+
+				}
+			}
+			else
+			{
+				if(fftPlan->inputLayout == CLFFT_REAL)
+				{
+					if (fftPlan->placeness==CLFFT_INPLACE)
+					{
+						// deal with row
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_FORWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+							waitEvents, &rowOutEvents, clInputBuffers, NULL, localIntBuffer ),
+							_T("clfftEnqueueTransform for row failed"));
+
+						// deal with column
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+							outEvents, clInputBuffers, NULL, localIntBuffer ),
+							_T("clfftEnqueueTransform for column failed"));
+					}
+					else
+					{
+						// deal with row
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_FORWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+							waitEvents, &rowOutEvents, clInputBuffers, clOutputBuffers, localIntBuffer ),
+							_T("clfftEnqueueTransform for row failed"));
+
+						// deal with column
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+							outEvents, clOutputBuffers, NULL, localIntBuffer ),
+							_T("clfftEnqueueTransform for column failed"));
+					}
+				}
+				else if(fftPlan->outputLayout == CLFFT_REAL)
+				{
+					cl_mem *out_local, *int_local, *out_y;
+
+					if(fftPlan->length.size() > 2)
+					{
+						out_local = clOutputBuffers;
+						int_local = NULL;
+						out_y = clInputBuffers;
+					}
+					else
+					{
+						out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+						int_local = fftPlan->tmpBufSizeC2R ? &(fftPlan->intBufferC2R) : &localIntBuffer;
+						out_y = int_local;
+					}
+
+
+					// deal with column
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
+						_T("clfftEnqueueTransform for row failed"));
+
+					// deal with row
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						outEvents, out_y, out_local, localIntBuffer ),
+						_T("clfftEnqueueTransform for column failed"));
+
+				}
+				else
+				{
+					//deal with row first
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, &localIntBuffer, localIntBuffer ),
+						_T("clfftEnqueueTransform for row failed"));
+
+
+					if (fftPlan->placeness==CLFFT_INPLACE)
+					{
+						//deal with column
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+							outEvents, &localIntBuffer, clInputBuffers, localIntBuffer ),
+							_T("clfftEnqueueTransform for column failed"));
+					}
+					else
+					{
+						//deal with column
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+							outEvents, &localIntBuffer, clOutputBuffers, localIntBuffer ),
+							_T("clfftEnqueueTransform for column failed"));
+
+		#if defined(DEBUGGING)
+						OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes, &output2[ 0 ], 1,
+							outEvents, NULL ),
+							_T("Reading the result buffer failed") );
+		#endif
+					}
+				}
+			}
+
+			clReleaseEvent(rowOutEvents);
+
+			if( fftRepo.pStatTimer )
+			{
+				fftRepo.pStatTimer->AddSample( plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >( ) );
+			}
+
+			return	CLFFT_SUCCESS;
+		}
+		case CLFFT_3D:
+		{
+			cl_event rowOutEvents = NULL;
+
+#if defined(DEBUGGING)
+			size_t buffersize = fftPlan->length[0] * fftPlan->length[1] *fftPlan->length[2] *fftPlan->batchsize;
+			size_t buffSizeBytes=sizeof( std::complex< float > )*buffersize;
+			std::vector< std::complex< float > > output3( buffersize );
+#endif
+			if(fftPlan->inputLayout == CLFFT_REAL)
+			{
+				cl_mem *tmp_local, *out_local;
+
+				tmp_local = (fftPlan->placeness==CLFFT_INPLACE) ? NULL : clOutputBuffers;
+				out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+
+				//deal with 2D row first
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_FORWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+					waitEvents, &rowOutEvents, clInputBuffers, tmp_local, localIntBuffer ),
+					_T("clfftEnqueueTransform for 3D-XY row failed"));
+
+				//deal with 1D Z column
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+					outEvents, out_local, NULL, localIntBuffer ),
+					_T("clfftEnqueueTransform for 3D-Z column failed"));
+
+			}
+			else if(fftPlan->outputLayout == CLFFT_REAL)
+			{
+				cl_mem *out_local;
+				out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+
+				cl_mem *int_local;
+				int_local = fftPlan->tmpBufSizeC2R ? &(fftPlan->intBufferC2R) : &localIntBuffer;
+
+				//deal with 1D Z column first
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+					waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
+					_T("clfftEnqueueTransform for 3D-Z column failed"));
+
+				//deal with 2D row
+				OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+					outEvents, int_local, out_local, localIntBuffer ),
+					_T("clfftEnqueueTransform for 3D-XY row failed"));
+			}
+			else
+			{
+				if (fftPlan->placeness==CLFFT_INPLACE)
+				{
+					//deal with 2D row first
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, NULL, localIntBuffer ),
+						_T("clfftEnqueueTransform for 3D-XY row failed"));
+
+					//deal with 1D Z column
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						outEvents, clInputBuffers, NULL, localIntBuffer ),
+						_T("clfftEnqueueTransform for 3D-Z column failed"));
+				}
+				else
+				{
+	#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes, &output3[ 0 ], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+	#endif
+
+					//deal with 2D row first
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, clOutputBuffers, localIntBuffer ),
+						_T("clfftEnqueueTransform for 3D-XY row failed"));
+
+	#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes, &output3[ 0 ], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+	#endif
+
+					//deal with 1D Z column
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						outEvents, clOutputBuffers, NULL, localIntBuffer ),
+						_T("clfftEnqueueTransform for 3D-Z column failed"));
+	#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, clOutputBuffers[0], CL_TRUE, 0, buffSizeBytes, &output3[ 0 ], 1,
+						outEvents, NULL ),
+						_T("Reading the result buffer failed") );
+	#endif
+				}
+			}
+
+			clReleaseEvent(rowOutEvents);
+
+			if( fftRepo.pStatTimer )
+			{
+				fftRepo.pStatTimer->AddSample( plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >( ) );
+			}
+
+			return	CLFFT_SUCCESS;
+		}
+	}
+
+	// 1d with normal length will fall into the below category
+	// add: 2d transpose kernel will fall into here too.
+	vector< cl_mem >	inputBuff;
+	vector< cl_mem >	outputBuff;
+	inputBuff.reserve( 2 );
+	outputBuff.reserve( 2 );
+
+	//	Decode the relevant properties from the plan parameter to figure out how many input/output buffers we have
+	switch( fftPlan->inputLayout )
+	{
+		case CLFFT_COMPLEX_INTERLEAVED:
+		{
+			switch( fftPlan->outputLayout )
+			{
+				case CLFFT_COMPLEX_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_COMPLEX_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						//	Invalid to be an inplace transform, and go from 1 to 2 buffers
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_REAL:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				default:
+				{
+					//	Don't recognize output layout
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+			}
+
+			break;
+		}
+		case CLFFT_COMPLEX_PLANAR:
+		{
+			switch( fftPlan->outputLayout )
+			{
+				case CLFFT_COMPLEX_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_COMPLEX_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_REAL:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				default:
+				{
+					//	Don't recognize output layout
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+			}
+
+			break;
+		}
+		case CLFFT_HERMITIAN_INTERLEAVED:
+		{
+			switch( fftPlan->outputLayout )
+			{
+				case CLFFT_COMPLEX_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_COMPLEX_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_INTERLEAVED:
+				{
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+				case CLFFT_HERMITIAN_PLANAR:
+				{
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+				case CLFFT_REAL:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				default:
+				{
+					//	Don't recognize output layout
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+			}
+
+			break;
+		}
+		case CLFFT_HERMITIAN_PLANAR:
+		{
+			switch( fftPlan->outputLayout )
+			{
+				case CLFFT_COMPLEX_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_COMPLEX_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_INTERLEAVED:
+				{
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+				case CLFFT_HERMITIAN_PLANAR:
+				{
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+				case CLFFT_REAL:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						inputBuff.push_back( clInputBuffers[ 1 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				default:
+				{
+					//	Don't recognize output layout
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+			}
+
+			break;
+		}
+		case CLFFT_REAL:
+		{
+			switch( fftPlan->outputLayout )
+			{
+				case CLFFT_COMPLEX_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_COMPLEX_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_INTERLEAVED:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+					}
+
+					break;
+				}
+				case CLFFT_HERMITIAN_PLANAR:
+				{
+					if( fftPlan->placeness == CLFFT_INPLACE )
+					{
+						return CLFFT_INVALID_ARG_VALUE;
+					}
+					else
+					{
+						inputBuff.push_back( clInputBuffers[ 0 ] );
+
+						outputBuff.push_back( clOutputBuffers[ 0 ] );
+						outputBuff.push_back( clOutputBuffers[ 1 ] );
+					}
+
+					break;
+				}
+				default:
+				{
+					//	Don't recognize output layout
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+			}
+
+			break;
+		}
+		default:
+		{
+			//	Don't recognize input layout
+			return CLFFT_INVALID_ARG_VALUE;
+		}
+	}
+
+	//	TODO:  In the case of length == 1, FFT is a trivial NOP, but we still need to apply the forward and backwards transforms
+	//	TODO:  Are map lookups expensive to call here?  We can cache a pointer to the cl_program/cl_kernel in the plan
+
+	FFTKernelGenKeyParams fftParams;
+	//	Translate the user plan into the structure that we use to map plans to clPrograms
+	OPENCL_V( fftPlan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") );
+
+	cl_program	prog;
+	cl_kernel	kern;
+	OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog ), _T( "fftRepo.getclProgram failed" ) );
+	OPENCL_V( fftRepo.getclKernel( prog, dir, kern ), _T( "fftRepo.getclKernels failed" ) );
+
+
+
+	cl_uint uarg = 0;
+	if (!fftPlan->transflag && !(fftPlan->gen == Copy))
+	{
+		//	::clSetKernelArg() is not thread safe, according to the openCL spec for the same cl_kernel object
+		//	TODO:  Need to verify that two different plans (which would get through our lock above) with exactly the same
+		//	parameters would NOT share the same cl_kernel objects
+
+		/* constant buffer */
+		OPENCL_V( clSetKernelArg( kern, uarg++, sizeof( cl_mem ), (void*)&fftPlan->const_buffer ), _T( "clSetKernelArg failed" ) );
+	}
+
+	//	Input buffer(s)
+	//	Input may be 1 buffer  (CLFFT_COMPLEX_INTERLEAVED)
+	//	          or 2 buffers (CLFFT_COMPLEX_PLANAR)
+
+	for (size_t i = 0; i < inputBuff.size(); ++i)
+	{
+		OPENCL_V( clSetKernelArg( kern, uarg++, sizeof( cl_mem ), (void*)&inputBuff[i] ), _T( "clSetKernelArg failed" ) );
+	}
+	//	Output buffer(s)
+	//	Output may be 0 buffers (CLFFT_INPLACE)
+	//	           or 1 buffer  (CLFFT_COMPLEX_INTERLEAVED)
+	//	           or 2 buffers (CLFFT_COMPLEX_PLANAR)
+	for (size_t o = 0; o < outputBuff.size(); ++o)
+	{
+		OPENCL_V( clSetKernelArg( kern, uarg++, sizeof( cl_mem ), (void*)&outputBuff[o] ), _T( "clSetKernelArg failed" ) );
+	}
+
+	vector< size_t > gWorkSize;
+	vector< size_t > lWorkSize;
+	clfftStatus result = fftPlan->GetWorkSizes (gWorkSize, lWorkSize);
+
+	// TODO:  if GetWorkSizes returns CLFFT_INVALID_GLOBAL_WORK_SIZE, that means
+	// that this multidimensional input data array is too large to be transformed
+	// with a single call to clEnqueueNDRangeKernel.  For now, we will just return
+	// the error code back up the call stack.
+	// The *correct* course of action would be to split the work into multiple
+	// calls to clEnqueueNDRangeKernel.
+	if (CLFFT_INVALID_GLOBAL_WORK_SIZE == result)
+	{
+		OPENCL_V( result, _T("Work size too large for clEnqueNDRangeKernel()"));
+	}
+	else
+	{
+		OPENCL_V( result, _T("FFTPlan::GetWorkSizes failed"));
+	}
+	BUG_CHECK (gWorkSize.size() == lWorkSize.size());
+
+	size_t *lwSize = NULL;
+	if(fftPlan->gen != Copy) lwSize = &lWorkSize[ 0 ];
+
+	status = clEnqueueNDRangeKernel( *commQueues, kern, static_cast< cl_uint >( gWorkSize.size( ) ),
+		NULL, &gWorkSize[ 0 ], lwSize, numWaitEvents, waitEvents, outEvents );
+	OPENCL_V( status, _T( "clEnqueueNDRangeKernel failed" ) );
+
+	if( fftRepo.pStatTimer )
+	{
+		fftRepo.pStatTimer->AddSample( plHandle, fftPlan, kern, numQueuesAndEvents, outEvents, gWorkSize );
+	}
+
+	return	CLFFT_SUCCESS;
+}
diff --git a/src/scripts/perf/CMakeLists.txt b/src/scripts/perf/CMakeLists.txt
new file mode 100644
index 00000000..95add5a7
--- /dev/null
+++ b/src/scripts/perf/CMakeLists.txt
@@ -0,0 +1,30 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+# Performance-suite scripts that this directory installs.
+set( GRAPHING_SCRIPTS
+    measurePerformance.py
+    plotPerformance.py
+    fftPerformanceTesting.py
+    errorHandler.py
+    performanceUtility.py )
+
+# Default to bin32; a BUILD64 configuration installs into bin64 instead.
+set( BIN_DIR bin32 )
+if( BUILD64 )
+    set( BIN_DIR bin64 )
+endif()
+install( FILES ${GRAPHING_SCRIPTS} DESTINATION ${BIN_DIR} )
\ No newline at end of file
diff --git a/src/scripts/perf/errorHandler.py b/src/scripts/perf/errorHandler.py
new file mode 100644
index 00000000..8471f9db
--- /dev/null
+++ b/src/scripts/perf/errorHandler.py
@@ -0,0 +1,68 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+#---------------------------------File Note------------------------------------
+#Date: 27 January 2012
+#This file defines all the error codes and the error-handling mechanism
+#--------------------------------Global Variables------------------------------
+
+# Numeric error codes; errorTable below maps each one to its message text.
+UINS_CAT = 100             # installed catalyst not found
+WIN_REG_SEARCH_FAIL = 101  # Windows registry search for the catalyst version failed
+UNIMPL_APP = 200           # unimplemented application requirement
+SYS_ERR = 300              # system error
+TIME_OUT = 400             # operation timed out
+DIM_INCO_FILE_FMT = 500    # incorrect file format for dimension
+DIM_FILE_VAL_INCO = 501    # value coming from dimension file is incorrect
+
+# errorTable: message text for every error code defined above. Add a new
+# error code together with its message here.
+errorTable = {
+    UINS_CAT: 'Application is not able to find the installed catalyst',
+    WIN_REG_SEARCH_FAIL: 'Windows Registry search for catalysts version is unsuccessful',
+    UNIMPL_APP: 'Unimplemented Application requirement',
+    SYS_ERR: 'System error occurred - Please check the source code',
+    TIME_OUT: 'Operation is timed out',
+    DIM_INCO_FILE_FMT: 'incorrect file format for dimension - Not able to find dimension',
+    DIM_FILE_VAL_INCO: 'Value coming from dimension file is incorrect',
+}
+
+#--------------------------------Class Definitions-----------------------------
+class TimeoutException(Exception):
+    """Exception subclass with no added behavior (presumably raised on timeouts; confirm with callers)."""
+
+"""Base class for handling all the application generated exception"""
+class ApplicationException(Exception):
+    """Carries a file name, an error code and the message text built from errorTable."""
+    def __init__(self, fileName, errno, msg = ""):
+        self.fileName = fileName
+        self.errno = errno
+        # Unknown codes get a generic text rather than a KeyError from the constructor.
+        self.mess = errorTable.get(errno, 'Unknown error code') + msg
+        self.message = 'Application ERROR:'+repr(self.fileName+'-'+str(self.errno)+'-'+self.mess)
+
+    def __str__(self):
+        return repr(self.fileName+'-'+str(self.errno)+'-'+self.mess)
+
+#--------------------------------Global Function-------------------------------
+if __name__ == '__main__':
+    # Self-test: raise an ApplicationException and confirm it can be caught.
+    try:
+        raise ApplicationException('errorHandler', SYS_ERR)
+    # Exception (not a bare except) so SystemExit/KeyboardInterrupt still propagate.
+    except Exception:
+        print('Generic exception')
+
diff --git a/src/scripts/perf/fftPerformanceTesting.py b/src/scripts/perf/fftPerformanceTesting.py
new file mode 100644
index 00000000..2d1df690
--- /dev/null
+++ b/src/scripts/perf/fftPerformanceTesting.py
@@ -0,0 +1,315 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+import itertools
+import re#gex
+import subprocess
+import os
+import sys
+from datetime import datetime
+
+# Common data and functions for the performance suite
+
+tableHeader = 'lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS'
+
+class TestCombination:
+    """One test configuration; the fields are expected to be strings because __str__ concatenates them."""
+    def __init__(self,
+                 lengthx, lengthy, lengthz, batchsize,
+                 device, inlayout, outlayout, placeness, precision,
+                 label):
+        self.x, self.y, self.z = lengthx, lengthy, lengthz
+        self.batchsize = batchsize
+        self.device = device
+        self.inlayout, self.outlayout = inlayout, outlayout
+        self.placeness = placeness
+        self.precision = precision
+        self.label = label
+
+    def __str__(self):
+        size = self.x + 'x' + self.y + 'x' + self.z + ':' + self.batchsize
+        layout = self.inlayout + '/' + self.outlayout
+        return size + ', ' + self.device + ', ' + layout + ', ' + self.placeness + ', ' + self.precision + ' -- ' + self.label
+
+class GraphPoint:
+    """One measured point: lengths, batch size, precision, device, label and the gflops text."""
+    def __init__(self,
+                 lengthx, lengthy, lengthz, batchsize,
+                 precision, device, label,
+                 gflops):
+        self.x, self.y, self.z = lengthx, lengthy, lengthz
+        self.batchsize = batchsize
+        self.device, self.label = device, label
+        self.precision = precision
+        self.gflops = gflops
+        elements = int(self.x) * int(self.y) * int(self.z) * int(self.batchsize)
+        self.problemsize = str(elements)
+
+    def __str__(self):
+        # ALL members must be represented here (x, y, z, batch, device, label, etc)
+        dims = self.x + 'x' + self.y + 'x' + self.z + ':' + self.batchsize
+        return dims + ', ' + self.precision + ' precision, ' + self.device + ', -- ' + self.label + '; ' + self.gflops
+
+class TableRow:
+    """Couples a TestCombination instance (parameters) with its gflops text."""
+    def __init__(self, parameters, gflops):
+        self.parameters, self.gflops = parameters, gflops
+
+    def __str__(self):
+        summary = self.parameters.__str__()
+        return summary + '; ' + self.gflops
+
+def transformDimension(x,y,z):  # NOTE(review): falls off the end (returns None) when x, y and z are all 1
+    if int(z) != 1:  # a non-unit z length: 3D
+        return 3
+    elif int(y) != 1:  # z is 1 but y is not: 2D
+        return 2
+    elif int(x) != 1:  # only x differs from 1: 1D
+        return 1
+
+def executable(library):
+    """Return the file name of the benchmark client for `library`.
+
+    Prints an error and exits when the library or platform is unknown or
+    when the client binary is not present in the current directory."""
+    if not isinstance(library, str):
+        print 'ERROR: expected library name to be a string'
+        quit()
+
+    # sys.platform is not always 'linux2': other interpreters report 'linux' or 'linux3'
+    onLinux = sys.platform.startswith('linux')
+    if sys.platform != 'win32' and not onLinux:
+        print 'ERROR: unknown operating system'
+        quit()
+
+    if library != 'clFFT' and library != 'null':
+        print 'ERROR: unknown library -- cannot determine executable name'
+        quit()
+    exe = 'Client' if onLinux else 'Client.exe'
+
+    if not os.path.isfile(exe):
+        print 'ERROR: could not find client named ' + exe
+        quit()
+    return exe
+
+def max_mem_available_in_bytes(exe, device):
+    """Run `exe -i device` and return the first integer found on a MAX_MEM_ALLOC_SIZE line of its output."""
+    output = subprocess.check_output([exe, '-i', device], stderr=subprocess.STDOUT)
+    # splitlines() copes with \n and \r\n alike, whatever os.linesep is on this host
+    for line in output.splitlines():
+        if 'MAX_MEM_ALLOC_SIZE' in line and re.search(r'\d+', line):
+            return int(re.search(r'\d+', line).group(0))
+    raise ValueError('MAX_MEM_ALLOC_SIZE not reported by ' + exe)  # instead of an opaque IndexError
+
+def max_problem_size(exe, layout, precision, device):
+    """Return max_mem_available_in_bytes(exe, device) converted to data points and divided by 16."""
+    if layout not in ('ci', 'cp'):
+        print 'max_problem_size(): unknown layout'
+        quit()
+    # both accepted layouts hold two numbers per data point
+    numbers_per_point = 2
+
+    if precision not in ('single', 'double'):
+        print 'max_problem_size(): unknown precision'
+        quit()
+    bytes_per_number = 4 if precision == 'single' else 8
+
+    bytes_per_point = numbers_per_point * bytes_per_number
+    points = max_mem_available_in_bytes(exe, device) / bytes_per_point
+    # only a sixteenth of that figure is returned
+    points = points / 16
+    return points
+
+def maxBatchSize(lengthx, lengthy, lengthz, layout, precision, exe, device):
+    """Return, as a string, max_problem_size() divided by the number of points in one transform."""
+    pointsPerTransform = int(lengthx) * int(lengthy) * int(lengthz)
+    batches = max_problem_size(exe, layout, precision, device) / pointsPerTransform
+    # special cases in the kernel. extra padding is added in, so we need to shrink the batch size to accommodate
+    if int(lengthx) in (pow(2,16), pow(2,17)):
+        batches = batches/2
+    return str(batches)
+
+def create_ini_file_if_requested(args):
+    if args.createIniFilename:  # a false value means there is nothing to write
+        for name, value in vars(args).items():
+            if not (type(value) == file or name.count('File')):  # file objects and *File* options are skipped
+                args.createIniFilename.write('--' + name + os.linesep)
+                args.createIniFilename.write(str(value) + os.linesep)
+        quit()
+    
+def load_ini_file_if_requested(args, parser):
+    iniFile = args.useIniFilename
+    if not iniFile:
+        return args
+    # every stripped line of the ini file becomes one command-line token for the parser
+    return parser.parse_args([token.strip() for token in iniFile.readlines()])
+
+def is_numeric_type(x):
+    return type(x) in (int, long, float)  # exact type match, so bool and other subclasses do not count
+
+def split_up_comma_delimited_lists(args):
+    """Turn the options of args into lists: None -> [None], a number n -> [n], a str -> its ','-separated parts."""
+    for name, value in vars(args).items():
+        if value == None:
+            setattr(args, name, [None])
+        elif is_numeric_type(value):
+            setattr(args, name, [value])
+        elif type(value) == str:
+            setattr(args, name, value.split(','))
+    return args
+
+class Range:
+    """Expands range strings such as '1-9', '16-128:x2' or '10-30:10' into the sorted, de-duplicated list self.expanded."""
+    def __init__(self, ranges, defaultStep='+1'):
+        # we might be passed in a single value or a list of strings
+        # if we receive a single value, we want to feed it right back
+        if type(ranges) != list:
+            self.expanded = ranges
+        elif ranges[0] == None:
+            self.expanded = [None]
+        else:
+            self.expanded = []
+            for thisRange in ranges:
+                thisRange = str(thisRange)
+                if re.search(r'^\+\d+$', thisRange):
+                    self.expanded = self.expanded + [thisRange]
+                elif thisRange == 'max':
+                    self.expanded = self.expanded + ['max']
+                else:
+                    # 'begin-end:step'; the step is optional and an 'x' in it makes it a multiplier
+                    if thisRange.count(':'):
+                        self._stepAmount = thisRange.split(':')[1]
+                    else:
+                        self._stepAmount = defaultStep
+                    thisRange = thisRange.split(':')[0]
+
+                    if self._stepAmount.count('x'):
+                        self._stepper = '_mult'
+                    else:
+                        self._stepper = '_add'
+                    self._stepAmount = int(self._stepAmount.lstrip('+x'))
+
+                    bounds = thisRange.split('-')
+                    self.begin = int(bounds[0])
+                    self.end = int(bounds[1]) if thisRange.count('-') else self.begin
+                    self.current = self.begin
+
+                    if self.begin == 0 and self._stepper == '_mult':
+                        self.expanded = self.expanded + [0]
+                    else:
+                        while self.current <= self.end:
+                            self.expanded = self.expanded + [self.current]
+                            previous = self.current
+                            self._step()
+                            if self.current <= previous:
+                                break  # a step such as +0 or x1 never advances; stop instead of looping forever
+
+                # now we want to uniquify and sort the expanded range
+                self.expanded = list(set(self.expanded))
+                self.expanded.sort()
+
+    # advance current value to next
+    def _step(self):
+        getattr(self, self._stepper)()
+
+    def _mult(self):
+        self.current = self.current * self._stepAmount
+
+    def _add(self):
+        self.current = self.current + self._stepAmount
+
+def expand_range(a_range):
+    return Range(a_range).expanded
+
+def decode_parameter_problemsize(problemsize):
+    """Decode 'AxBxC:D' strings in place: every entry becomes a list of its
+    ':'-separated parts, each part being the list of its 'x'-separated pieces.
+    A list that contains None is returned untouched."""
+    if problemsize.count(None):
+        return problemsize
+
+    for i, entry in enumerate(problemsize):
+        # e.g. '2x2x2:32768' -> [['2', '2', '2'], ['32768']]
+        problemsize[i] = [part.split('x') for part in entry.split(':')]
+
+    return problemsize
+
+def gemm_table_header():
+    return 'm,n,k,lda,ldb,ldc,alpha,beta,order,transa,transb,function,device,library,label,GFLOPS'
+
+class GemmTestCombination:
+    """Parameters of one GEMM run; sizes, leading dimensions, alpha and beta are stored as strings."""
+    def __init__(self,
+                 sizem, sizen, sizek, lda, ldb, ldc,
+                 alpha, beta, order, transa, transb,
+                 function, device, library, label):
+        # these inputs are converted with str() so that __str__ can concatenate them
+        self.sizem, self.sizen, self.sizek = str(sizem), str(sizen), str(sizek)
+        self.lda, self.ldb, self.ldc = str(lda), str(ldb), str(ldc)
+        self.alpha, self.beta = str(alpha), str(beta)
+        # the remaining fields are kept exactly as given
+        self.order = order
+        self.transa = transa
+        self.transb = transb
+        self.function = function
+        self.device = device
+        self.library = library
+        self.label = label
+
+    def __str__(self):
+        sizes = self.sizem + 'x' + self.sizen + 'x' + self.sizek + ':' + self.lda + 'x' + self.ldb + 'x' + self.ldc
+        scalars = ', alpha(' + self.alpha + '), beta(' + self.beta + ')'
+        return sizes + ', ' + self.device + ', ' + self.function + ', ' + self.library + scalars + ', order(' + self.order + '), transa(' + self.transa + '), transb(' + self.transb + ') -- ' + self.label
+
+class GemmGraphPoint:
+    """One measured GEMM point: problem sizes, run identification and the gflops text."""
+    def __init__(self,
+                 sizem, sizen, sizek,
+                 lda, ldb, ldc,
+                 device, order, transa, transb,
+                 function, library, label,
+                 gflops):
+        # problem dimensions
+        self.sizem, self.sizen, self.sizek = sizem, sizen, sizek
+        self.lda, self.ldb, self.ldc = lda, ldb, ldc
+        # run identification
+        self.device = device
+        self.order = order
+        self.transa, self.transb = transa, transb
+        self.function = function
+        self.library = library
+        self.label = label
+        self.gflops = gflops
+
+    def __str__(self):
+        # lda, ldb and ldc are stored but do not appear in the text built here
+        problem = self.sizem + 'x' + self.sizen + 'x' + self.sizek + ':' + self.device
+        flags = ', order(' + self.order + '), transa(' + self.transa + '), transb(' + self.transb + ')'
+        return problem + ', ' + self.function + ', ' + self.library + flags + ' -- ' + self.label + '; ' + self.gflops + ' gflops'
+
+def open_file( filename ):
+    """Open filename (or its first element when a list is given) for writing and return the file object.
+    None selects a timestamped 'results...' name; an existing file gets a timestamp appended to its name."""
+    if type(filename) == list:
+        filename = filename[0]
+    if filename == None:
+        return open('results' + datetime.now().isoformat().replace(':','.') + '.txt', 'w')
+    if os.path.isfile(filename):
+        oldname = filename
+        filename = oldname + datetime.now().isoformat().replace(':','.')
+        print 'A file with the name ' + oldname + ' already exists. Changing filename to ' + filename
+
+    return open(filename, 'w')
diff --git a/src/scripts/perf/measurePerformance.py b/src/scripts/perf/measurePerformance.py
new file mode 100644
index 00000000..c0cbc6a0
--- /dev/null
+++ b/src/scripts/perf/measurePerformance.py
@@ -0,0 +1,705 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+import sys
+import argparse
+import subprocess
+import itertools
+import re#gex
+import os
+from threading import Timer, Thread
+import thread, time
+from platform import system
+
+from datetime import datetime
+
+import errorHandler
+from fftPerformanceTesting import *
+from performanceUtility import timeout, log, generate235Radices
+
+IAM = 'FFT'
+TIMOUT_VAL = 900  #In seconds
+   
+devicevalues = ['gpu', 'cpu']
+layoutvalues = ['cp', 'ci']
+placevalues = ['in', 'out']
+precisionvalues = ['single', 'double']
+libraryvalues = ['clFFT']
+pow10 = '1-9,10-90:10,100-900:100,1000-9000:1000,10000-90000:10000,100000-900000:100000,1000000-9000000:1000000'
+
+parser = argparse.ArgumentParser(description='Measure performance of the clFFT library')
+parser.add_argument('--device',
+    dest='device', default='gpu',
+    help='device(s) to run on; may be a comma-delimited list. choices are ' + str(devicevalues) + '. (default gpu)')
+parser.add_argument('-b', '--batchsize',
+    dest='batchSize', default='1',
+    help='number of FFTs to perform with one invocation of the client. the special value \'max\' may be used to adjust the batch size on a per-transform basis to the maximum problem size possible on the device. may be a range or a comma-delimited list. if a range is entered, you may follow it with \':X\', where X is the stepping of the range (if omitted, it defaults to a stepping of 1). e.g., 1-15 or 12,18 or 7,10-30:10,1050-1054. the special value \'pow10\' expands to \'{}\'. Note that \'max\' and \'pow10\' may not be used in a list; they must be used by themselves; max may only be used with --library clFFT. (default 1)'.format(pow10))
+parser.add_argument('-a', '--adaptivemax',
+    dest='constProbSize', default='-1',
+    help='Max problem size that you want to maintain across the invocations of client with different lengths. This is adaptive and adjusts itself automatically.')
+parser.add_argument('-x', '--lengthx',
+    dest='lengthx', default='1',
+    help='length(s) of x to test; must be factors of 1, 2, 3, or 5 with clFFT; may be a range or a comma-delimited list. e.g., 16-128 or 1200 or 16,2048-32768 (default 1)')
+parser.add_argument('-y', '--lengthy',
+    dest='lengthy', default='1',
+    help='length(s) of y to test; must be factors of 1, 2, 3, or 5 with clFFT; may be a range or a comma-delimited list. e.g., 16-128 or 1200 or 16,32768 (default 1)')
+parser.add_argument('-z', '--lengthz',
+    dest='lengthz', default='1',
+    help='length(s) of z to test; must be factors of 1, 2, 3, or 5 with clFFT; may be a range or a comma-delimited list. e.g., 16-128 or 1200 or 16,32768 (default 1)')
+parser.add_argument('--problemsize',
+    dest='problemsize', default=None,
+    help='additional problems of a set size. may be used in addition to lengthx/y/z. each indicated problem size will be added to the list of FFTs to perform. should be entered in AxBxC:D format. A, B, and C indicate the sizes of the X, Y, and Z dimensions (respectively). D is the batch size. All values except the length of X are optional. may enter multiple in a comma-delimited list. e.g., 2x2x2:32768 or 256x256:100,512x512:256')
+parser.add_argument('-i', '--inputlayout',
+    dest='inputlayout', default='ci',
+    help='may enter multiple in a comma-delimited list. choices are ' + str(layoutvalues) + '. ci = complex interleaved, cp = complex planar (default ci)')
+parser.add_argument('-o', '--outputlayout',
+    dest='outputlayout', default='ci',
+    help='may enter multiple in a comma-delimited list. choices are ' + str(layoutvalues) + '. ci = complex interleaved, cp = complex planar (default ci)')
+parser.add_argument('-p', '--placeness',
+    dest='placeness', default='in',
+    help='may enter multiple in a comma-delimited list. choices are ' + str(placevalues) + '. in = in place, out = out of place (default in)')
+parser.add_argument('-r', '--precision',
+    dest='precision', default='single',
+    help='may enter multiple in a comma-delimited list. choices are ' + str(precisionvalues) + '. (default single)')
+parser.add_argument('--library',
+    dest='library', default='clFFT', choices=libraryvalues,
+    help='indicates the library to use for testing on this run')
+parser.add_argument('--label',
+    dest='label', default=None,
+    help='a label to be associated with all transforms performed in this run. if LABEL includes any spaces, it must be in \"double quotes\". note that the label is not saved to an .ini file. e.g., --label cayman may indicate that a test was performed on a cayman card or --label \"Windows 32\" may indicate that the test was performed on Windows 32')
+parser.add_argument('--createini',
+    dest='createIniFilename', default=None,
+    help='create an .ini file with the given name that saves the other parameters given at the command line, then quit. e.g., \'measureperformance.py -x 2048 --createini my_favorite_setup.ini\' will create an .ini file that will save the configuration for a 2048-datapoint 1D FFT.')
+parser.add_argument('--ini',
+    dest='iniFilename', default=None,
+    help='use the parameters in the named .ini file instead of the command line parameters.')
+parser.add_argument('--tablefile',
+    dest='tableOutputFilename', default=None,
+    help='save the results to a plaintext table with the file name indicated. this can be used with plotPerformance.py to generate graphs of the data (default: table prints to screen)')
+
+args = parser.parse_args()
+
+label = str(args.label)
+if not os.path.isdir('perfLog'):
+    os.makedirs('perfLog')  # replaces `mkdir` through the shell, which printed an error whenever the directory already existed
+logfile = os.path.join('perfLog', (label+'-'+'fftMeasurePerfLog.txt'))
+
+def printLog(txt):
+    print txt
+    log(logfile, txt)
+
+printLog("=========================MEASURE PERFORMANCE START===========================")
+printLog("Process id of Measure Performance:"+str(os.getpid()))
+
+currCommandProcess = None
+
+
+printLog('Executing measure performance for label: '+str(label))
+
+
+#This function is defunct now
+@timeout(1, "fileName") # timeout is 5 minutes, 5*60 = 300 secs
+def checkTimeOutPut2(args):
+    global currCommandProcess
+    #ret = subprocess.check_output(args, stderr=subprocess.STDOUT)
+    #return ret
+    currCommandProcess = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    printLog("Curr Command Process id = "+str(currCommandProcess.pid))
+    ret = currCommandProcess.communicate()    
+    if(ret[0] == None or ret[0] == ''):
+        errCode = currCommandProcess.poll()
+        raise subprocess.CalledProcessError(errCode, args, output=ret[1])
+    return ret[0]
+
+
+#Spawns a separate thread to execute the library command and wait for that thread to complete
+#This wait is of 900 seconds (15 minutes). If still the thread is alive then we kill the thread
+def checkTimeOutPut(args):
+    """Run `args` as a child process and return its stdout. Raises ApplicationException(TIME_OUT) when the child
+    is still running after TIMOUT_VAL seconds and CalledProcessError when it produced no stdout."""
+    global currCommandProcess
+    global stde
+    global stdo
+    stdo = stde = None
+    def executeCommand():
+        global stdo
+        global stde
+        try:
+            stdo, stde = currCommandProcess.communicate()
+            printLog('stdout:\n'+str(stdo))
+            printLog('stderr:\n'+str(stde))
+        except Exception as err:
+            printLog("ERROR: UNKNOWN Exception - +checkTimeOutPut()::executeCommand(): "+str(err))
+
+    currCommandProcess = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    worker = Thread(target=executeCommand)  # not called `thread`, which would hide the imported thread module
+    worker.start()
+    worker.join(TIMOUT_VAL) #wait for the thread to complete 
+    if worker.is_alive():
+        printLog('ERROR: Killing the process - terminating thread because it is taking too much of time to execute')
+        currCommandProcess.kill()
+        worker.join(10)  # give communicate() a moment to return so a stale reader cannot overwrite stdo/stde of the next call
+        printLog('ERROR: Timed out exception')
+        raise errorHandler.ApplicationException(__file__, errorHandler.TIME_OUT)
+    if stdo == "" or stdo==None:
+        errCode = currCommandProcess.poll()
+        printLog('ERROR: @@@@@Raising Called processor exception')
+        raise subprocess.CalledProcessError(errCode, args, output=stde)
+    return stdo
+
+
+
+
+
+# don't try to create and use an .ini file at the same time (it will open a portal through which demons will emerge)
+if args.iniFilename and args.createIniFilename:
+    printLog('ERROR: --ini and --createini are mutually exclusive. Please choose only one.')
+    quit()
+
+#read in .ini parameters if --ini is used
+if args.iniFilename != None:
+    if not os.path.isfile(args.iniFilename):
+        printLog("No file with the name \'{}\' exists. Please indicate another filename.".format(args.iniFilename))
+        quit()
+    
+    with open(args.iniFilename, 'r') as ini:
+        iniContents = ini.read().split(';')
+    for i in range(0,len(iniContents)):
+        line = iniContents.pop().strip()
+        if line == '':
+            # --createini ends the file with ';', so the last piece is empty rather than corrupted
+            continue
+        parameter, separator, value = line.partition(' ')
+        value = value.replace('\'','').replace('[','').replace(']','').replace(' ','')
+        
+        if parameter == 'batchSize':
+            args.batchSize = value
+        elif parameter == 'constProbSize':
+            args.constProbSize = value
+        elif parameter == 'lengthx':
+            args.lengthx = value
+        elif parameter == 'lengthy':
+            args.lengthy = value
+        elif parameter == 'lengthz':
+            args.lengthz = value
+        elif parameter == 'problemsize':
+            args.problemsize = value
+        elif parameter == 'device':
+            args.device = value
+        elif parameter == 'inputlayout':
+            args.inputlayout = value
+        elif parameter == 'outputlayout':
+            args.outputlayout = value
+        elif parameter == 'placeness':
+            args.placeness = value
+        elif parameter == 'precision':
+            args.precision = value
+        else:
+            printLog('{} corrupted. Please re-create a .ini file with the --createini flag.'.format(args.iniFilename))
+            quit()
+
+#create ini file if requested
+if args.createIniFilename != None:
+    printLog('Creating Ini files')
+    if os.path.isfile(args.createIniFilename):
+        printLog('A file with the name \'{}\' already exists. Please delete the file or choose another name.'.format(args.createIniFilename))
+        quit()
+    printLog('Creating Ini file:'+args.createIniFilename+'\n')
+    with open(args.createIniFilename, 'w') as ini:  # closed (and flushed) before the success message is logged
+        ini.write('batchSize {} ;'.format(args.batchSize))
+        ini.write('constProbSize {} ;'.format(args.constProbSize))
+        ini.write('lengthx {} ;'.format(args.lengthx))
+        ini.write('lengthy {} ;'.format(args.lengthy))
+        ini.write('lengthz {} ;'.format(args.lengthz))
+        ini.write('problemsize {} ;'.format(args.problemsize))
+        ini.write('device {} ;'.format(args.device))
+        ini.write('inputlayout {} ;'.format(args.inputlayout))
+        ini.write('outputlayout {} ;'.format(args.outputlayout))
+        ini.write('placeness {} ;'.format(args.placeness))
+        ini.write('precision {} ;'.format(args.precision))
+    printLog('Created Ini file:'+args.createIniFilename+'\n')
+    printLog("=========================MEASURE PERFORMANCE START===========================\n")
+    quit()
+
+
+#turn pow10 into its range list
+if args.batchSize.count('pow10'):
+    args.batchSize = pow10
+
+#split up comma-delimited lists
+args.batchSize = args.batchSize.split(',')
+args.constProbSize = int(args.constProbSize.split(',')[0])
+args.device = args.device.split(',')
+args.lengthx = args.lengthx.split(',')
+args.lengthy = args.lengthy.split(',')
+args.lengthz = args.lengthz.split(',')
+if args.problemsize:
+    args.problemsize = args.problemsize.split(',')
+args.inputlayout = args.inputlayout.split(',')
+args.outputlayout = args.outputlayout.split(',')
+args.placeness = args.placeness.split(',')
+args.precision = args.precision.split(',')
+
+
+
+printLog('Executing for label: '+str(args.label))
+#check parameters for sanity
+
+# batchSize of 'max' must not be in a list (does not get on well with others)
+#if args.batchSize.count('max') and len(args.batchSize) > 1:
+if ( args.batchSize.count('max') or args.batchSize.count('adapt') )and len(args.batchSize) > 1:
+    printLog('ERROR: --batchsize max must not be in a comma delimited list')
+    quit()
+
+
+# in case of an in-place transform, input and output layouts must be the same (otherwise: *boom*)
+for n in args.placeness:
+    if n == 'in' or n == 'inplace':
+        if len(args.inputlayout) > 1 or len(args.outputlayout) > 1 or args.inputlayout[0] != args.outputlayout[0]:
+            printLog('ERROR: if transformation is in-place, input and output layouts must match')
+            quit()
+
+# check for valid values in precision
+for n in args.precision:
+    if n != 'single' and n != 'double':
+        printLog('ERROR: invalid value for precision')
+        quit()
+
+def isPrime(n):
+    """Trial-division test on abs(n); no divisor below 2 is tried, so 0 and 1 also yield True."""
+    import math
+    n = abs(n)
+    # candidate divisors 2 .. floor(sqrt(n))
+    for i in range(2, int(math.sqrt(n)) + 1):
+        if n%i == 0:
+            return False
+    return True
+
+def findFactors(number):
+    """Return, in increasing order, the divisors d of number (1 <= d <= number)
+    for which isPrime(d) is true."""
+    prime_factor_list = []
+    for candidate in range(1, number+1):
+        is_factor = isPrime(candidate) == True and number%candidate == 0
+        if is_factor:
+            prime_factor_list.append(candidate)
+    return prime_factor_list
+
+
+#Type : Function
+#Input: num, a number which we need to factorize
+#Return Type: list
+#Details: This function returns only the prime factors of an input number,
+#         largest first, e.g: input: 20, returns: [5,2,2]
+#              input: 32, returns: [2,2,2,2,2]
+def factor(num):
+    if num == 1:
+        return [1]
+    i = 2
+    limit = num**0.5
+    while i <= limit:
+        if num % i == 0:
+            ret = factor(num/i)
+            ret.append(i)
+            return ret
+        i += 1
+    return [num]
+
+def validateFactors(flist):
+    """True when flist has at most four entries and each of them is 1, 2, 3 or 5."""
+    ref_list = [1,2,3,5]
+    if len(flist) > len(ref_list):
+        return False
+    # every entry has to be one of the reference factors
+    for felement in flist:
+        if felement not in ref_list:
+            return False
+    return True
+
+#Type : Function
+#Input: num, a number which we need to validate for 1,2,3 or 5 factors
+#Return Type: boolean
+#Details: This function validates an input number for its prime factors
+#         If factors has number other than 1,2,3 or 5 then return false else return true
+#         e.g: input: 20, returns: True
+#              input: 28, returns: False
+def validate_number_for_1235(num):
+    """True when num is 0 or when factor(num) contains nothing but 1, 2, 3 and 5."""
+    # zero is let through without being factorised
+    if num == 0:
+        return True
+
+    allowed = set([1,2,3,5])
+    # any factor outside the allowed set invalidates the input number
+    unexpected = set(factor(num)) - allowed
+    if unexpected:
+        return False
+    return True
+
+
+def getValidNumbersInRange(rlist):
+    valid_number_list = []
+    for relement in rlist:
+        prime_factors = findFactors(relement)
+        if validateFactors(prime_factors) == True:
+            valid_number_list.append(relement)
+    return valid_number_list
+
+def get_next_num_with_1235_factors(start):
+    start+=1
+    while not validateFactors(findFactors(start)):
+        start+=1
+    return start
+
+
+def check_number_for_1235_factors(number):
+    """Return True when validateFactors accepts findFactors(number); otherwise log an error and return False."""
+    factors = findFactors(number)
+    if not validateFactors(factors):
+        # the message used to contain a '{0}' placeholder that was never filled in
+        printLog("ERROR: {0} must have only 1,2,3,5 as factors".format(number))
+        return False
+    return True
+
+
+
+def check_for_1235_factors(values, option):
+    """Quit with an error unless every range bound named in `values` passes validate_number_for_1235."""
+    for n in values:
+        # drop an optional ':step' suffix first; int() used to fail on entries such as '16-128:x2'
+        for m in n.split(':')[0].replace('-',',').split(','):
+            if not validate_number_for_1235(int(m)):
+                printLog('ERROR: --{0} must specify number with only 1,2,3,5 as factors'.format(option))
+                quit()
+       
+
+if args.library == 'clFFT':
+    check_for_1235_factors(args.lengthx, 'lengthx')
+    check_for_1235_factors(args.lengthy, 'lengthy')
+    check_for_1235_factors(args.lengthz, 'lengthz')
+
+
+
+if not os.path.isfile(executable(args.library)):
+    printLog("ERROR: Could not find client named {0}".format(executable(args.library)))
+    quit()
+
+
+def get235RadicesNumberInRange(minimum, maximum):
+    if minimum == 0 and maximum == 0:
+        return [0]
+    numbers = generate235Radices(maximum)
+    minIndex = numbers.index(minimum)
+    maxIndex = numbers.index(maximum)
+    return numbers[minIndex:maxIndex+1]
+   
+#expand ranges
+class Range:
+    def __init__(self, ranges, defaultStep='+1'):
+        self.expanded = []
+        for thisRange in ranges:
+            if thisRange != 'max' and thisRange != 'adapt' :
+                if thisRange.count(':'):
+                    self._stepAmount = thisRange.split(':')[1]
+                else:
+                    self._stepAmount = defaultStep
+                thisRange = thisRange.split(':')[0]
+
+                if self._stepAmount.count('x'):
+                    self._stepper = '_mult'
+                    self._stepAmount = self._stepAmount.lstrip('+x')
+                    self._stepAmount = int(self._stepAmount)
+                elif self._stepAmount.count('l'):
+                    self._stepper = '_next_num_with_1235_factor'
+                    self._stepAmount = 0
+                else:
+                    self._stepper = '_add'
+                    self._stepAmount = self._stepAmount.lstrip('+x')
+                    self._stepAmount = int(self._stepAmount)
+
+                if thisRange.count('-'):
+                    self.begin = int(thisRange.split('-')[0])
+                    self.end = int(thisRange.split('-')[1])
+                else:
+                    self.begin = int(thisRange.split('-')[0])
+                    self.end = int(thisRange.split('-')[0])
+                self.current = self.begin
+
+           # _thisRangeExpanded = []
+            if thisRange == 'max':
+                self.expanded = self.expanded + ['max']
+            elif thisRange == 'adapt':
+                self.expanded = self.expanded + ['adapt']
+            elif self.begin == 0 and self._stepper == '_mult':
+                self.expanded = self.expanded + [0]
+            else:
+                if self._stepper == '_next_num_with_1235_factor':
+                    self.expanded = self.expanded + get235RadicesNumberInRange(self.current, self.end)
+                else:
+                    while self.current <= self.end:
+                        self.expanded = self.expanded + [self.current]
+                        self._step()
+
+            # now we want to uniquify and sort the expanded range
+            self.expanded = list(set(self.expanded))
+            self.expanded.sort()
+
+    # advance current value to next
+    def _step(self):  # advance self.current by calling the method whose name is stored in self._stepper
+        getattr(self, self._stepper)()
+
+    def _mult(self):  # multiplicative step: scale self.current by self._stepAmount
+        self.current = self.current * self._stepAmount
+
+    def _add(self):  # additive step: increase self.current by self._stepAmount
+        self.current = self.current + self._stepAmount
+
+    def _next_num_with_1235_factor(self):  # replace self.current with the result of get_next_num_with_1235_factors(self.current)
+        self.current = get_next_num_with_1235_factors(self.current)
+
+
+# Replace each range specification with its expanded list of values.
+# The three lengths pass 'l' as Range's second argument; the batch size does not pass one.
+args.batchSize = Range(args.batchSize).expanded
+for _lengthAttr in ('lengthx', 'lengthy', 'lengthz'):
+    setattr(args, _lengthAttr, Range(getattr(args, _lengthAttr), 'l').expanded)
+
+
+# expand problemsizes: each 'XxYxZ:batch' string becomes [['X', 'Y', 'Z'], 'batch']
+# (without a ':batch' suffix the result is the one-element list [['X', 'Y', 'Z']])
+if args.problemsize and args.problemsize[0] != 'None':
+    for idx, spec in enumerate(args.problemsize):
+        # split off the batch part first, then break the lengths apart on 'x'
+        fields = spec.split(':')
+        fields[0] = fields[0].split('x')
+        args.problemsize[idx] = fields
+
+
+#create the problem size combinations for each run of the client:
+#every expanded x, y and z length is paired with every expanded batch size
+problem_size_combinations = list(
+    itertools.product(args.lengthx, args.lengthy, args.lengthz, args.batchSize))
+
+
+#add manually entered problem sizes to the list of FFTs to crank out
+manual_test_combinations = []
+
+if args.problemsize and args.problemsize[0] != 'None':
+    for entry in args.problemsize:
+        # entry[0] is the list of lengths that were separated by 'x';
+        # entry[1], when present, is the batch size that followed the ':'
+        lengths = entry[0]
+
+        # the x length is always taken from the first field
+        xLength = int(lengths[0])
+
+        # y and z fall back to 1 when fewer dimensions were entered
+        if len(lengths) >= 2:
+            yLength = int(lengths[1])
+        else:
+            yLength = 1
+
+        if len(lengths) >= 3:
+            zLength = int(lengths[2])
+        else:
+            zLength = 1
+
+        # a size entered without ':batch' gets a batch size of 1
+        if len(entry) > 1:
+            batchCount = int(entry[1])
+        else:
+            batchCount = 1
+
+        # one (x, y, z, batch) tuple per entry, the same shape itertools.product yields
+        manual_test_combinations.append((xLength, yLength, zLength, batchCount))
+        # manually entered problem sizes should not be plotted (for now). they may still be output in a table if requested
+
+
+problem_size_combinations = problem_size_combinations + manual_test_combinations
+
+#create final list of all transformations (with problem sizes and transform properties)
+test_combinations = itertools.product(problem_size_combinations, args.device, args.inputlayout, args.outputlayout, args.placeness, args.precision)
+# each params is ((x, y, z, batch), device, inlayout, outlayout, placeness, precision)
+test_combinations = [TestCombination(*(params[0] + params[1:] + (args.label,)))
+                     for params in test_combinations]
+
+
+#turn each test combination into a command, run the command, and then stash the gflops
+result = [] # this is where we'll store the results for the table
+
+# Pick the table file name. Without a requested name a timestamped 'results...txt' is used;
+# a requested name that already exists gets the timestamp appended instead of being overwritten.
+timestamp = datetime.now().isoformat().replace(':','.')
+if args.tableOutputFilename is None:
+    args.tableOutputFilename = 'results' + timestamp + '.txt'
+elif os.path.isfile(args.tableOutputFilename):
+    oldname = args.tableOutputFilename
+    args.tableOutputFilename = oldname + timestamp
+    message = 'A file with the name ' + oldname + ' already exists. Changing filename to ' + args.tableOutputFilename
+    printLog(message)
+
+printLog('table header---->'+ str(tableHeader))
+
+#open output file and write the header
+table = open(args.tableOutputFilename, 'w')
+table.write(tableHeader + '\n')
+table.flush()
+
+# a constProbSize of -1 is replaced by the maxBatchSize() result for a 1x1x1 transform
+# using the first input layout, precision and device
+if args.constProbSize == -1:
+    args.constProbSize = maxBatchSize(1, 1, 1, args.inputlayout[0], args.precision[0], executable(args.library), '--' + args.device[0])
+args.constProbSize = int(args.constProbSize)
+
+printLog('Total combinations =  '+str(len(test_combinations)))
+
+vi = 0
+# Run the client once per test combination and append one CSV row to the table per successful run.
+for params in test_combinations:
+    vi = vi+1
+    printLog("")
+    printLog('preparing command: '+ str(vi))
+    device = params.device
+    lengthx = str(params.x)
+    lengthy = str(params.y)
+    lengthz = str(params.z)
+    # resolve the symbolic batch sizes ('max', 'adapt') into a concrete value
+    if params.batchsize == 'max':
+        batchSize = maxBatchSize(lengthx, lengthy, lengthz, params.inlayout, params.precision, executable(args.library), '--' + device)
+    elif params.batchsize == 'adapt':
+        batchSize = str(args.constProbSize/(int(lengthx)*int(lengthy)*int(lengthz)))
+    else:
+        batchSize = str(params.batchsize)
+
+    if params.inlayout == 'complexinterleaved' or params.inlayout == 'ci':
+        inputlayout = '1'
+    elif params.inlayout == 'complexplanar' or params.inlayout == 'cp':
+        inputlayout = '2'
+    else:
+        printLog('ERROR: invalid value for input layout when assembling client command')
+        continue # never launch the client with a stale or undefined input layout
+    if params.outlayout == 'complexinterleaved' or params.outlayout == 'ci':
+        outputlayout = '1'
+    elif params.outlayout == 'complexplanar' or params.outlayout == 'cp':
+        outputlayout = '2'
+    else:
+        printLog('ERROR: invalid value for output layout when assembling client command')
+        continue # same reasoning as for the input layout
+    if params.placeness == 'inplace' or params.placeness == 'in':
+        placeness = ''
+    elif params.placeness == 'outofplace' or params.placeness == 'out':
+        placeness = '--outPlace'
+    else:
+        printLog('ERROR: invalid value for placeness when assembling client command')
+        continue # same reasoning as for the input layout
+    if params.precision == 'single':
+        precision = ''
+    elif params.precision == 'double':
+        precision = '--double'
+    else:
+        printLog('ERROR: invalid value for precision when assembling client command')
+        continue # same reasoning as for the input layout
+
+    #set up arguments here
+    if args.library == 'clFFT':
+        arguments = [executable(args.library),
+                     '--' + device,
+                     '-x', lengthx,
+                     '-y', lengthy,
+                     '-z', lengthz,
+                     '--batchSize', batchSize,
+                     '--inLayout', inputlayout,
+                     '--outLayout', outputlayout,
+                     placeness,
+                     precision,
+                     '-p', '10']
+
+    writeline = True
+    try:
+        printLog('Executing Command: '+str(arguments))
+        output = checkTimeOutPut(arguments)
+        output = output.split(os.linesep)
+        printLog('Execution Successfull---------------\n')
+
+    except errorHandler.ApplicationException as ae:
+        writeline = False
+        printLog('ERROR: Command is taking too much of time '+ae.message+'\n'+'Command: \n'+str(arguments))
+        continue
+    except subprocess.CalledProcessError as clientCrash:
+        print 'Command execution failure--->'
+        writeline = False
+        if clientCrash.output.count('CLFFT_INVALID_BUFFER_SIZE'):
+            printLog('Omitting line from table - problem is too large')
+        else:
+            printLog('ERROR: client crash. Please report the following error message (with \'CLFFT_*\' error code, if given, and the parameters used to invoke measurePerformance.py) \n'+clientCrash.output+'\n')
+            printLog('IN ORIGINAL WE CALL QUIT HERE - 1\n')
+        # a failed run produced no fresh 'output'; never fall through and parse the previous run's text
+        continue
+
+    for x in output:
+        if x.count('out of memory'):
+            writeline = False
+            printLog('ERROR: Omitting line from table - problem is too large')
+
+    if writeline:
+        try:
+            # only the lines that mention 'Gflops' matter; the figure is read from the last of them
+            output = [line for line in output if line.count('Gflops')]
+            thisResult = re.search('\d+\.*\d*e*-*\d*$', output[-1])
+            thisResult = float(thisResult.group(0))
+            thisResult = (params.x, params.y, params.z, batchSize, params.device, params.inlayout, params.outlayout, params.placeness, params.precision, params.label, thisResult)
+
+            outputRow = ''
+            for x in thisResult:
+                outputRow = outputRow + str(x) + ','
+            outputRow = outputRow.rstrip(',')
+            table.write(outputRow + '\n')
+            table.flush()
+        except Exception: # parsing problems only; Ctrl-C / SystemExit still stop the run
+            printLog('ERROR: Exception occurs in GFLOP parsing')
+    else:
+        if(len(output) > 0):
+            if 'nan' in output[0] or 'inf' in output[0]: # str.find() returns -1 (truthy) when the text is absent
+                printLog( 'WARNING: output from client was funky for this run. skipping table row')
+            else:
+                printLog('ERROR: output from client makes no sense')
+                printLog(str(output[0]))
+                printLog('IN ORIGINAL WE CALL QUIT HERE - 2\n')
+        else:
+            printLog('ERROR: output from client makes no sense')
+            #quit()
+printLog("=========================MEASURE PERFORMANCE ENDS===========================\n")
+#
+#"""
+#print a pretty table
+#"""
+#if args.tableOutputFilename == None:
+#   args.tableOutputFilename = 'results' + datetime.now().isoformat().replace(':','.') + '.txt'
+#else:
+#   if os.path.isfile(args.tableOutputFilename):
+#       oldname = args.tableOutputFilename
+#       args.tableOutputFilename = args.tableOutputFilename + datetime.now().isoformat().replace(':','.')
+#       message = 'A file with the name ' + oldname + ' already exists. Changing filename to ' + args.tableOutputFilename
+#       print message
+#
+#table = open(args.tableOutputFilename, 'w')
+#table.write(tableHeader + '\n')
+#for x in result:
+#   row = ''
+#   for y in x:
+#       row = row + str(y) + ','
+#   row = row[:-1] #chomp off the trailing comma
+#   table.write(row + '\n')
diff --git a/src/scripts/perf/performanceUtility.py b/src/scripts/perf/performanceUtility.py
new file mode 100644
index 00000000..e3c76269
--- /dev/null
+++ b/src/scripts/perf/performanceUtility.py
@@ -0,0 +1,97 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+#This file contains a number of utilities function which could be independent of
+#any specific domain concept
+
+import signal
+from subprocess import check_output
+import errorHandler
+from datetime import datetime
+
+def currentUser():  # returns the first whitespace-separated token of the `who` command's output
+    try:
+        return check_output("who", shell = True).split()[0];
+    except:  # any failure (e.g. `who` failing or printing nothing) is reported, then re-raised unchanged
+        print 'Unhandled Exception at performanceUtility::currentUser()'
+        raise
+    
+#Details: Generate sorted numbers in radices of 2,3 and 5 upto a given upper limit number
+def generate235Radices(maxSize):
+    """Return the ascending list of every 2**k * 3**j * 5**i that is <= maxSize.
+
+    maxSize -- inclusive upper bound; a bound below 1 gives an empty list.
+    Each product is generated exactly once, so the list has no duplicates.
+    """
+    radices = []
+
+    # outermost: powers of five
+    powerOfFive = 1
+    while powerOfFive <= maxSize:
+
+        # middle: multiply in powers of three
+        fiveTimesThree = powerOfFive
+        while fiveTimesThree <= maxSize:
+
+            # innermost: multiply in powers of two and record each product
+            product = fiveTimesThree
+            while product <= maxSize:
+                radices.append(product)
+                product *= 2
+
+            fiveTimesThree *= 3
+
+        powerOfFive *= 5
+
+    # products were generated out of order; callers get them sorted
+    radices.sort()
+    return radices
+
+
+def timeout(timeout_time, default):
+    # Decorator factory: the decorated one-argument function runs under a SIGALRM watchdog
+    # of timeout_time seconds. 'default' is kept for interface compatibility and is unused.
+    def timeout_function(f):
+        def f2(args):
+            def timeout_handler(signum, frame):
+                raise errorHandler.TimeoutException()
+
+            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(timeout_time) # trigger alarm in timeout_time seconds
+            try:
+                return f(args)
+            except errorHandler.TimeoutException:
+                # report the expiry as the application-level time-out error
+                raise errorHandler.ApplicationException(__file__, errorHandler.TIME_OUT)
+            finally:
+                # Cancel any pending alarm BEFORE restoring the previous handler, on every
+                # exit path, so a late SIGALRM can never be delivered to old_handler
+                # (the default SIGALRM action would terminate the process).
+                signal.alarm(0)
+                signal.signal(signal.SIGALRM, old_handler)
+        return f2
+    return timeout_function
+
+
+def logTxtOutput(fileName, mode, txt):  # write a '------' separator line followed by txt to a per-day file opened with `mode`
+    todayFile =  fileName+'-'+datetime.now().strftime('%Y-%b-%d')+'.txt'  # <fileName>-<year>-<abbreviated month>-<day>.txt
+    with open(todayFile, mode) as f:
+        f.write('------\n'+txt+'\n')
+        
+def log(filename, txt):  # append txt to filename as one line prefixed with the current ctime() and '# '
+    with open(filename, 'a') as f:
+        f.write(datetime.now().ctime()+'# '+txt+'\n')
+        
\ No newline at end of file
diff --git a/src/scripts/perf/plotPerformance.py b/src/scripts/perf/plotPerformance.py
new file mode 100644
index 00000000..3976512a
--- /dev/null
+++ b/src/scripts/perf/plotPerformance.py
@@ -0,0 +1,306 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+# to use this script, you will need to download and install the 32-BIT VERSION of:
+# - Python 2.7 x86 (32-bit) - http://www.python.org/download/releases/2.7.1
+#
+# you will also need the 32-BIT VERSIONS of the following packages as not all the packages are available in 64bit at the time of this writing
+# The ActiveState python distribution is recommended for windows
+# (make sure to get the python 2.7-compatible packages):
+# - NumPy 1.5.1 (32-bit, 64-bit unofficial, supports Python 2.4 - 2.7 and 3.1 - 3.2.) - http://sourceforge.net/projects/numpy/files/NumPy/
+# - matplotlib 1.0.1 (32-bit & 64-bit, supports Python 2.4 - 2.7) - http://sourceforge.net/projects/matplotlib/files/matplotlib/
+#
+# For ActiveState Python, all that one should need to type is 'pypm install matplotlib'
+
+import datetime
+import sys
+import argparse
+import subprocess
+import itertools
+import os
+import matplotlib
+import pylab
+from matplotlib.backends.backend_pdf import PdfPages
+from fftPerformanceTesting import *
+
+def plotGraph(dataForAllPlots, title, plottype, plotkwargs, xaxislabel, yaxislabel):
+  """
+  Draw one line per entry of dataForAllPlots (its .xdata, .ydata and .label), then show the figure or save it to args.outputFilename.
+  """
+  dh.write('Making graph\n')  # NOTE(review): 'dh' is not assigned anywhere in this file - presumably supplied by 'from fftPerformanceTesting import *'; confirm
+  colors = ['k','y','m','c','r','b','g']
+  # plottype is the name of the pylab function to call; plotkwargs are passed through to it
+  for thisPlot in dataForAllPlots:
+    getattr(pylab, plottype)(thisPlot.xdata, thisPlot.ydata,
+                             '{}.-'.format(colors.pop()), 
+                             label=thisPlot.label, **plotkwargs)
+  if len(dataForAllPlots) > 1:
+    pylab.legend(loc='best')
+  
+  pylab.title(title)
+  pylab.xlabel(xaxislabel)
+  pylab.ylabel(yaxislabel)
+  pylab.grid(True)
+  
+  if args.outputFilename == None:
+    # if no output file is requested, spit the graph to the screen . . .
+    pylab.show()
+  else:
+    pylab.savefig(args.outputFilename,dpi=(1024/8))
+    # . . . otherwise the figure is written to args.outputFilename (PdfPages variant kept below, disabled)
+    #pdf = PdfPages(args.outputFilename)
+    #pdf.savefig()
+    #pdf.close()
+
+######## plotFromDataFile() Function to plot from data file begins ########
+def plotFromDataFile():
+  """
+  Read the result table(s) named in args.datafile, sanity-check the rows and plot args.graphyaxis against args.graphxaxis, one line per distinct value of the chosen plot parameter.
+  """
+  data = []
+  for thisFile in args.datafile: # read in table(s) from file(s)
+    if not os.path.isfile(thisFile):
+      print 'No file with the name \'{}\' exists. Please indicate another filename.'.format(thisFile)
+      quit()
+
+    with open(thisFile, 'r') as results: # closed as soon as the contents are read
+      resultsContents = results.read()
+    resultsContents = resultsContents.rstrip().split('\n')
+
+    firstRow = resultsContents.pop(0)
+    if firstRow != tableHeader:
+      print 'ERROR: input file \'{}\' does not match expected format.'.format(thisFile)
+      quit()
+
+    for row in resultsContents:
+      row = row.split(',')
+      row = TableRow(TestCombination(row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7],row[8],row[9]), row[10])
+      data.append(GraphPoint(row.parameters.x, row.parameters.y, row.parameters.z, row.parameters.batchsize, row.parameters.precision, row.parameters.device, row.parameters.label, row.gflops))
+
+  #
+  # data sanity check
+  #
+  # if multiple plotvalues have > 1 value among the data rows, the user must specify which to plot
+  multiplePlotValues = []
+  for option in plotvalues:
+    values = []
+    for point in data:
+      values.append(getattr(point, option))
+    multiplePlotValues.append(len(set(values)) > 1)
+  if multiplePlotValues.count(True) > 1 and args.plot is None:
+    print 'ERROR: more than one parameter of {} has multiple values. Please specify which parameter to plot with --plot'.format(plotvalues)
+    quit()
+
+  # if args.graphxaxis is not 'problemsize', the user should know that the results might be strange
+  if args.graphxaxis != 'problemsize':
+    xaxisvalueSet = []
+    for option in xaxisvalues:
+      if option != 'problemsize':
+        values = []
+        for point in data:
+          values.append(getattr(point, option))
+        xaxisvalueSet.append(len(set(values)) > 1)
+    if xaxisvalueSet.count(True) > 1:
+      print 'WARNING: more than one parameter of {} is varied. unexpected results may occur. please double check your graphs for accuracy.'.format(xaxisvalues)
+
+  # multiple rows should not have the same input values
+  pointInputs = []
+  for point in data:
+    pointInputs.append(point.__str__().split(';')[0])
+  if len(set(pointInputs)) != len(data):
+    print 'ERROR: imported table has duplicate rows with identical input parameters'
+    quit()
+
+  #
+  # figure out if we have multiple plots on this graph (and what they should be)
+  #
+  if args.plot is not None:
+    multiplePlots = args.plot
+  elif multiplePlotValues.count(True) == 1:
+    multiplePlots = plotvalues[multiplePlotValues.index(True)]
+  else:
+    # default to device if none of the options to plot have multiple values
+    multiplePlots = 'device'
+
+  #
+  # assemble data for the graphs
+  #
+  data.sort(key=lambda row: int(getattr(row, args.graphxaxis)))
+  if not data: quit('ERROR: no data rows found in the input file(s)') # data[-1] below needs at least one row
+  # choose scale for x axis
+  if args.xaxisscale is None:
+    # user didn't specify. autodetect from the largest x value; the larger threshold must be tested first
+    if int(getattr(data[-1], args.graphxaxis)) > 10000: # bigger numbers on x-axis
+      args.xaxisscale = 'log10'
+    elif int(getattr(data[-1], args.graphxaxis)) > 2000: # big numbers on x-axis
+      args.xaxisscale = 'log2'
+    else: # small numbers on x-axis
+      args.xaxisscale = 'linear'
+
+  if args.xaxisscale == 'linear':
+    plotkwargs = {}
+    plottype = 'plot'
+  elif args.xaxisscale == 'log2':
+    plottype = 'semilogx'
+    plotkwargs = {'basex':2}
+  elif args.xaxisscale == 'log10':
+    plottype = 'semilogx'
+    plotkwargs = {'basex':10}
+  else:
+    print 'ERROR: invalid value for x-axis scale'
+    quit()
+
+  plots = set(getattr(row, multiplePlots) for row in data)
+
+  class DataForOnePlot:
+    def __init__(self, inlabel, inxdata, inydata):
+      self.label = inlabel
+      self.xdata = inxdata
+      self.ydata = inydata
+
+  dataForAllPlots = []
+  for plot in plots:
+    # the rows that belong to this line of the graph, still in x-axis order
+    dataForThisPlot = [point for point in data if getattr(point, multiplePlots) == plot]
+    if args.graphxaxis == 'problemsize':
+      xdata = [int(row.x) * int(row.y) * int(row.z) * int(row.batchsize) for row in dataForThisPlot]
+    else:
+      xdata = [getattr(row, args.graphxaxis) for row in dataForThisPlot]
+    ydata = [getattr(row, args.graphyaxis) for row in dataForThisPlot]
+    dataForAllPlots.append(DataForOnePlot(plot,xdata,ydata))
+
+  #
+  # assemble labels for the graph or use the user-specified ones
+  #
+  if args.graphtitle:
+    # use the user selection
+    title = args.graphtitle
+  else:
+    # autogen a lovely title
+    title = 'Performance vs. ' + args.graphxaxis.capitalize()
+
+  if args.xaxislabel:
+    # use the user selection
+    xaxislabel = args.xaxislabel
+  else:
+    # autogen a lovely x-axis label
+    if args.graphxaxis == 'cachesize':
+      units = '(bytes)'
+    else:
+      units = '(datapoints)'
+
+    xaxislabel = args.graphxaxis + ' ' + units
+
+  if args.yaxislabel:
+    # use the user selection
+    yaxislabel = args.yaxislabel
+  else:
+    # autogen a lovely y-axis label
+    # 'gflops' -> 'GFLOPS'; derived from the axis name so 'units' is always bound
+    units = args.graphyaxis.upper()
+    yaxislabel = 'Performance (' + units + ')'
+
+  #
+  # display a pretty graph
+  #
+  colors = ['k','y','m','c','r','b','g']
+
+  for plotIndex, thisPlot in enumerate(dataForAllPlots): # colors are taken from the end of the list and reused after seven plots
+    getattr(pylab, plottype)(thisPlot.xdata, thisPlot.ydata, '{}.-'.format(colors[-1 - (plotIndex % len(colors))]), label=thisPlot.label, **plotkwargs)
+
+  if len(dataForAllPlots) > 1:
+    pylab.legend(loc='best')
+
+  pylab.title(title)
+  pylab.xlabel(xaxislabel)
+  pylab.ylabel(yaxislabel)
+  pylab.grid(True)
+
+  if args.outputFilename is None:
+    # if no output file is requested, spit the graph to the screen . . .
+    pylab.show()
+  else:
+    # . . . otherwise write it to the requested file
+    #pdf = PdfPages(args.outputFilename)
+    #pdf.savefig()
+    #pdf.close()
+    pylab.savefig(args.outputFilename,dpi=(1024/8))
+######### plotFromDataFile() Function to plot from data file ends #########
+
+
+
+######## "main" program begins #####
+"""
+define and parse parameters
+"""
+
+xaxisvalues = ['x','y','z','batchsize','problemsize']
+yaxisvalues = ['gflops']
+plotvalues = ['device', 'precision', 'label']
+
+
+
+parser = argparse.ArgumentParser(description='Plot performance of the clfft\
+    library. clfft.plotPerformance.py reads in data tables from clfft.\
+    measurePerformance.py and plots their values')
+fileOrDb = parser.add_mutually_exclusive_group(required=True)
+fileOrDb.add_argument('-d', '--datafile',
+  dest='datafile', action='append', default=None, required=False,
+  help='indicate a file to use as input. must be in the format output by\
+  clfft.measurePerformance.py. may be used multiple times to indicate\
+  multiple input files. e.g., -d cypressOutput.txt -d caymanOutput.txt')
+parser.add_argument('-x', '--x_axis',
+  dest='graphxaxis', default=None, choices=xaxisvalues, required=True,
+  help='indicate which value will be represented on the x axis. problemsize\
+      is defined as x*y*z*batchsize')
+parser.add_argument('-y', '--y_axis',
+  dest='graphyaxis', default='gflops', choices=yaxisvalues,
+  help='indicate which value will be represented on the y axis')
+parser.add_argument('--plot',
+  dest='plot', default=None, choices=plotvalues,
+  help='indicate which of {} should be used to differentiate multiple plots.\
+      this will be chosen automatically if not specified'.format(plotvalues))
+parser.add_argument('--title',
+  dest='graphtitle', default=None,
+  help='the desired title for the graph generated by this execution. if\
+      GRAPHTITLE contains any spaces, it must be entered in \"double quotes\".\
+      if this option is not specified, the title will be autogenerated')
+parser.add_argument('--x_axis_label',
+  dest='xaxislabel', default=None,
+  help='the desired label for the graph\'s x-axis. if XAXISLABEL contains\
+      any spaces, it must be entered in \"double quotes\". if this option\
+      is not specified, the x-axis label will be autogenerated')
+parser.add_argument('--x_axis_scale',
+  dest='xaxisscale', default=None, choices=['linear','log2','log10'],
+  help='the desired scale for the graph\'s x-axis. if nothing is specified,\
+      it will be selected automatically')
+parser.add_argument('--y_axis_label',
+  dest='yaxislabel', default=None,
+  help='the desired label for the graph\'s y-axis. if YAXISLABEL contains any\
+      spaces, it must be entered in \"double quotes\". if this option is not\
+      specified, the y-axis label will be autogenerated')
+parser.add_argument('--outputfile',
+  dest='outputFilename', default=None,
+  help='name of the file to output graphs. Supported formats: emf, eps, pdf, png, ps, raw, rgba, svg, svgz.')
+
+args = parser.parse_args()
+
+if args.datafile != None:
+  plotFromDataFile()
+else:
+  print "Atleast specify if you want to use text files or database for plotting graphs. Use -h or --help option for more details"
+  quit()
+
diff --git a/src/statTimer/CMakeLists.txt b/src/statTimer/CMakeLists.txt
new file mode 100644
index 00000000..0b5d75f8
--- /dev/null
+++ b/src/statTimer/CMakeLists.txt
@@ -0,0 +1,90 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+
+# List the names of the source files to compile for the StatTimer library . . .
+set( StatTimer.Source 	statisticalTimer.CPU.cpp
+								statisticalTimer.GPU.cpp
+								statisticalTimer.extern.cpp
+								stdafx.cpp )
+
+# Windows only uses dllmain
+if( MSVC )
+	set( StatTimer.Source ${StatTimer.Source} dllmain.cpp )
+endif( )
+
+set( StatTimer.Headers	statisticalTimer.h
+								statisticalTimer.extern.h
+								statisticalTimer.CPU.h
+								statisticalTimer.GPU.h
+								stdafx.h 
+								targetver.h 
+								../include/clFFT.h )
+								
+set( StatTimer.Files ${StatTimer.Source} ${StatTimer.Headers} )
+
+# For a rainy day, add pre-compiled header support
+#if( MSVC )
+#	if (USE_MSVC_PCH)
+	
+#		set_source_files_properties(LungAnalysisPCH.cxx
+#			PROPERTIES
+#			COMPILE_FLAGS "/YcLungAnalysisPCH.h"
+#			)
+#		foreach( src_file ${UPMC_LA_SRCS} )
+#			set_source_files_properties(
+#				${src_file}
+#				PROPERTIES
+#				COMPILE_FLAGS "/YuLungAnalysisPCH.h"
+#				)
+#		endforeach( src_file ${UPMC_LA_SRCS} )
+		
+#		list(APPEND UPMC_LA_SRCS LungAnalysisPCH.cxx)
+#		list(APPEND UPMC_LA_HDRS LungAnalysisPCH.h)
+
+#	endif(USE_MSVC_PCH)
+#endif (MSVC)
+
+add_definitions( "/DSTATTIMER_EXPORTS" )
+
+# Include standard OpenCL headers
+include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include )
+
+add_library( StatTimer SHARED ${StatTimer.Files} )
+set_target_properties( StatTimer PROPERTIES VERSION ${CLFFT_VERSION} )
+set_target_properties( StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
+target_link_libraries( StatTimer ${OPENCL_LIBRARIES} )
+
+if( UNIX )
+	# This library dependency is brought in by the high precision timer available in linux
+	target_link_libraries( StatTimer -lrt )
+endif( )
+
+if( BUILD64 )
+	# CPack configuration; include the StatTimer library in the package
+	install( TARGETS StatTimer
+			RUNTIME DESTINATION bin64
+			LIBRARY DESTINATION lib64
+			ARCHIVE DESTINATION lib64/import
+			)
+else()
+	# CPack configuration; include the StatTimer library in the package
+	install( TARGETS StatTimer
+			RUNTIME DESTINATION bin32
+			LIBRARY DESTINATION lib32
+			ARCHIVE DESTINATION lib32/import
+			)
+endif()
diff --git a/src/statTimer/ReadMe.txt b/src/statTimer/ReadMe.txt
new file mode 100644
index 00000000..19dd5395
--- /dev/null
+++ b/src/statTimer/ReadMe.txt
@@ -0,0 +1,56 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+========================================================================
+    DYNAMIC LINK LIBRARY : StatTimer Project Overview
+========================================================================
+
+AppWizard has created this StatTimer DLL for you.
+
+This file contains a summary of what you will find in each of the files that
+make up your StatTimer application.
+
+
+StatTimer.vcxproj
+    This is the main project file for VC++ projects generated using an Application Wizard.
+    It contains information about the version of Visual C++ that generated the file, and
+    information about the platforms, configurations, and project features selected with the
+    Application Wizard.
+
+StatTimer.vcxproj.filters
+    This is the filters file for VC++ projects generated using an Application Wizard. 
+    It contains information about the association between the files in your project 
+    and the filters. This association is used in the IDE to show grouping of files with
+    similar extensions under a specific node (e.g., ".cpp" files are associated with the
+    "Source Files" filter).
+
+StatTimer.cpp
+    This is the main DLL source file.
+
+/////////////////////////////////////////////////////////////////////////////
+Other standard files:
+
+StdAfx.h, StdAfx.cpp
+    These files are used to build a precompiled header (PCH) file
+    named StatTimer.pch and a precompiled types file named StdAfx.obj.
+
+/////////////////////////////////////////////////////////////////////////////
+Other notes:
+
+AppWizard uses "TODO:" comments to indicate parts of the source code you
+should add to or customize.
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/src/statTimer/dllmain.cpp b/src/statTimer/dllmain.cpp
new file mode 100644
index 00000000..9afb3395
--- /dev/null
+++ b/src/statTimer/dllmain.cpp
@@ -0,0 +1,35 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+// dllmain.cpp : Defines the entry point for the DLL application.
+#include "stdafx.h"
+
+// Entry point of the DLL. No work is performed for any notification;
+// every call simply returns TRUE.
+BOOL APIENTRY DllMain( HMODULE hModule,
+                       DWORD  ul_reason_for_call,
+                       LPVOID lpReserved )
+{
+	// None of the supplied arguments are needed.
+	(void) hModule;
+	(void) ul_reason_for_call;
+	(void) lpReserved;
+
+	// DLL_PROCESS_ATTACH, DLL_THREAD_ATTACH, DLL_THREAD_DETACH and
+	// DLL_PROCESS_DETACH (and any other value) all get the same answer.
+	return TRUE;
+}
+
diff --git a/src/statTimer/statisticalTimer.CPU.cpp b/src/statTimer/statisticalTimer.CPU.cpp
new file mode 100644
index 00000000..4e3c8c2e
--- /dev/null
+++ b/src/statTimer/statisticalTimer.CPU.cpp
@@ -0,0 +1,410 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// StatTimer.cpp : Defines the exported functions for the DLL application.
+//
+
+#include "stdafx.h"
+#include <cassert>
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <functional>
+#include <cmath>
+#include <limits>
+
+#include "statisticalTimer.CPU.h"
+#include "../library/private.h"
+
+#if defined( __GNUC__ )
+	#include <sys/time.h>
+#endif
+
+//	Format an unsigned number with comma thousands separators, e.g. 1234567 -> "1,234,567".
+//	T must be an unsigned integer type; digits are produced right-to-left into a local buffer.
+template< typename T >		// T could be 32-bit or 64-bit
+std::basic_string<TCHAR> commatize (T number)
+{
+	//	Local (not static) scratch space, so concurrent callers never share a buffer
+	TCHAR scratch [8*sizeof(T)];
+	TCHAR * ptr = scratch + countOf( scratch );
+	*(--ptr) = 0;
+
+	for (int digits = 3; ; )
+	{
+		*(--ptr) = '0' + int (number % 10);
+		number /= 10;
+		if (0 == number)
+			break;
+		if (--digits <= 0)
+		{
+			*(--ptr) = ',';
+			digits = 3;
+		}
+	}
+
+	return std::basic_string<TCHAR> (ptr);
+}
+
+//	Functor for std::for_each that sums every visited element into 'acc'.
+template< typename T >
+struct Accumulator	// no std::unary_function base: it was removed from the library in C++17
+{
+	T acc;
+
+	Accumulator( ): acc( 0 ) {}
+	void operator( )(T x) { acc += x; }
+};
+
+//	Unary predicate for remove_if(): true for values outside [ mean-stdev, mean+stdev ]
+//	Currently, RangeType is expected to be a floating point type, and ValType an integer type
+template< typename RangeType, typename ValType >
+struct PruneRange
+{
+	RangeType lower, upper;
+
+	PruneRange( RangeType mean, RangeType stdev ): lower( mean-stdev ), upper( mean+stdev ) {}
+
+	bool operator( )( ValType val )
+	{
+		//	Compare in RangeType (floating, signed) so that an unsigned ValType
+		//	cannot wrap around when it is tested against a negative lower bound
+		const RangeType	sample	= static_cast< RangeType >( val );
+		//	True (prune) when the sample lies strictly outside [lower, upper]
+		const bool	below	= sample < lower;
+		const bool	above	= sample > upper;
+
+		return below || above;
+	}
+};
+
+CpuStatTimer&
+CpuStatTimer::getInstance( )
+{
+	static	CpuStatTimer	timer;	//	function-local static: constructed on the first call
+	return	timer;
+}
+
+CpuStatTimer::CpuStatTimer( ): nEvents( 0 ), nSamples( 0 ), normalize( true )
+{
+#if defined( _WIN32 )
+	//	OS call to get ticks per second
+	::QueryPerformanceFrequency( reinterpret_cast<LARGE_INTEGER*>( &clkFrequency ) );
+#else
+	res.tv_sec	= 0;
+	res.tv_nsec	= 0;
+	clkFrequency 	= 0;
+
+	//	The clock_getres( CLOCK_MONOTONIC ) probe below is disabled, so the braced block always runs:
+	//	ticks are microseconds, matching the gettimeofday() arithmetic in Start( ) and Stop( )
+//	if( ::clock_getres( CLOCK_MONOTONIC, &res ) )
+	{
+		clkFrequency = 1000000;
+	}
+//	else
+//	{
+//	    // Turn time into frequency
+//		clkFrequency = res.tv_nsec * 1000000000;
+//	}
+
+#endif
+}
+
+CpuStatTimer::~CpuStatTimer( )
+{}	//	no explicit cleanup is performed here
+
+void
+CpuStatTimer::Clear( )
+{
+	labelID.clear( );	//	registered ( label, groupID ) pairs
+	clkStart.clear( );	//	pending start stamps
+	clkTicks.clear( );	//	recorded samples of every event; nEvents / nSamples are left untouched
+}
+
+void
+CpuStatTimer::Reset( )
+{
+	//	Reserve( ) records the sizes this function re-applies; zero means it never ran
+	if( nEvents == 0 || nSamples == 0 )
+		throw	std::runtime_error( "StatisticalTimer::Reserve( ) was not called before Reset( )" );
+
+	//	Drop every start stamp and sample, then rebuild one slot per event
+	clkStart.clear( );
+	clkTicks.clear( );
+	clkStart.resize( nEvents );
+	clkTicks.resize( nEvents );
+
+	//	Pre-allocate room for the expected number of samples of each event
+	for( std::vector< clkVector >::iterator ev = clkTicks.begin( ); ev != clkTicks.end( ); ++ev )
+	{
+		ev->reserve( nSamples );
+	}
+}
+
+//	The caller can pre-allocate memory, to improve performance.
+//	nEvents is an approximate count of the separate events the caller expects to time, and
+//	nSamples is a hint for how many samples will be taken per event.  Both are clamped to at
+//	least 1, and it is the clamped values that size the vectors (so Reserve( 0, 0 ) is usable).
+void
+CpuStatTimer::Reserve( size_t nEvents, size_t nSamples )
+{
+	this->nEvents	= std::max< size_t >( 1, nEvents );
+	this->nSamples	= std::max< size_t >( 1, nSamples );
+
+	Clear( );
+	labelID.reserve( this->nEvents );
+
+	clkStart.resize( this->nEvents );
+	clkTicks.resize( this->nEvents );
+
+	for( size_t i = 0; i < this->nEvents; ++i )
+	{
+		clkTicks.at( i ).reserve( this->nSamples );
+	}
+}
+
+void
+CpuStatTimer::setNormalize( bool norm )
+{
+	normalize = norm;	//	true: getAverageTime( ) / getMinimumTime( ) divide by clkFrequency
+}
+
+void
+CpuStatTimer::Start( size_t id )
+{
+#if defined( _WIN32 )
+	::QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &clkStart.at( id ) ) );	//	counter is written straight into the start slot
+#else
+	if( clkFrequency )
+	{
+		struct timeval s;
+		gettimeofday( &s, 0 );
+		clkStart.at( id ) = (cl_ulong)s.tv_sec * 1000000 + (cl_ulong)s.tv_usec;	//	start stamp in microseconds
+	}
+	else
+	{
+
+	}
+#endif
+}
+
+void
+CpuStatTimer::Stop( size_t id )
+{
+	cl_ulong n;
+
+#if defined( _WIN32 )
+	::QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &n ) );
+#else
+	struct timeval s;
+	gettimeofday( &s, 0 );
+	n = (cl_ulong)s.tv_sec * 1000000 + (cl_ulong)s.tv_usec;
+#endif
+
+	n		-= clkStart.at( id );	//	elapsed ticks since the matching Start( id )
+	clkStart.at( id )	= 0;
+	AddSample( id, n );	//	append the elapsed ticks to this event's history
+}
+
+void
+CpuStatTimer::AddSample( const size_t id, const cl_ulong n )
+{
+	clkTicks.at( id ).push_back( n );	//	at( ) throws std::out_of_range for an id with no slot
+}
+
+//	Maps a human readable ( label, groupID ) pair onto the index used by the timer's internal
+//	vectors.  A pair seen before yields its existing index; a new pair is appended to labelID.
+size_t
+CpuStatTimer::getUniqueID( const std::string& label, cl_uint groupID )
+{
+	const labelPair	wanted	= std::make_pair( label, groupID );
+
+	//	labelID is expected to stay small (a few dozen entries), so a linear scan
+	//	is preferred over keeping the vector sorted for a binary search
+	for( size_t slot = 0; slot < labelID.size( ); ++slot )
+	{
+		if( labelID[ slot ] == wanted )
+			return	slot;
+	}
+
+	//	First sighting of this pair: it takes the next free index
+	const size_t	fresh	= labelID.size( );
+	labelID.push_back( wanted );
+
+	return	fresh;
+}
+
+cl_double
+CpuStatTimer::getMean( size_t id ) const
+{
+	//	Arithmetic mean of the raw tick samples of event 'id'; 0 when there is nothing to average
+	if( clkTicks.empty( ) || clkTicks.at( id ).empty( ) )
+		return	0;
+
+	const clkVector&	samples	= clkTicks.at( id );
+	Accumulator<cl_ulong> sum = std::for_each( samples.begin( ), samples.end( ), Accumulator<cl_ulong>() );
+
+	return	static_cast<cl_double>( sum.acc ) / samples.size( );
+}
+
+cl_double
+CpuStatTimer::getVariance( size_t id ) const
+{
+	//	Population variance (divides by N, not N-1) of the tick samples of event 'id';
+	//	0 when the event has no samples, instead of the NaN that 0/0 would produce
+	if( clkTicks.empty( ) || clkTicks.at( id ).empty( ) )
+		return	0;
+
+	const clkVector&	samples	= clkTicks.at( id );
+	const cl_double	mean	= getMean( id );
+	cl_double	sum	= 0;
+
+	for( size_t i = 0; i < samples.size( ); ++i )
+	{
+		const cl_double	diff	= samples[ i ] - mean;
+		sum	+= diff * diff;
+	}
+
+	return	 sum / samples.size( );
+}
+
+cl_double
+CpuStatTimer::getStdDev( size_t id ) const
+{
+	//	Standard deviation is the square root of the variance, so it is
+	//	expressed in the same units as the samples themselves
+	return	sqrt( getVariance( id ) );
+}
+
+cl_double
+CpuStatTimer::getAverageTime( size_t id ) const
+{
+	const cl_double	meanTicks	= getMean( id );
+
+	//	Mean divided by the clock frequency when normalizing; otherwise the raw mean
+	return	normalize ? meanTicks / clkFrequency : meanTicks;
+}
+
+cl_double
+CpuStatTimer::getMinimumTime( size_t id ) const
+{
+	const clkVector&	samples	= clkTicks.at( id );
+
+	//	No samples recorded for this event yet
+	if( samples.empty( ) )
+		return	0;
+
+	//	Smallest tick count, optionally scaled by the clock frequency
+	const cl_double	least	= static_cast<cl_double>( *std::min_element( samples.begin( ), samples.end( ) ) );
+	if( normalize )
+		return	least / clkFrequency;
+	return	least;
+}
+
+std::vector< size_t >
+CpuStatTimer::pruneOutliers( size_t id , cl_double multiple )
+{	//	Stub: the pruning logic below is commented out, so both arguments are ignored
+	//if( clkTicks.empty( ) )
+	//	return	std::vector< size_t >( );
+
+	//cl_double	mean	= getMean( id );
+	//cl_double	stdDev	= getStdDev( id );
+
+	//clkVector&	clks = clkTicks.at( id );
+
+	////	Look on p. 379, "The C++ Standard Library"
+	////	std::remove_if does not actually erase, it only copies elements, it returns new 'logical' end
+	//clkVector::iterator	newEnd	= std::remove_if( clks.begin( ), clks.end( ), PruneRange< cl_double,cl_ulong >( mean, multiple*stdDev ) );
+
+	//clkVector::difference_type dist	= std::distance( newEnd, clks.end( ) );
+
+	//if( dist != 0 )
+	//	clks.erase( newEnd, clks.end( ) );
+
+	//assert( dist < std::numeric_limits< cl_uint >::max( ) );
+
+	return std::vector< size_t >( );	//	always empty: no samples are removed
+}
+
+size_t
+CpuStatTimer::pruneOutliers( cl_double multiple )
+{	//	Stub: the per-label loop below is commented out, so 'multiple' is ignored
+	size_t	tCount	= 0;
+
+	//for( cl_uint l = 0; l < labelID.size( ); ++l )
+	//{
+	//	size_t lCount	= pruneOutliers( l , multiple );
+	//	std::clog << "\tStatisticalTimer:: Pruning " << lCount << " samples from " << labelID[l].first << std::endl;
+	//	tCount += lCount;
+	//}
+
+	return	tCount;	//	always 0
+}
+
+void
+CpuStatTimer::Print( )
+{	//	Stub: the report code below is commented out, so this function prints nothing
+	//double flops = fFunc( );
+
+	//for( cl_uint i = 0; i < labelID.size( ); ++i )
+	//{
+	//	double timeNs	= getAverageTime( i );
+	//	double gFlops	= flops / timeNs;
+
+	//	std::cout << labelID[ i ].first << std::endl;
+	//	tout << std::setw( 10 ) << "Time:" << std::setw( 10 )  << commatize( static_cast< cl_ulong >( timeNs ) )
+	//		<< " ns" << std::endl;
+	//	tout << std::setw( 10 ) << "Gflops:" << std::setw( 10 ) << gFlops << std::endl;
+	//}
+}
+
+//	Output operator: writes one comma separated statistics section per registered label
+std::ostream&
+operator<<( std::ostream& os, const CpuStatTimer& st )
+{
+	if( st.clkTicks.empty( ) )
+		return	os;
+
+	std::ios::fmtflags bckup	= os.flags( );	//	restored before returning; std::fixed is set below
+
+	for( cl_uint l = 0; l < st.labelID.size( ); ++l )
+	{
+		cl_ulong min	= 0;	//	stays 0 when the label has no samples
+		CpuStatTimer::clkVector::const_iterator iter	= std::min_element( st.clkTicks.at( l ).begin( ), st.clkTicks.at( l ).end( ) );
+
+		if( iter != st.clkTicks.at( l ).end( ) )
+			min		= *iter;
+
+		os << st.labelID[l].first << ", " << st.labelID[l].second << std::fixed << std::endl;
+		os << "Min:," << min << std::endl;
+		os << "Mean:," << st.getMean( l ) << std::endl;
+		os << "StdDev:," << st.getStdDev( l ) << std::endl;
+		os << "AvgTime:," << st.getAverageTime( l ) << std::endl;
+		os << "MinTime:," << st.getMinimumTime( l ) << std::endl;
+
+		//for( cl_uint	t = 0; t < st.clkTicks[l].size( ); ++t )
+		//{
+		//	os << st.clkTicks[l][t]<< ",";
+		//}
+		os << "\n" << std::endl;
+
+	}
+
+	os.flags( bckup );
+
+	return	os;
+}
diff --git a/src/statTimer/statisticalTimer.CPU.h b/src/statTimer/statisticalTimer.CPU.h
new file mode 100644
index 00000000..876250e1
--- /dev/null
+++ b/src/statTimer/statisticalTimer.CPU.h
@@ -0,0 +1,169 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#ifndef _STATISTICALTIMER_CPU_H_
+#define _STATISTICALTIMER_CPU_H_
+#include <iosfwd>
+#include <vector>
+#include <algorithm>
+#include "statisticalTimer.h"
+
+/**
+ * \file statisticalTimer.CPU.h
+ * \brief A timer class that provides a cross platform timer for use
+ * in timing code progress with a high degree of accuracy.
+ *	The class is declared here and its member functions are defined in
+ *	statisticalTimer.CPU.cpp, which must be compiled into the project that uses it.
+ */
+
+class CpuStatTimer : public baseStatTimer
+{
+	//	Private typedefs
+	typedef std::vector< cl_ulong > clkVector;
+	typedef	std::pair< std::string, cl_uint > labelPair;
+	typedef	std::vector< labelPair > stringVector;
+
+	//	In order to calculate statistics <std. dev.>, we need to keep a history of our timings
+	stringVector	labelID;
+	clkVector	clkStart;
+	std::vector< clkVector >	clkTicks;
+
+	//	How many clockticks in a second
+	cl_ulong	clkFrequency;
+
+	//	For linux; the resolution of a high-precision timer
+#if defined( __GNUC__ )
+	timespec res;
+#endif
+
+	//	Saved sizes for our vectors, used in Reset() to reallocate vectors
+	clkVector::size_type	nEvents, nSamples;
+
+	//	This setting controls whether the Timer should convert samples into time by dividing by the
+	//	clock frequency
+	bool normalize;
+
+	/**
+	 * \fn CpuStatTimer()
+	 * \brief Constructor for CpuStatTimer that initializes the class
+	 *	This is private so that user code cannot create their own instantiation.  Instead, you
+	 *	must go through getInstance( ) to get a reference to the class.
+	 */
+	CpuStatTimer( );
+
+	/**
+	 * \fn ~CpuStatTimer()
+	 * \brief Destructor for CpuStatTimer that cleans up the class
+	 */
+	~CpuStatTimer( );
+
+	/**
+	 * \fn CpuStatTimer(const CpuStatTimer& )
+	 * \brief Copy constructors do not make sense for a singleton, disallow copies
+	 */
+	CpuStatTimer( const CpuStatTimer& );
+
+	/**
+	 * \fn operator=( const CpuStatTimer& )
+	 * \brief Assignment operator does not make sense for a singleton, disallow assignments
+	 */
+	CpuStatTimer& operator=( const CpuStatTimer& );
+
+	friend std::ostream& operator<<( std::ostream& os, const CpuStatTimer& s );
+
+	/**
+	 * \fn void AddSample( const size_t id, const cl_ulong n )
+	 * \brief Explicitly add a timing sample into the class
+	 */
+	void AddSample( const size_t id, const cl_ulong n );
+
+	//	Calculate the average/mean of data for a given event
+	cl_double	getMean( size_t id ) const;
+
+	//	Calculate the variance of data for a given event
+	//	Variance - average of the squared differences between data points and the mean
+	cl_double	getVariance( size_t id ) const;
+
+	//	Sqrt of variance, also in units of the original data
+	cl_double	getStdDev( size_t id ) const;
+
+	/**
+	 * \fn double getAverageTime(size_t id) const
+	 * \return Return the arithmetic mean of all the samples that have been saved
+	 */
+	cl_double getAverageTime( size_t id ) const;
+
+	/**
+	 * \fn double getMinimumTime(size_t id) const
+	 * \return Return the arithmetic min of all the samples that have been saved
+	 */
+	cl_double getMinimumTime( size_t id ) const;
+
+public:
+	/**
+	 * \fn getInstance()
+	 * \brief This returns a reference to the singleton timer.  Guarantees only 1 timer class is ever
+	 *	instantiated within a compilable executable.
+	 */
+	static CpuStatTimer& getInstance( );
+
+	/**
+	 * \fn void Start( size_t id )
+	 * \brief Start the timer
+	 * \sa Stop(), Reset()
+	 */
+	void Start( size_t id );
+
+	/**
+	 * \fn void Stop( size_t id )
+	 * \brief Stop the timer
+	 * \sa Start(), Reset()
+	 */
+	void Stop( size_t id );
+
+	/**
+	 * \fn void Clear( )
+	 * \brief Discard all registered labels, start stamps and recorded samples
+	 * \sa Reset(), Reserve()
+	 */
+	void Clear( );
+
+	/**
+	 * \fn void Reset( )
+	 * \brief Discard recorded samples and re-size the vectors as last requested through Reserve(); labels are kept
+	 * \sa Clear(), Reserve()
+	 */
+	void Reset( );
+
+	void Reserve( size_t nEvents, size_t nSamples );
+
+	size_t getUniqueID( const std::string& label, cl_uint groupID );
+
+	//	Choose whether reported times are divided by the clock frequency (true) or left as raw ticks
+	void	setNormalize( bool norm );
+
+	void Print( );
+
+	//	Intended to eliminate the samples (of an id) that fall outside some specified multiple of
+	//	the stdDev of the entire population, assuming that the population forms a gaussian curve.
+	//	NOTE: in statisticalTimer.CPU.cpp both overloads are currently stubs that prune nothing.
+	size_t	pruneOutliers( cl_double multiple );
+	std::vector< size_t > pruneOutliers( size_t id , cl_double multiple );
+};
+
+#endif // _STATISTICALTIMER_CPU_H_
diff --git a/src/statTimer/statisticalTimer.GPU.cpp b/src/statTimer/statisticalTimer.GPU.cpp
new file mode 100644
index 00000000..269378ec
--- /dev/null
+++ b/src/statTimer/statisticalTimer.GPU.cpp
@@ -0,0 +1,628 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// StatTimer.cpp : Defines the exported functions for the DLL application.
+//
+
+#include "stdafx.h"
+#include <cassert>
+#include <iomanip>
+#include <functional>
+#include <cmath>
+#include "statisticalTimer.GPU.h"
+#include "../library/private.h"
+
+//	Format an unsigned number with comma thousands separators, e.g. 1234567 -> "1,234,567".
+//	T must be an unsigned integer type; digits are produced right-to-left into a local buffer.
+template< typename T >		// T could be 32-bit or 64-bit
+std::basic_string<TCHAR> commatize (T number)
+{
+	//	Local (not static) scratch space, so concurrent callers never share a buffer
+	TCHAR scratch [8*sizeof(T)];
+	TCHAR * ptr = scratch + countOf( scratch );
+	*(--ptr) = 0;
+
+	for (int digits = 3; ; )
+	{
+		*(--ptr) = '0' + int (number % 10);
+		number /= 10;
+		if (0 == number)
+			break;
+		if (--digits <= 0)
+		{
+			*(--ptr) = ',';
+			digits = 3;
+		}
+	}
+
+	return std::basic_string<TCHAR> (ptr);
+}
+
+//	Functor for std::for_each that sums every visited element into 'acc'.
+template< typename T >
+struct Accumulator	// no std::unary_function base: it was removed from the library in C++17
+{
+	T acc;
+
+	Accumulator( ): acc( 0 ) {}
+	void operator( )(T x) { acc += x; }
+};
+
+//	Specialization for StatData: only the deltaNanoSec field of each element is summed
+template< >
+struct Accumulator< StatData >
+{
+	StatData acc;
+
+	Accumulator( ) {}
+	void operator( )( const StatData& x )
+	{
+		acc.deltaNanoSec += x.deltaNanoSec;
+	}
+};
+
+
+//	Unary predicate used for remove_if() algorithm
+//	Currently, R is expected to be a floating point type, and T an integer type
+template< typename T, typename R >
+struct PruneRange	// unary predicate, so it must not advertise the std::binary_function interface
+{
+	R lower, upper;
+
+	PruneRange( R mean, R stdev ): lower( mean-stdev ), upper( mean+stdev ) {}
+
+	bool operator( )( T val )
+	{
+		//	These comparisons can be susceptible to signed/unsigned casting problems
+		//	This is why we cast T to R, because R should always be floating and signed
+		if( static_cast< R >( val ) < lower )
+			return true;
+		else if( static_cast< R >( val ) > upper )
+			return true;
+
+		return false;
+	}
+};
+
+//	Template specialization for StatData datatypes
+template< >
+struct PruneRange< StatData, cl_double >
+{
+	StatData mean;
+	cl_double stdDev;
+
+	PruneRange( StatData m, cl_double s ): mean( m ), stdDev( s ) {}
+
+	bool operator( )( StatData val )
+	{
+		//	The comparison uses the doubleNanoSec field only.  A sample is
+		//	pruned when it lies strictly outside
+		//	[ mean - stdDev, mean + stdDev ]; both bounds are recomputed
+		//	from the stored members on every call.
+		const bool	tooFast	= val.doubleNanoSec < (mean.doubleNanoSec - stdDev);
+		const bool	tooSlow	= val.doubleNanoSec > (mean.doubleNanoSec + stdDev);
+
+		return tooFast || tooSlow;
+	}
+};
+
+//	Sorting operator for struct StatData, such that it can be used in a map
+bool operator<( const StatData& lhs, const StatData& rhs)
+{
+	//	Ordering considers only the accumulated event time; every other
+	//	field of StatData is ignored, so records with equal deltaNanoSec
+	//	compare equivalent
+	return lhs.deltaNanoSec < rhs.deltaNanoSec;
+}
+
+GpuStatTimer&
+GpuStatTimer::getInstance( )
+{
+	static	GpuStatTimer	timer;	//	function-local static: constructed on the first call
+	return	timer;
+}
+
+GpuStatTimer::GpuStatTimer( ): nEvents( 0 ), nSamples( 0 ), currID( 0 ), currSample( 0 ), currRecord( 0 )
+{}	//	every size and cursor member starts at zero
+
+GpuStatTimer::~GpuStatTimer( )
+{}	//	no explicit cleanup is performed here
+
+void
+GpuStatTimer::Clear( )
+{
+	labelID.clear( );	//	registered ( label, groupID ) pairs
+	timerData.clear( );	//	every recorded sample of every event
+
+	nEvents = 0;	//	the saved sizes are zeroed as well, so Reset( ) throws until Reserve( ) runs again
+	nSamples = 0;
+	currID = 0;
+	currSample = 0;
+	currRecord = 0;
+}
+
+//	The caller can pre-allocate memory, to improve performance.
+//	nE is an approximate count of the separate events the caller expects to time, and
+//	nS is a hint for how many samples will be taken per event; both are clamped to at
+//	least 1.  All previously recorded labels and samples are discarded first.
+void
+GpuStatTimer::Reserve( size_t nE, size_t nS )
+{
+	Clear( );
+	nEvents		= std::max< size_t >( 1, nE );
+	nSamples	= std::max< size_t >( 1, nS );
+
+	labelID.reserve( nEvents );
+	timerData.resize( nEvents );
+}
+
+void
+GpuStatTimer::Reset( )
+{
+	if( nEvents == 0 || nSamples == 0 )
+		throw	std::runtime_error( "StatisticalTimer::Reserve( ) was not called before Reset( )" );
+
+	Reserve( nEvents, nSamples );	//	sizes are passed by value before Reserve( ) -> Clear( ) zeroes the members; labels are discarded too
+
+	return;
+}
+
+void
+GpuStatTimer::setNormalize( bool norm )
+{	//	intentionally empty: 'norm' is ignored by the GPU timer
+}
+
+void
+GpuStatTimer::Start( size_t id )
+{
+	currID		= id;	//	event that subsequent AddSample( ) calls are filed under
+	currSample	= 0;	//	restart the per-record sample cursor used by AddSample( )
+}
+
+void
+GpuStatTimer::Stop( size_t id )
+{
+	++currRecord;	//	counts completed Start( ) / Stop( ) rounds; 'id' itself is unused
+}
+
+void
+GpuStatTimer::AddSample( clfftPlanHandle plHandle, FFTPlan* plan, cl_kernel kern, cl_uint numEvents, cl_event* ev,
+	const std::vector< size_t >& gWorkSize )
+{
+	if( timerData.empty( ) )	//	nothing was reserved, so the sample is silently dropped
+		return;
+
+	if( currRecord == 0 )	//	no Stop( ) has been counted yet: each call opens a new sample series
+	{
+		timerData.at( currID ).push_back( StatDataVec( ) );
+		timerData.at( currID ).back( ).reserve( nSamples );
+		timerData.at( currID ).back( ).push_back( StatData( plHandle, plan, kern, numEvents, ev, gWorkSize ) );
+	}
+	else	//	later records append to the series selected by currSample, in call order
+	{
+		timerData.at( currID ).at( currSample )
+			.push_back( StatData( plHandle, plan, kern, numEvents, ev, gWorkSize ) );
+		++currSample;
+	}
+}
+
+//	Maps a human readable ( label, groupID ) pair onto the index used by the timer's internal
+//	vectors.  A pair seen before yields its existing index; a new pair is appended to labelID.
+size_t
+GpuStatTimer::getUniqueID( const std::string& label, cl_uint groupID )
+{
+	const idPair	wanted	= std::make_pair( label, groupID );
+
+	//	labelID is expected to stay small (a few dozen entries), so a linear scan
+	//	is preferred over keeping the vector sorted for a binary search
+	for( size_t slot = 0; slot < labelID.size( ); ++slot )
+	{
+		if( labelID[ slot ] == wanted )
+			return	slot;
+	}
+
+	//	First sighting of this pair: it takes the next free index
+	const size_t	fresh	= labelID.size( );
+	labelID.push_back( wanted );
+
+	return	fresh;
+}
+
+void GpuStatTimer::queryOpenCL( size_t id )
+{
+	for( size_t s = 0; s < timerData.at( id ).size( ); ++s )
+	{
+		for( size_t n = 0; n < timerData.at( id ).at( s ).size( ); ++n )
+		{
+			StatData& sd = timerData[ id ][ s ][ n ];
+
+			cl_ulong profStart = 0, profEnd = 0;
+			sd.deltaNanoSec = 0;	//	recomputed from scratch: the sum over all of this sample's events
+
+			for( size_t i = 0; i < sd.outEvents.size( ); ++i )
+			{
+				//	Count an event only when both timestamps were read; a lone failure would
+				//	otherwise wrap the unsigned subtraction into a huge bogus duration
+				const bool haveStart = ::clGetEventProfilingInfo( sd.outEvents[ i ], CL_PROFILING_COMMAND_START,
+					sizeof( cl_ulong ), &profStart, NULL ) == CL_SUCCESS;
+				const bool haveEnd = ::clGetEventProfilingInfo( sd.outEvents[ i ], CL_PROFILING_COMMAND_END,
+					sizeof( cl_ulong ), &profEnd, NULL ) == CL_SUCCESS;
+				if( haveStart && haveEnd )
+				{
+					sd.deltaNanoSec += (profEnd - profStart);
+				}
+			}
+
+			sd.doubleNanoSec = static_cast< cl_double >( sd.deltaNanoSec );
+		}
+	}
+}
+
+std::vector< StatData >
+GpuStatTimer::getMean( size_t id )
+{
+	//	Prep the data; query openCL for the timer information
+	queryOpenCL( id );
+
+	std::vector< StatData > meanVec;	//	one entry per sample series of this event
+	for( size_t s = 0; s < timerData.at( id ).size( ); ++s )
+	{
+		Accumulator< StatData > sum = std::for_each( timerData.at( id ).at( s ).begin( ), timerData.at( id ).at( s ).end( ),
+			Accumulator< StatData >() );
+
+		StatData tmp = timerData[ id ][ s ].front( );	//	descriptive fields come from the first sample; NOTE(review): assumes the series is not empty
+		tmp.doubleNanoSec = static_cast< cl_double >( sum.acc.deltaNanoSec ) / timerData.at( id ).at( s ).size( );
+		meanVec.push_back( tmp );
+	}
+
+	return meanVec;
+}
+
+std::vector< StatData >
+GpuStatTimer::getVariance( size_t id )
+{
+	std::vector< StatData > variance = getMean( id );	//	starts as the means; doubleNanoSec is overwritten below
+
+	for( cl_uint v = 0; v < variance.size( ); ++v )
+	{
+		double sum = 0;	//	sum of squared deviations from the mean of series v
+		for( cl_uint n = 0; n < timerData[ id ][ v ].size( ); ++n )
+		{
+			cl_double	diff	= static_cast< cl_double >( timerData[ id ][ v ][ n ].deltaNanoSec ) - variance[ v ].doubleNanoSec;
+			diff	*= diff;
+			sum		+= diff;
+		}
+
+		variance[ v ].doubleNanoSec = sum / timerData[ id ][ v ].size( );	//	population variance (divides by N)
+	}
+
+	return variance;
+}
+
+std::vector< StatData >
+GpuStatTimer::getStdDev( size_t id )
+{
+	std::vector< StatData > stddev = getVariance( id );	//	starts as the variances
+
+	for( cl_uint v = 0; v < stddev.size( ); ++v )
+	{
+		stddev[ v ].doubleNanoSec = sqrt( stddev[ v ].doubleNanoSec );	//	square root taken in place
+	}
+
+	return stddev;
+}
+
+std::vector< StatData >
+GpuStatTimer::getAverageTime( size_t id )
+{
+	return getMean( id );	//	plain alias of getMean( ): no further scaling is applied
+}
+
+std::vector< StatData >
+GpuStatTimer::getMinimumTime( size_t id )
+{
+	//	Prep the data; query openCL for the timer information
+	queryOpenCL( id );
+
+	std::vector< StatData > minTime;	//	one entry per sample series of this event
+	for( size_t s = 0; s < timerData.at( id ).size( ); ++s )
+	{
+		StatDataVec::iterator iter
+			= std::min_element( timerData.at( id ).at( s ).begin( ), timerData.at( id ).at( s ).end( ) );
+
+		if( iter != timerData.at( id ).at( s ).end( ) )
+		{
+			iter->doubleNanoSec = static_cast< cl_double >( iter->deltaNanoSec ) / timerData.at( id ).at( s ).size( );	//	NOTE(review): divides the minimum by the sample count and writes it back into the stored sample - confirm this is intended
+			minTime.push_back( *iter );
+		}
+		else
+			return std::vector< StatData >( );	//	an empty series discards the results gathered so far
+	}
+
+	return minTime;
+}
+
+std::vector< size_t >
+GpuStatTimer::pruneOutliers( size_t id , cl_double multiple )
+{
+	std::vector< StatData > mean	= getMean( id );
+	std::vector< StatData > stdDev	= getStdDev( id );
+
+	std::vector< size_t > totalPrune;	//	number of samples removed from each series of this event
+	for( size_t s = 0; s < timerData.at( id ).size( ); ++s )
+	{
+		//	Look on p. 379, "The C++ Standard Library"
+		//	std::remove_if does not actually erase, it only copies elements, it returns new 'logical' end
+		StatDataVec::iterator newEnd	= std::remove_if( timerData.at( id ).at( s ).begin( ), timerData.at( id ).at( s ).end( ),
+			PruneRange< StatData,cl_double >( mean[ s ], multiple * stdDev[ s ].doubleNanoSec ) );
+
+		StatDataVec::difference_type dist	= std::distance( newEnd, timerData.at( id ).at( s ).end( ) );
+
+		if( dist != 0 )
+			timerData.at( id ).at( s ).erase( newEnd, timerData.at( id ).at( s ).end( ) );
+
+		totalPrune.push_back( dist );
+	}
+
+	return totalPrune;
+}
+
+size_t
+GpuStatTimer::pruneOutliers( cl_double multiple )
+{
+	const int tableWidth = 60;
+	const int tableHalf = tableWidth / 2;
+	const int tableThird = tableWidth / 3;	//	tableThird, tableFourth and tableFifth are not used in this function
+	const int tableFourth = tableWidth / 4;
+	const int tableFifth = tableWidth / 5;
+
+	//	Print a "StdDev ( multiple )" banner padded with '=' characters
+	std::string header( "StdDev" );
+	size_t	sizeTitle = (header.size( ) + 6) /2;
+
+	std::cout << std::endl;
+	std::cout << std::setfill( '=' ) << std::setw( tableHalf ) << header << " ( " << multiple << " )"
+			<< std::setw( tableHalf - sizeTitle ) << "=" << std::endl;
+	tout << std::setfill( _T( ' ' ) );
+
+	size_t tCount = 0;	//	total number of samples pruned over all labels
+	for( cl_uint l = 0; l < labelID.size( ); ++l )
+	{
+		std::vector< size_t > lCount = pruneOutliers( l , multiple );
+
+		for( cl_uint c = 0; c < lCount.size( ); ++c )
+		{
+			std::cout << labelID[l].first << "[ " << c << " ]" << ": Pruning " << lCount[ c ] << " samples out of " << currRecord << std::endl;
+			tCount += lCount[ c ];
+		}
+	}
+
+	return tCount;
+}
+
+void
+GpuStatTimer::Print( )
+{
+	const int tableWidth = 60;
+	const int tableHalf = tableWidth / 2;
+	const int tableThird = tableWidth / 3;
+	const int tableFourth = tableWidth / 4;
+	const int tableFifth = tableWidth / 5;	//	not used in this function
+
+	for( cl_uint id = 0; id < labelID.size( ); ++id )
+	{
+		size_t	halfString = labelID[ id ].first.size( ) / 2;
+
+		//	Print label of timer, in a header
+		std::cout << std::endl << std::setw( tableHalf + halfString ) << std::setfill( '=' ) << labelID[ id ].first
+				<< std::setw( tableHalf - halfString ) << "=" << std::endl;
+		tout << std::setfill( _T( ' ' ) );
+
+		std::vector< StatData > mean	= getMean( id );
+
+		//	Print each individual dimension
+		tstringstream catLengths;
+		for( cl_uint t = 0; t < mean.size( ); ++t )
+		{
+			cl_double time		= 0;
+			if( mean[ t ].kernel == NULL )	//	no kernel of its own: use the summed time of earlier entries that are its child plans
+			{
+				for( cl_uint m = 0; m < t; ++m )
+				{
+					if( mean[ m ].plHandle == mean[ t ].planX ||
+						mean[ m ].plHandle == mean[ t ].planY ||
+						mean[ m ].plHandle == mean[ t ].planZ ||
+						mean[ m ].plHandle == mean[ t ].planTX ||
+						mean[ m ].plHandle == mean[ t ].planTY ||
+						mean[ m ].plHandle == mean[ t ].planTZ )
+					{
+						time	+= mean[ m ].doubleNanoSec;
+					}
+				}
+				mean[ t ].doubleNanoSec = time;
+			}
+			else
+			{
+				time	= mean[ t ].doubleNanoSec;
+			}
+			double gFlops = mean[ t ].calcFlops( ) / time;	//	NOTE(review): not guarded against time == 0
+
+			tout << std::setw( tableFourth ) << _T( "Handle:" )
+				<< std::setw( tableThird )  << mean[ t ].plHandle << std::endl;
+
+			if( mean[ t ].kernel != 0 )
+			{
+				tout << std::setw( tableFourth ) << _T( "Kernel:" )
+					<< std::setw( tableThird )  << reinterpret_cast<void*>( mean[ t ].kernel ) << std::endl;
+			}
+
+			if( ( mean[ t ].planX + mean[ t ].planY + mean[ t ].planZ ) > 0 ||
+				( mean[ t ].planTX + mean[ t ].planTY + mean[ t ].planTZ ) > 0 )
+			{
+				tout << std::setw( tableFourth ) << _T( "Child Handles:" );
+				catLengths.str( _T( "" ) );
+				catLengths << _T( "(" );
+				if( mean[ t ].planX != 0 )	//	NOTE(review): when planX is 0 but a later handle is not, the list starts with ","
+					catLengths << mean[ t ].planX;
+				if( mean[ t ].planTX != 0 )
+				{
+					catLengths << _T( "," );
+					catLengths << mean[ t ].planTX;
+				}
+				if( mean[ t ].planY != 0 )
+				{
+					catLengths << _T( "," );
+					catLengths << mean[ t ].planY;
+				}
+				if( mean[ t ].planTY != 0 )
+				{
+					catLengths << _T( "," );
+					catLengths << mean[ t ].planTY;
+				}
+				if( mean[ t ].planZ != 0 )
+				{
+					catLengths << _T( "," );
+					catLengths << mean[ t ].planZ;
+				}
+				if( mean[ t ].planTZ != 0 )
+				{
+					catLengths << _T( "," );
+					catLengths << mean[ t ].planTZ;
+				}
+				catLengths << _T( ")" );
+				tout << std::setw( tableThird ) << catLengths.str( ) << std::endl;
+			}
+
+			if( mean[ t ].outEvents.size( ) != 0 )
+			{
+				tout << std::setw( tableFourth ) << _T( "OutEvents:" ) << std::setw( tableThird );
+				for( size_t i = 0; i < mean[ t ].outEvents.size( ); ++i )
+				{
+					tout << mean[ t ].outEvents[ i ];
+					if( i < (mean[ t ].outEvents.size( )-1) )
+					{
+						tout << _T( "," ) << std::endl;
+						tout << std::setw( tableFourth+tableThird );
+					}
+				}
+				tout << std::endl;
+			}
+
+			tout << std::setw( tableFourth ) << _T( "Length:" );
+			catLengths.str( _T( "" ) );
+			catLengths << _T( "(" );
+			for( size_t i = 0; i < mean[ t ].lengths.size( ); ++i )
+			{
+				catLengths << mean[ t ].lengths.at( i );
+				if( i < (mean[ t ].lengths.size( )-1) )
+					catLengths << _T( "," );
+			}
+			catLengths << _T( ")" );
+			tout << std::setw( tableThird ) << catLengths.str( ) << std::endl;
+
+			if( mean[ t ].batchSize > 1 )
+			{
+				tout << std::setw( tableFourth ) << _T( "Batch:" )
+					<< std::setw( tableThird )  << mean[ t ].batchSize << std::endl;
+			}
+
+			tout << std::setw( tableFourth ) << _T( "Input Stride:" );
+
+			catLengths.str( _T( "" ) );
+			catLengths << _T( "(" );
+			for( size_t i = 0; i < mean[ t ].inStride.size( ); ++i )
+			{
+				catLengths << mean[ t ].inStride.at( i );
+				if( i < (mean[ t ].inStride.size( )-1) )
+					catLengths << _T( "," );
+			}
+			catLengths << _T( ")" );
+			tout << std::setw( tableThird ) << catLengths.str( ) << std::endl;
+
+			tout << std::setw( tableFourth ) << _T( "Output Stride:" );
+
+			catLengths.str( _T( "" ) );
+			catLengths << _T( "(" );
+			for( size_t i = 0; i < mean[ t ].outStride.size( ); ++i )
+			{
+				catLengths << mean[ t ].outStride.at( i );
+				if( i < (mean[ t ].outStride.size( )-1) )
+					catLengths << _T( "," );
+			}
+			catLengths << _T( ")" );
+			tout << std::setw( tableThird ) << catLengths.str( ) << std::endl;
+
+			if( mean[ t ].enqueueWorkSize.size( ) != 0 )
+			{
+				tout << std::setw( tableFourth ) << _T( "Global Work:" );
+				catLengths.str( _T( "" ) );
+				catLengths << _T( "(" );
+				for( size_t i = 0; i < mean[ t ].enqueueWorkSize.size( ); ++i )
+				{
+					catLengths << mean[ t ].enqueueWorkSize.at( i );
+					if( i < (mean[ t ].enqueueWorkSize.size( )-1) )
+						catLengths << _T( "," );
+				}
+				catLengths << _T( ")" );
+				tout << std::setw( tableThird ) << catLengths.str( ) << std::endl;
+			}
+
+			tout << std::setw( tableFourth ) << _T( "Gflops:" )
+				<< std::setw( 2*tableFourth ) << gFlops << std::endl;
+			tout << std::setw( tableFourth ) << _T( "Time (ns):" )
+				<< std::setw( 3*tableFourth ) << commatize( static_cast< cl_ulong >( time ) ) << std::endl;
+			tout << std::endl;
+		}
+	}
+}
+
+//	Output operator for GpuStatTimer; currently a stub that writes nothing to the stream
+std::ostream&
+operator<<( std::ostream& os, const GpuStatTimer& st )
+{
+	//if( st.clkTicks.empty( ) )
+	//	return	os;
+
+	//std::ios::fmtflags bckup	= os.flags( );
+
+	//for( cl_uint l = 0; l < st.labelID.size( ); ++l )
+	//{
+	//	cl_ulong min	= 0;
+	//	clkVector::const_iterator iter	= std::min_element( st.clkTicks.at( l ).begin( ), st.clkTicks.at( l ).end( ) );
+
+	//	if( iter != st.clkTicks.at( l ).end( ) )
+	//		min		= *iter;
+
+	//	os << st.labelID[l].first << ", " << st.labelID[l].second << std::fixed << std::endl;
+	//	os << "Min:," << min << std::endl;
+	//	os << "Mean:," << st.getMean( l ) << std::endl;
+	//	os << "StdDev:," << st.getStdDev( l ) << std::endl;
+	//	os << "AvgTime:," << st.getAverageTime( l ) << std::endl;
+	//	os << "MinTime:," << st.getMinimumTime( l ) << std::endl;
+
+	//	//for( cl_uint	t = 0; t < st.clkTicks[l].size( ); ++t )
+	//	//{
+	//	//	os << st.clkTicks[l][t]<< ",";
+	//	//}
+	//	os << "\n" << std::endl;
+
+	//}
+
+	//os.flags( bckup );
+
+	return	os;	//	the stream is returned untouched
+}
diff --git a/src/statTimer/statisticalTimer.GPU.h b/src/statTimer/statisticalTimer.GPU.h
new file mode 100644
index 00000000..62e3c29a
--- /dev/null
+++ b/src/statTimer/statisticalTimer.GPU.h
@@ -0,0 +1,244 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#ifndef _STATISTICALTIMER_GPU_H_
+#define _STATISTICALTIMER_GPU_H_
+#include <iosfwd>
+#include <vector>
+#include <algorithm>
+#include <cmath>
+#include "statisticalTimer.h"
+#include "../library/plan.h"
+
+/**
+ * \file statisticalTimer.GPU.h
+ * \brief A timer class that provides a cross platform timer for use
+ * in timing code progress with a high degree of accuracy.
+ *	StatData is implemented entirely in this header; the member functions of GpuStatTimer are
+ *	only declared here and are defined outside this header.
+ */
+
+//	Timing record for one kernel enqueue, together with a copy of the plan settings used to report it
+struct StatData
+{
+	cl_kernel kernel;
+	cl_ulong deltaNanoSec;
+	double doubleNanoSec;
+	size_t batchSize;
+	clfftDim dim;
+	clfftPlanHandle plHandle;
+	clfftPlanHandle planX;
+	clfftPlanHandle planY;
+	clfftPlanHandle planZ;
+	clfftPlanHandle planTX;
+	clfftPlanHandle planTY;
+	clfftPlanHandle planTZ;
+	std::vector< size_t > lengths;
+	std::vector< size_t > inStride;
+	std::vector< size_t > outStride;
+	std::vector< size_t > enqueueWorkSize;
+	std::vector< cl_event > outEvents;
+
+	//	Value-initialize every scalar member so that an empty record never holds indeterminate data
+	StatData( ): kernel( ), deltaNanoSec( 0 ), doubleNanoSec( 0 ), batchSize( 0 ), dim( ),
+		plHandle( ), planX( ), planY( ), planZ( ), planTX( ), planTY( ), planTZ( )
+	{}
+
+	//	Copy the settings of plan (must not be NULL) and the nEv events in Ev; initializers follow member declaration order
+	StatData( clfftPlanHandle id, FFTPlan* plan, cl_kernel kern, cl_uint nEv, cl_event* Ev,
+		const std::vector< size_t >& gWorkSize ):
+		kernel( kern ), deltaNanoSec( 0 ), doubleNanoSec( 0 ), batchSize( plan->batchsize ),
+		dim( plan->dim ), plHandle( id ), planX( plan->planX ), planY( plan->planY ),
+		planZ( plan->planZ ), planTX( plan->planTX ), planTY( plan->planTY ), planTZ( plan->planTZ ),
+		lengths( plan->length ), inStride( plan->inStride ), outStride( plan->outStride ),
+		enqueueWorkSize( gWorkSize )
+	{
+		for( cl_uint e = 0; e < nEv; ++e )
+			outEvents.push_back( Ev[ e ] );
+	}
+
+	//	Operation count estimate batch * 5 * N * log2( N ), where N is the product of the first dim
+	//	lengths and any further lengths multiply the batch.  Returns 0 when N is 0 (dim is not 1D/2D/3D,
+	//	or a length is 0) instead of NaN; the product is formed in double so it cannot wrap a 32-bit size_t.
+	double calcFlops( ) const
+	{
+		size_t	fftLength = 0;
+		size_t	dimIndex = 0;
+		if( dim == CLFFT_1D )
+		{
+			fftLength	= lengths.at( 0 );
+			dimIndex	= 1;
+		}
+		else if( dim == CLFFT_2D )
+		{
+			fftLength	= lengths.at( 0 ) * lengths.at( 1 );
+			dimIndex	= 2;
+		}
+		else if( dim == CLFFT_3D )
+		{
+			fftLength	= lengths.at( 0 ) * lengths.at( 1 ) * lengths.at( 2 );
+			dimIndex	= 3;
+		}
+		if( fftLength == 0 )
+			return 0.0;
+
+		size_t cumulativeBatch = 1;
+		for( ; dimIndex < lengths.size(); ++dimIndex )
+			cumulativeBatch *= std::max< size_t >( 1, lengths[ dimIndex ] );
+		cumulativeBatch *= batchSize;
+		const double n = static_cast< double >( fftLength );
+		return static_cast< double >( cumulativeBatch ) * 5.0 * n * ( std::log( n ) / std::log( 2.0 ) );
+	}
+};
+
+//	Ordering for struct StatData, such that it can be used as a key in a map; only declared in this header
+bool operator<( const StatData& lhs, const StatData& rhs);
+
+class GpuStatTimer : public baseStatTimer	//	Singleton (see getInstance( )) that stores StatData samples per timer id
+{
+	//	Typedefs to handle the data that we store
+	typedef std::vector< StatData > StatDataVec;
+	typedef std::vector< StatDataVec > PerEnqueueVec;
+
+	//	In order to calculate statistics <std. dev.>, we need to keep a history of our timings
+	std::vector< PerEnqueueVec > timerData;
+
+	//	Typedefs to handle the identifiers we use for our timers
+	typedef	std::pair< std::string, cl_uint > idPair;
+	typedef	std::vector< idPair > idVector;
+	idVector labelID;
+
+	//	Between each Start/Stop pair, we need to count how many AddSamples were made.
+	size_t currSample, currRecord;
+
+	//	Saved sizes for our vectors, used in Reset() to reallocate vectors
+	StatDataVec::size_type	nEvents, nSamples;
+	size_t currID;
+
+	/**
+	 * \fn GpuStatTimer()
+	 * \brief Constructor for GpuStatTimer that initializes the class
+	 *	This is private so that user code cannot create their own instantiation.  Instead, you
+	 *	must go through getInstance( ) to get a reference to the class.
+	 */
+	GpuStatTimer( );
+
+	/**
+	 * \fn ~GpuStatTimer()
+	 * \brief Destructor for GpuStatTimer that cleans up the class
+	 */
+	~GpuStatTimer( );
+
+	/**
+	 * \fn GpuStatTimer( const GpuStatTimer& )
+	 * \brief Copy constructors do not make sense for a singleton, disallow copies
+	 */
+	GpuStatTimer( const GpuStatTimer& );
+
+	/**
+	 * \fn operator=( const GpuStatTimer& )
+	 * \brief Assignment operator does not make sense for a singleton, disallow assignments
+	 */
+	GpuStatTimer& operator=( const GpuStatTimer& );
+
+	friend std::ostream& operator<<( std::ostream& os, const GpuStatTimer& s );
+
+	//	Calculate the average/mean of data for a given event
+	std::vector< StatData > getMean( size_t id );
+
+	//	Calculate the variance of data for a given event
+	//	Variance - average of the squared differences between data points and the mean
+	std::vector< StatData >	getVariance( size_t id );
+
+	//	Sqrt of variance, also in units of the original data
+	std::vector< StatData >	getStdDev( size_t id );
+
+	/**
+	 * \fn std::vector< StatData > getAverageTime( size_t id )
+	 * \return Return the arithmetic mean of all the samples that have been saved
+	 */
+	std::vector< StatData > getAverageTime( size_t id );
+
+	/**
+	 * \fn std::vector< StatData > getMinimumTime( size_t id )
+	 * \return Return the arithmetic min of all the samples that have been saved
+	 */
+	std::vector< StatData > getMinimumTime( size_t id );
+
+	void queryOpenCL( size_t id );
+
+public:
+	/**
+	 * \fn getInstance()
+	 * \brief This returns a reference to the singleton timer.  Guarantees only 1 timer class is ever
+	 *	instantiated within a compilable executable.
+	 */
+	static GpuStatTimer& getInstance( );
+
+	/**
+	 * \fn void Start( size_t id )
+	 * \brief Start the timer
+	 * \sa Stop(), Reset()
+	 */
+	void Start( size_t id );
+
+	/**
+	 * \fn void Stop( size_t id )
+	 * \brief Stop the timer
+	 * \sa Start(), Reset()
+	 */
+	void Stop( size_t id );
+
+	/**
+	 * \fn void AddSample( clfftPlanHandle plHandle, FFTPlan* plan, cl_kernel kern, cl_uint numQueuesAndEvents, cl_event* ev, const std::vector< size_t >& gWorkSize )
+	 * \brief Explicitly add a timing sample into the class
+	 */
+	virtual void AddSample( clfftPlanHandle plHandle, FFTPlan* plan, cl_kernel kern, cl_uint numQueuesAndEvents, cl_event* ev,
+		const std::vector< size_t >& gWorkSize );
+
+	/**
+	 * \fn void Clear( )
+	 * \brief Clear the timer; NOTE(review): how this differs from Reset( ) is not visible in this header - confirm
+	 * \sa Start(), Stop(), Reset()
+	 */
+	void Clear( );
+
+	/**
+	 * \fn void Reset( )
+	 * \brief Reset the timer to 0
+	 * \sa Start(), Stop()
+	 */
+	void Reset( );
+
+	void Reserve( size_t nEvents, size_t nSamples );
+
+	size_t getUniqueID( const std::string& label, cl_uint groupID );
+
+	//	Turn normalization on or off; NOTE(review): what gets normalized is decided by the implementation - confirm there
+	void	setNormalize( bool norm );
+
+	void Print( );
+
+	//	Using the stdDev of the entire population (of an id), eliminate those samples that fall
+	//	outside some specified multiple of the stdDev.  This assumes that the population
+	//	form a gaussian curve.
+	size_t	pruneOutliers( cl_double multiple );
+	std::vector< size_t > pruneOutliers( size_t id , cl_double multiple );
+};
+
+#endif // _STATISTICALTIMER_GPU_H_
diff --git a/src/statTimer/statisticalTimer.extern.cpp b/src/statTimer/statisticalTimer.extern.cpp
new file mode 100644
index 00000000..3af90a7c
--- /dev/null
+++ b/src/statTimer/statisticalTimer.extern.cpp
@@ -0,0 +1,35 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// statisticalTimer.extern.cpp : Defines the exported functions for the DLL application.
+//
+
+#include "stdafx.h"
+#include "statisticalTimer.extern.h"
+#include "statisticalTimer.CPU.h"
+#include "statisticalTimer.GPU.h"
+
+//	The getInstance( ) functions of the timer classes hand back references.  They are turned
+//	into pointers here because clients want to initialize their local variables to NULL,
+//	which a reference cannot express.  Every type other than CLFFT_CPU selects the GPU timer.
+baseStatTimer* getStatTimer( const clfftTimerType type )
+{
+	if( type != CLFFT_CPU )
+		return	&GpuStatTimer::getInstance( );
+
+	return	&CpuStatTimer::getInstance( );
+}
diff --git a/src/statTimer/statisticalTimer.extern.h b/src/statTimer/statisticalTimer.extern.h
new file mode 100644
index 00000000..1169baaa
--- /dev/null
+++ b/src/statTimer/statisticalTimer.extern.h
@@ -0,0 +1,71 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#ifndef _STATISTICALTIMER_EXTERN_H_
+#define _STATISTICALTIMER_EXTERN_H_
+#include "../include/clFFT.h"
+#include "statisticalTimer.h"
+
+/**
+ * \file statisticalTimer.extern.h
+ * \brief Declares getStatTimer( ), the exported entry point that hands out the timer
+ * classes, which are used for timing code progress with a high degree of accuracy.
+ *	Only the declaration, the export macro, the timer type enumeration and a matching
+ *	function pointer typedef live in this header; getStatTimer( ) itself is compiled separately.
+ */
+
+// The following ifdef block is the standard way of creating macros which make exporting
+// from a DLL simpler. All files within this DLL are compiled with the STATTIMER_EXPORTS
+// symbol defined on the command line. This symbol should not be defined on any project
+// that uses this DLL. This way any other project whose source files include this file sees
+// STATTIMER_API functions as being imported from a DLL, whereas this DLL sees symbols
+// defined with this macro as being exported. Outside of _WIN32 builds STATTIMER_API is empty.
+#if defined( _WIN32 )
+	#if !defined( __cplusplus )
+		#define inline __inline
+	#endif
+
+	#if defined( STATTIMER_EXPORTS )
+		#define STATTIMER_API __declspec( dllexport )
+	#else
+		#define STATTIMER_API __declspec( dllimport )
+	#endif
+#else
+	#define STATTIMER_API
+#endif
+
+//	The type of timer to be returned from ::getStatTimer( ); CLFFT_GPU is 1 and CLFFT_CPU follows it as 2
+typedef enum clfftTimerType_
+{
+	CLFFT_GPU			= 1,
+	CLFFT_CPU,
+} clfftTimerType;
+
+//	Table of typedef definitions for all exported functions from this shared module.
+//	Clients of this module can use these typedefs to help create function pointers
+//	that can be initialized to point to the exported functions; PFGETSTATTIMER matches getStatTimer( ).
+typedef baseStatTimer* (*PFGETSTATTIMER)( const clfftTimerType type );
+
+	/**
+	* \fn getStatTimer( const clfftTimerType type )
+	* \brief Returns a pointer to a timer object for the requested \c type.  A pointer is returned,
+	*	not a reference, so the result can be stored in a variable that may also hold NULL.
+	*/
+extern "C" STATTIMER_API baseStatTimer* getStatTimer( const clfftTimerType type );
+
+#endif // _STATISTICALTIMER_EXTERN_H_
diff --git a/src/statTimer/statisticalTimer.h b/src/statTimer/statisticalTimer.h
new file mode 100644
index 00000000..58c03b70
--- /dev/null
+++ b/src/statTimer/statisticalTimer.h
@@ -0,0 +1,106 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#ifndef _STATISTICALTIMER_H_
+#define _STATISTICALTIMER_H_
+#include <vector>
+#include <functional>
+#include <string>
+
+#include "../include/clFFT.h"
+
+/**
+ * \file statisticalTimer.h
+ * \brief Declares baseStatTimer, the abstract interface of the timer classes that provide
+ * a cross platform timer for use in timing code progress with a high degree of accuracy.
+ *	The interface is declared entirely in this header, to facilitate inclusion into multiple
+ *	projects without needing to compile an object file for each project.
+ */
+
+//	Abstract functor interface whose operator( )( ) takes no arguments and returns R.  It declares
+//	argument_type/result_type itself; std::unary_function is deprecated in C++11 and removed in C++17.
+template< typename A, typename R >
+class flopsFunc
+{
+public:
+	typedef A argument_type;	typedef R result_type;
+	virtual result_type operator( )( ) = 0;
+};
+
+/**
+ * \class baseStatTimer
+ * \brief Abstract interface of the statistical timers.  Every operation is pure virtual, so
+ * the behavior is supplied by the derived timer classes.
+ */
+class baseStatTimer
+{
+protected:
+	/**
+	 * \fn ~baseStatTimer()
+	 * \brief Destructor; protected, so code outside the class hierarchy cannot delete a timer through this interface
+	 */
+	virtual ~baseStatTimer( ){ };
+
+//	friend std::ostream& operator<<( std::ostream& os, const baseStatTimer& s );
+
+public:
+	/**
+	 * \fn void Start( size_t id )
+	 * \brief Start the timer
+	 * \sa Stop(), Reset()
+	 */
+	virtual void Start( size_t id ) = 0;
+
+	/**
+	 * \fn void Stop( size_t id )
+	 * \brief Stop the timer
+	 * \sa Start(), Reset()
+	 */
+	virtual void Stop( size_t id ) = 0;
+
+	/**
+	 * \fn void Clear( )
+	 * \brief Clear the timer; NOTE(review): how this differs from Reset( ) is up to the implementing class - confirm
+	 * \sa Start(), Stop(), Reset()
+	 */
+	virtual void Clear( ) = 0;
+
+	/**
+	 * \fn void Reset( )
+	 * \brief Reset the timer to 0
+	 * \sa Start(), Stop()
+	 */
+	virtual void Reset( ) = 0;
+
+	virtual void Reserve( size_t nEvents, size_t nSamples ) = 0;
+
+	virtual size_t getUniqueID( const std::string& label, cl_uint groupID ) = 0;
+
+	//	Turn normalization on or off; NOTE(review): what gets normalized is up to the implementing class - confirm
+	virtual void	setNormalize( bool norm ) = 0;
+
+	virtual void Print( ) = 0;
+
+	//	Using the stdDev of the entire population (of an id), eliminate those samples that fall
+	//	outside some specified multiple of the stdDev.  This assumes that the population
+	//	form a gaussian curve.
+	virtual size_t	pruneOutliers( cl_double multiple ) = 0;
+	virtual std::vector< size_t > pruneOutliers( size_t id , cl_double multiple ) = 0;
+};
+
+#endif // _STATISTICALTIMER_H_
diff --git a/src/statTimer/stdafx.cpp b/src/statTimer/stdafx.cpp
new file mode 100644
index 00000000..d87a55d8
--- /dev/null
+++ b/src/statTimer/stdafx.cpp
@@ -0,0 +1,24 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// stdafx.cpp : source file that includes just the standard includes
+// clfft.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// Reference any additional headers you need in STDAFX.H and not in this file
diff --git a/src/statTimer/stdafx.h b/src/statTimer/stdafx.h
new file mode 100644
index 00000000..09875f9d
--- /dev/null
+++ b/src/statTimer/stdafx.h
@@ -0,0 +1,48 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#define _CRT_SECURE_NO_WARNINGS
+
+//#include <iostream>
+//#include <sstream>
+//#include <fstream>
+//#include <iomanip>
+//#include <cstring>
+//#include <memory>
+#include <vector>
+//#include <cstring>
+//#include <stdarg.h>
+#include <assert.h>
+//#include <complex>
+
+//	_WIN32 is defined for both 32 & 64 bit environments
+#if defined( _WIN32 )
+//	#include <tchar.h>
+	#include "targetver.h"
+
+	#define NOMINMAX
+	#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
+	// Windows Header Files:
+	#include <windows.h>
+#endif
diff --git a/src/statTimer/targetver.h b/src/statTimer/targetver.h
new file mode 100644
index 00000000..dafe7141
--- /dev/null
+++ b/src/statTimer/targetver.h
@@ -0,0 +1,24 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
new file mode 100644
index 00000000..87510efb
--- /dev/null
+++ b/src/tests/CMakeLists.txt
@@ -0,0 +1,109 @@
+# ########################################################################
+# Copyright 2013 Advanced Micro Devices, Inc.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ########################################################################
+
+
+# List the names of the files to compile
+set( clFFT.Test.Source
+	 test_constants.cpp
+	 buffer_memory.cpp
+	 buffer.cpp
+	 unit_test.cpp
+	 accuracy_test_common.cpp
+	 accuracy_test_pow2.cpp
+	 accuracy_test_pow3.cpp
+	 accuracy_test_pow5.cpp
+	 accuracy_test_mixed_radices.cpp
+	 accuracy_test_random.cpp
+	 gtest_main.cpp
+	 ${PROJECT_SOURCE_DIR}/client/openCL.misc.cpp
+	 c-compliance.c
+   )
+   
+set( clFFT.Test.Headers
+	${PROJECT_SOURCE_DIR}/include/clFFT.h
+	${PROJECT_SOURCE_DIR}/include/unicode.compatibility.h
+	${PROJECT_SOURCE_DIR}/include/convenienceFunctions.h
+	${PROJECT_SOURCE_DIR}/library/private.h
+	${PROJECT_SOURCE_DIR}/client/openCL.misc.h
+	accuracy_test_common.h
+	test_constants.h
+	buffer_memory.h
+	buffer.h
+	cl_transform.h
+	fftw_transform.h
+	typedefs.h
+  )
+  
+set( clFFT.Test.Files ${clFFT.Test.Source} ${clFFT.Test.Headers} )
+
+
+
+if( BUILD64 )	# BIN_DIR and LIB_DIR are the install destinations used by install( ) below
+    set( BIN_DIR bin64 )
+	set( LIB_DIR lib64 )
+else()
+    set( BIN_DIR bin32 )
+	set( LIB_DIR lib32 )
+endif()
+
+set( LD_PTHREAD "" )	# extra link flag for the Test target; stays empty unless the GNU C++ compiler is used
+if( CMAKE_COMPILER_IS_GNUCXX )
+	set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" )
+	set( LD_PTHREAD "-lpthread" )
+endif( )
+
+# Add the Boost, GTest, FFTW and OpenCL header locations, plus the project's binary and source include directories
+include_directories( ${Boost_INCLUDE_DIRS} ${GTEST_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS}  ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include )
+
+add_executable( Test ${clFFT.Test.Files} )
+
+# If the runtime is being built by the project, use it, otherwise link to a runtime library specified in the install prefix
+if( BUILD_RUNTIME )
+	target_link_libraries( Test clFFT ${Boost_LIBRARIES} ${GTEST_LIBRARIES} ${FFTW_LIBRARIES} ${OPENCL_LIBRARIES} ${LD_PTHREAD} )
+else( )
+	# Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else
+	get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
+	if( LIB64 )
+		set( clFFT.library "${CMAKE_INSTALL_PREFIX}/lib64" )
+	else( )
+		set( clFFT.library "${CMAKE_INSTALL_PREFIX}/lib32" )
+	endif( )
+	
+	if( WIN32 )
+		set( clFFT.library "${clFFT.library}/import/clFFT${CMAKE_STATIC_LIBRARY_SUFFIX}" )
+	else( )
+		set( clFFT.library "${clFFT.library}/${CMAKE_SHARED_LIBRARY_PREFIX}clFFT${CMAKE_SHARED_LIBRARY_SUFFIX}" )
+	endif( )
+
+	target_link_libraries( Test ${clFFT.library} ${Boost_LIBRARIES} ${GTEST_LIBRARIES} ${FFTW_LIBRARIES} ${OPENCL_LIBRARIES} ${LD_PTHREAD} )
+
+endif( )
+
+# The following set_target_properties is to get around a bug in cmake 2.8.2, where the suffix after the first '.' is dropped
+IF( (MSVC_VERSION VERSION_EQUAL 1600) AND (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} VERSION_LESS 2.8.3) )
+	message( STATUS "Detected vs2010 and Cmake version less than 2.8.3; renaming Test with underscores " )
+	set_target_properties( Test PROPERTIES OUTPUT_NAME "clFFT_Test" )
+ENDIF( )
+
+set_target_properties( Test PROPERTIES VERSION ${CLFFT_VERSION} )
+set_target_properties( Test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
+
+# CPack configuration; include the executable into the package
+install( TARGETS Test
+		RUNTIME DESTINATION ${BIN_DIR}
+		LIBRARY DESTINATION ${LIB_DIR}
+		ARCHIVE DESTINATION ${LIB_DIR}/import
+		)
diff --git a/src/tests/accuracy_test_common.cpp b/src/tests/accuracy_test_common.cpp
new file mode 100644
index 00000000..8561b143
--- /dev/null
+++ b/src/tests/accuracy_test_common.cpp
@@ -0,0 +1,55 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include<math.h>
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "typedefs.h"
+#include "accuracy_test_common.h"
+#include <stdexcept>
+#include <vector>
+
+/***** Map a test-suite placeness onto the matching clfftResultLocation *****/
+clfftResultLocation cl_placeness( placeness::placeness_t placeness )
+{
+	switch( placeness )
+	{
+	case placeness::in_place:		return CLFFT_INPLACE;
+	case placeness::out_of_place:	return CLFFT_OUTOFPLACE;
+	default:						throw std::runtime_error( "invalid placeness" );
+	}
+}
+
+/***** Map a test-suite buffer layout onto the matching clfftLayout *****/
+clfftLayout cl_layout( layout::buffer_layout_t layout_in )
+{
+	// One case per layout that the tests can request; any other value is
+	// rejected with a std::runtime_error.
+	switch( layout_in )
+	{
+	case layout::real:					return CLFFT_REAL;
+	case layout::hermitian_planar:		return CLFFT_HERMITIAN_PLANAR;
+	case layout::complex_planar:		return CLFFT_COMPLEX_PLANAR;
+	case layout::hermitian_interleaved:	return CLFFT_HERMITIAN_INTERLEAVED;
+	case layout::complex_interleaved:	return CLFFT_COMPLEX_INTERLEAVED;
+	default:
+		throw std::runtime_error( "invalid layout_in" );
+	}
+}
diff --git a/src/tests/accuracy_test_common.h b/src/tests/accuracy_test_common.h
new file mode 100644
index 00000000..428bd8ba
--- /dev/null
+++ b/src/tests/accuracy_test_common.h
@@ -0,0 +1,364 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include<math.h>
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "buffer.h"
+#include "typedefs.h"
+#include <stdexcept>
+#include <vector>
+
+namespace placeness	// in-place vs. out-of-place; the enumerators take the values of the clFFT constants
+{
+	enum placeness_t { in_place = CLFFT_INPLACE, out_of_place = CLFFT_OUTOFPLACE };
+}
+
+enum data_pattern { impulse, sawtooth, value, erratic };	// input fill used by the test helpers; erratic selects the random fill
+
+namespace direction	// which transform complex_to_complex() sets up
+{
+	enum direction_t { forward, backward };
+}
+
+clfftResultLocation cl_placeness( placeness::placeness_t placeness );	// defined in accuracy_test_common.cpp
+clfftLayout cl_layout( layout::buffer_layout_t layout_in );	// defined in accuracy_test_common.cpp
+
+
+/*****************************************************/
+/*****************************************************/
+// Runs one complex-to-complex transform through clfft and through the fftw reference and
+// expects equal results.  The dimension is lengths.size(); empty strides mean tightly packed.
+template< class T, class cl_T, class fftw_T >
+void complex_to_complex( data_pattern pattern, direction::direction_t direction,
+	std::vector<size_t> lengths, size_t batch,
+	std::vector<size_t> input_strides, std::vector<size_t> output_strides,
+	size_t input_distance, size_t output_distance,
+	layout::buffer_layout_t in_layout, layout::buffer_layout_t out_layout,
+	placeness::placeness_t placeness,
+	T scale = 1.0f )
+{
+	// an empty stride vector is handed over as NULL
+	size_t* const in_strides = input_strides.empty() ? NULL : &input_strides[0];
+	size_t* const out_strides = output_strides.empty() ? NULL : &output_strides[0];
+
+	clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+		in_strides, out_strides, batch, input_distance, output_distance,
+		cl_layout(in_layout), cl_layout(out_layout), cl_placeness(placeness) );
+	fftw<T, fftw_T> reference( lengths.size(), &lengths[0], batch, c2c );
+
+	// give both sides the same input pattern
+	switch( pattern )
+	{
+	case sawtooth:
+		test_fft.set_input_to_sawtooth( 1.0f );
+		reference.set_data_to_sawtooth( 1.0f );
+		break;
+	case value:
+		test_fft.set_input_to_value( 2.0f, 2.5f );
+		reference.set_all_data_to_value( 2.0f, 2.5f );
+		break;
+	case impulse:
+		test_fft.set_input_to_impulse();
+		reference.set_data_to_impulse();
+		break;
+	case erratic:
+		test_fft.set_input_to_random();
+		reference.set_data_to_random();
+		break;
+	default:
+		throw std::runtime_error( "invalid pattern type in complex_to_complex()" );
+	}
+
+	// sanity check: with unequal inputs the result comparison below could never pass
+	EXPECT_EQ( true, test_fft.input_buffer() == reference.input_buffer() );
+
+	// select the direction and apply the scale on both sides
+	switch( direction )
+	{
+	case direction::forward:
+		test_fft.set_forward_transform();
+		test_fft.forward_scale( scale );
+
+		reference.set_forward_transform();
+		reference.forward_scale( scale );
+		break;
+	case direction::backward:
+		test_fft.set_backward_transform();
+		test_fft.backward_scale( scale );
+
+		reference.set_backward_transform();
+		reference.backward_scale( scale );
+		break;
+	default:
+		throw std::runtime_error( "invalid direction in complex_to_complex()" );
+	}
+
+	reference.transform();
+	test_fft.transform();
+
+	EXPECT_EQ( true, test_fft.result() == reference.result() );
+}
+
+/*****************************************************/
+/*****************************************************/
+// Runs one real-to-complex transform through clfft and through the fftw reference and
+// expects equal results.  The dimension is lengths.size(); empty strides mean tightly packed.
+// The input layout is always real.
+template< class T, class cl_T, class fftw_T >
+void real_to_complex( data_pattern pattern,
+	std::vector<size_t> lengths, size_t batch,
+	std::vector<size_t> input_strides, std::vector<size_t> output_strides,
+	size_t input_distance, size_t output_distance,
+	layout::buffer_layout_t out_layout,
+	placeness::placeness_t placeness,
+	T scale = 1.0f )
+{
+	// an empty stride vector is handed over as NULL
+	size_t* const in_strides = input_strides.empty() ? NULL : &input_strides[0];
+	size_t* const out_strides = output_strides.empty() ? NULL : &output_strides[0];
+
+	clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+		in_strides, out_strides,
+		batch, input_distance, output_distance,
+		cl_layout(layout::real), cl_layout(out_layout), cl_placeness(placeness) );
+
+	fftw<T, fftw_T> reference( lengths.size(), &lengths[0], batch, r2c );
+
+	// give both sides the same input pattern
+	switch( pattern )
+	{
+	case sawtooth:
+		test_fft.set_input_to_sawtooth( 1.0f );
+		reference.set_data_to_sawtooth( 1.0f );
+		break;
+	case value:
+		test_fft.set_input_to_value( 2.0f );
+		reference.set_all_data_to_value( 2.0f );
+		break;
+	case impulse:
+		test_fft.set_input_to_impulse();
+		reference.set_data_to_impulse();
+		break;
+	case erratic:
+		test_fft.set_input_to_random();
+		reference.set_data_to_random();
+		break;
+	default:
+		throw std::runtime_error( "invalid pattern type in real_to_complex()" );
+	}
+
+	// sanity check: with unequal inputs the result comparison below could never pass
+	EXPECT_EQ( true, test_fft.input_buffer() == reference.input_buffer() );
+
+	test_fft.forward_scale( scale );
+	reference.forward_scale( scale );
+
+	test_fft.transform();
+	reference.transform();
+
+	EXPECT_EQ( true, test_fft.result() == reference.result() );
+}
+
+/*****************************************************/
+/*****************************************************/
+// Accuracy check for a complex-to-real transform: an r2c fftw run (data_maker) on the chosen pattern produces the input that is fed to both clFFT (test_fft) and the c2r fftw reference.
+// dimension is inferred from lengths.size(); tightly packed is inferred from strides.empty() (NULL is then handed to clfft)
+// output layout is always real; an unrecognized pattern throws std::runtime_error
+template< class T, class cl_T, class fftw_T >
+void complex_to_real( data_pattern pattern,
+	std::vector<size_t> lengths, size_t batch,
+	std::vector<size_t> input_strides, std::vector<size_t> output_strides,
+	size_t input_distance, size_t output_distance,
+	layout::buffer_layout_t in_layout,
+	placeness::placeness_t placeness,
+	T scale = 1.0f )
+{
+	fftw<T, fftw_T> data_maker( lengths.size(), &lengths[0], batch, r2c );	// used only to manufacture the input of the transform under test
+
+	if( pattern == sawtooth )
+	{
+		data_maker.set_data_to_sawtooth(1.0f);
+	}
+	else if( pattern == value )
+	{
+		data_maker.set_all_data_to_value(2.0f);
+	}
+	else if( pattern == impulse )
+	{
+		data_maker.set_data_to_impulse();
+	}
+	else if( pattern == erratic )
+	{
+		data_maker.set_data_to_random();
+	}
+	else
+	{
+		throw std::runtime_error( "invalid pattern type in complex_to_real()" );
+	}
+
+	data_maker.transform();
+
+	clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+		input_strides.empty() ? NULL : &input_strides[0],
+		output_strides.empty() ? NULL : &output_strides[0],
+		batch, input_distance, output_distance,
+		cl_layout(in_layout), cl_layout(layout::real),
+		cl_placeness(placeness) );
+	test_fft.set_input_to_buffer( data_maker.result() );
+
+	fftw<T, fftw_T> reference( lengths.size(), &lengths[0], batch, c2r );
+	reference.set_input_to_buffer(data_maker.result());	// same data_maker output as test_fft received above
+
+	// if we're starting with unequal data, we're destined for failure
+	EXPECT_EQ( true, test_fft.input_buffer() == reference.input_buffer() );
+
+	test_fft.backward_scale( scale );	// the same scale is applied to both sides before transforming
+	reference.backward_scale( scale );
+
+	test_fft.transform();
+	reference.transform();
+
+	EXPECT_EQ( true, test_fft.result() == reference.result() );	// the actual accuracy check
+}
+
+/*****************************************************/
+/*****************************************************/
+// In-place forward transform followed by an in-place backward transform; the final result is expected to equal the original input (held in "expected").
+// dimension is inferred from lengths.size(); strides are always NULL and distances 0 (there are no stride parameters here)
+
+// no need to support non-unit strides and distances here
+// they are covered in plenty of other places
+// and just needlessly complicate things in this case
+template< class T, class cl_T, class fftw_T >
+void complex_to_complex_round_trip( data_pattern pattern,
+									std::vector<size_t> lengths, size_t batch,
+									layout::buffer_layout_t layout )
+{
+	placeness::placeness_t placeness = placeness::in_place;
+
+	clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+		NULL, NULL,	batch, 0, 0,
+		cl_layout(layout), cl_layout(layout),
+		cl_placeness( placeness ) );
+
+	buffer<T> expected( lengths.size(), &lengths[0], NULL, batch, 0, layout, CLFFT_OUTOFPLACE );	// copy of the input pattern, kept for the comparisons below
+
+	if( pattern == sawtooth )
+	{
+		test_fft.set_input_to_sawtooth( 1.0f );
+		expected.set_all_to_sawtooth( 1.0f );
+	}
+	else if( pattern == value )
+	{
+		test_fft.set_input_to_value( 2.0f, 2.5f );
+		expected.set_all_to_value( 2.0f, 2.5f );
+	}
+	else if( pattern == impulse )
+	{
+		test_fft.set_input_to_impulse();
+		expected.set_all_to_impulse();
+	}
+	else if( pattern == erratic )
+	{
+		test_fft.set_input_to_random();
+		expected.set_all_to_random_data( 10, super_duper_global_seed );	// NOTE(review): presumably the same seed set_input_to_random() uses -- the equality check below depends on it; confirm
+	}
+	else
+	{
+		throw std::runtime_error( "invalid pattern type in complex_to_complex_round_trip()" );
+	}
+
+	// if we're starting with unequal data, we're destined for failure
+	EXPECT_EQ( true, test_fft.input_buffer() == expected );
+
+	test_fft.set_forward_transform();
+	test_fft.transform();
+
+	// confirm that we actually did something
+	bool stash_suppress_output = suppress_output;	// remember the global flag so it can be restored
+	suppress_output = true;	// a mismatch is the expected outcome here; presumably this keeps the comparison quiet
+	EXPECT_EQ( false, test_fft.result() == expected );
+	suppress_output = stash_suppress_output;
+
+	test_fft.set_backward_transform();
+	test_fft.transform();
+
+	EXPECT_EQ( true, test_fft.result() == expected );	// the round trip must reproduce the input
+}
+
+/*****************************************************/
+/*****************************************************/
+// In-place real -> hermitian_interleaved transform, then swap_layouts() and a second transform; the final result is expected to equal the original real input (held in "expected").
+// dimension is inferred from lengths.size(); strides are always NULL and distances 0 (there are no stride parameters here)
+template< class T, class cl_T, class fftw_T >
+void real_to_complex_round_trip( data_pattern pattern,
+								 std::vector<size_t> lengths, size_t batch )
+{
+	placeness::placeness_t placeness = placeness::in_place;
+
+	clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+		NULL, NULL,	batch, 0, 0,
+		cl_layout(layout::real), cl_layout(layout::hermitian_interleaved),
+		cl_placeness( placeness ) );
+
+	buffer<T> expected( lengths.size(), &lengths[0], NULL, batch, 0, layout::real, CLFFT_OUTOFPLACE );	// copy of the input pattern, kept for the comparisons below
+
+	if( pattern == sawtooth )
+	{
+		test_fft.set_input_to_sawtooth( 1.0f );
+		expected.set_all_to_sawtooth( 1.0f );
+	}
+	else if( pattern == value )
+	{
+		test_fft.set_input_to_value( 2.0f );
+		expected.set_all_to_value( 2.0f );
+	}
+	else if( pattern == impulse )
+	{
+		test_fft.set_input_to_impulse();
+		expected.set_all_to_impulse();
+	}
+	else if( pattern == erratic )
+	{
+		test_fft.set_input_to_random();
+		expected.set_all_to_random_data( 10, super_duper_global_seed );	// NOTE(review): presumably the same seed set_input_to_random() uses -- the equality check below depends on it; confirm
+	}
+	else
+	{
+		throw std::runtime_error( "invalid pattern type in real_to_complex_round_trip()" );
+	}
+
+	// if we're starting with unequal data, we're destined for failure
+	EXPECT_EQ( true, test_fft.input_buffer() == expected );
+
+	test_fft.transform();
+
+	// confirm that we actually did something
+	bool stash_suppress_output = suppress_output;	// remember the global flag so it can be restored
+	suppress_output = true;	// a mismatch is the expected outcome here; presumably this keeps the comparison quiet
+	EXPECT_EQ( false, test_fft.result() == expected );
+	suppress_output = stash_suppress_output;
+
+	test_fft.swap_layouts();	// second pass runs with the layouts swapped
+	test_fft.transform();
+
+	EXPECT_EQ( true, test_fft.result() == expected );	// the round trip must reproduce the input
+}
diff --git a/src/tests/accuracy_test_mixed_radices.cpp b/src/tests/accuracy_test_mixed_radices.cpp
new file mode 100644
index 00000000..30892614
--- /dev/null
+++ b/src/tests/accuracy_test_mixed_radices.cpp
@@ -0,0 +1,458 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include<math.h>
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "accuracy_test_common.h"
+#include <stdexcept>
+#include <vector>
+
+class mixed_radix : public ::testing::TestWithParam<size_t> {	// value-parameterized fixture; adds no state or setup of its own
+protected:
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+	mixed_radix() {}
+	virtual ~mixed_radix() {}
+};
+
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_complex_to_complex( size_t problem_size )
+{
+	// One forward, in-place, planar complex-to-complex accuracy check on a 1D
+	// transform of problem_size points; any std::exception is passed to handle_exception().
+	try
+	{
+		if( verbose )
+			std::cout << "Now testing problem size " << problem_size << std::endl;
+
+		// one dimension, one batch
+		std::vector<size_t> dims;
+		dims.push_back( problem_size );
+		size_t batch_count = 1;
+
+		// no explicit strides, zero distances on both sides
+		std::vector<size_t> src_strides, dst_strides;
+		size_t src_distance = 0, dst_distance = 0;
+		layout::buffer_layout_t src_layout = layout::complex_planar;
+		layout::buffer_layout_t dst_layout = layout::complex_planar;
+		placeness::placeness_t where = placeness::in_place;
+		direction::direction_t way = direction::forward;
+		data_pattern fill = sawtooth;
+
+		complex_to_complex<T, cl_T, fftw_T>( fill, way, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+	}
+	catch( const std::exception& failure )
+	{
+		handle_exception( failure );
+	}
+}
+
+TEST_P( mixed_radix, single_precision_complex_to_complex_auto_generated ) {
+	const size_t fft_length = GetParam();	// length under test, supplied by the instantiation
+	RecordProperty( "problem_size", static_cast<int>( fft_length ) );
+	mixed_radix_complex_to_complex<float, cl_float, fftwf_complex>( fft_length );
+}
+
+TEST_P( mixed_radix, double_precision_complex_to_complex_auto_generated ) {
+	const size_t fft_length = GetParam();	// length under test, supplied by the instantiation
+	RecordProperty( "problem_size", static_cast<int>( fft_length ) );
+	mixed_radix_complex_to_complex<double, cl_double, fftw_complex>( fft_length );
+}
+
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_real_to_hermitian( size_t problem_size )
+{
+	// One in-place real -> hermitian_interleaved accuracy check on a 1D transform
+	// of problem_size points; any std::exception is passed to handle_exception().
+	try
+	{
+		if( verbose )
+			std::cout << "Now testing problem size " << problem_size << std::endl;
+
+		// one dimension, one batch
+		std::vector<size_t> dims;
+		dims.push_back( problem_size );
+		size_t batch_count = 1;
+
+		// no explicit strides, zero distances on both sides
+		std::vector<size_t> src_strides, dst_strides;
+		size_t src_distance = 0, dst_distance = 0;
+		layout::buffer_layout_t dst_layout = layout::hermitian_interleaved;
+		placeness::placeness_t where = placeness::in_place;
+		data_pattern fill = sawtooth;
+		real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, dst_layout, where );
+	}
+	catch( const std::exception& failure )
+	{
+		handle_exception( failure );
+	}
+}
+
+TEST_P( mixed_radix, single_precision_real_to_hermitian_auto_generated ) {
+	const size_t fft_length = GetParam();	// length under test, supplied by the instantiation
+	RecordProperty( "problem_size", static_cast<int>( fft_length ) );
+	mixed_radix_real_to_hermitian<float, cl_float, fftwf_complex>( fft_length );
+}
+
+TEST_P( mixed_radix, double_precision_real_to_hermitian_auto_generated ) {
+	const size_t fft_length = GetParam();	// length under test, supplied by the instantiation
+	RecordProperty( "problem_size", static_cast<int>( fft_length ) );
+	mixed_radix_real_to_hermitian<double, cl_double, fftw_complex>( fft_length );
+}
+
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_hermitian_to_real( size_t problem_size )
+{
+	// One in-place hermitian_interleaved -> real accuracy check on a 1D transform
+	// of problem_size points; any std::exception is passed to handle_exception().
+	try
+	{
+		if( verbose )
+			std::cout << "Now testing problem size " << problem_size << std::endl;
+
+		// one dimension, one batch
+		std::vector<size_t> dims;
+		dims.push_back( problem_size );
+		size_t batch_count = 1;
+
+		// no explicit strides, zero distances on both sides
+		std::vector<size_t> src_strides, dst_strides;
+		size_t src_distance = 0, dst_distance = 0;
+		layout::buffer_layout_t src_layout = layout::hermitian_interleaved;
+		placeness::placeness_t where = placeness::in_place;
+		data_pattern fill = sawtooth;
+		complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, where );
+	}
+	catch( const std::exception& failure )
+	{
+		handle_exception( failure );
+	}
+}
+
+TEST_P( mixed_radix, single_precision_hermitian_to_real_auto_generated ) {
+	const size_t fft_length = GetParam();	// length under test, supplied by the instantiation
+	RecordProperty( "problem_size", static_cast<int>( fft_length ) );
+	mixed_radix_hermitian_to_real<float, cl_float, fftwf_complex>( fft_length );
+}
+
+TEST_P( mixed_radix, double_precision_hermitian_to_real_auto_generated ) {
+	const size_t fft_length = GetParam();	// length under test, supplied by the instantiation
+	RecordProperty( "problem_size", static_cast<int>( fft_length ) );
+	mixed_radix_hermitian_to_real<double, cl_double, fftw_complex>( fft_length );
+}
+
+// Builds the list of FFT lengths fed to the mixed_radix parameterized tests:
+// every value 2^k * 3^j * 5^i that does not exceed
+// max_mixed_radices_to_test. A single instance, supported_sizes, is defined
+// together with the class.
+class Supported_Fft_Sizes
+{
+public:
+	// Generated lengths. The power of 5 varies slowest and the power of 2
+	// fastest, so the list begins 1, 2, 4, ... and is not sorted overall.
+	std::vector<size_t> sizes;
+
+	// Inclusive upper bound on every generated length.
+	const size_t max_mixed_radices_to_test;
+
+	// Fills sizes; takes no arguments.
+	Supported_Fft_Sizes()
+	: max_mixed_radices_to_test( 4096 )
+	{
+		const size_t limit = max_mixed_radices_to_test;
+
+		// Each loop ends once its own factor, multiplied by the enclosing
+		// factors, is already beyond the limit, i.e. once the loop nested
+		// inside it would have nothing left to record.
+		// i: powers of 5
+		for( size_t pow5 = 1; pow5 <= limit; pow5 *= 5 )
+		{
+			// j: powers of 3
+			for( size_t pow3 = 1; pow5 * pow3 <= limit; pow3 *= 3 )
+			{
+				const size_t odd_part = pow5 * pow3;
+
+				// k: every odd_part * 2^k within the limit is recorded
+				for( size_t pow2 = 1; odd_part * pow2 <= limit; pow2 *= 2 )
+				{
+					sizes.push_back( odd_part * pow2 );
+				}
+			}
+		}
+	}
+} supported_sizes;
+
+INSTANTIATE_TEST_CASE_P(
+	mixed_radices,	// name given to this instantiation
+	mixed_radix,	// parameterized fixture being instantiated
+	::testing::ValuesIn( supported_sizes.sizes )	// one parameter value per entry of supported_sizes.sizes
+);
+
+ // ============================================== //
+ // the following is a place to stick static tests //
+ // with mixed radices. the tests will most likely //
+ // be created in response to failed random tests. //
+ // ============================================== //
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+class accuracy_test_mixed_single : public ::testing::Test
+{	// fixture named by the float-instantiated TEST_F cases in this file; adds no state or setup of its own
+protected:
+	accuracy_test_mixed_single() {}
+	virtual ~accuracy_test_mixed_single() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+class accuracy_test_mixed_double : public ::testing::Test
+{	// fixture named by the double-instantiated TEST_F cases in this file; adds no state or setup of its own
+protected:
+	accuracy_test_mixed_double() {}
+	virtual ~accuracy_test_mixed_double() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void hermitian_to_real_transforms_with_non_unit_output_strides_should_pass()
+{
+	// 1D, length 10, one batch, hermitian_planar -> real, out of place
+	std::vector<size_t> dims;
+	dims.push_back( 10 );
+	size_t batch_count = 1;
+	layout::buffer_layout_t src_layout = layout::hermitian_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	// input: no explicit strides; output: stride of 2; zero distances
+	std::vector<size_t> src_strides;
+	std::vector<size_t> dst_strides;
+	dst_strides.push_back( 2 );
+	size_t src_distance = 0;
+	size_t dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, where );
+}
+
+TEST_F(accuracy_test_mixed_single, hermitian_to_real_transforms_with_non_unit_output_strides_should_pass)
+{
+	try { hermitian_to_real_transforms_with_non_unit_output_strides_should_pass< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_mixed_double, hermitian_to_real_transforms_with_non_unit_output_strides_should_pass)
+{
+	try { hermitian_to_real_transforms_with_non_unit_output_strides_should_pass< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void hermitian_to_real_transforms_with_non_unit_input_strides_should_pass()
+{
+	// 2D, lengths (6, 67500), one batch, hermitian_planar -> real, out of place
+	std::vector<size_t> dims;
+	dims.push_back( 6 );
+	dims.push_back( 67500 );
+	size_t batch_count = 1;
+	layout::buffer_layout_t src_layout = layout::hermitian_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	// input: strides (2, 12), distance 810074; output: no explicit strides, distance 0
+	std::vector<size_t> src_strides;
+	src_strides.push_back( 2 );
+	src_strides.push_back( 12 );
+	size_t src_distance = 810074;
+	std::vector<size_t> dst_strides;
+	size_t dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, where );
+}
+
+TEST_F(accuracy_test_mixed_single, hermitian_to_real_transforms_with_non_unit_input_strides_should_pass)
+{
+	try { hermitian_to_real_transforms_with_non_unit_input_strides_should_pass< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_mixed_double, hermitian_to_real_transforms_with_non_unit_input_strides_should_pass)
+{
+	try { hermitian_to_real_transforms_with_non_unit_input_strides_should_pass< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_targeted_real_to_hermitian_transform()
+{
+	// 2D, lengths (15, 2), two batches, real -> hermitian_interleaved, in place
+	std::vector<size_t> dims;
+	dims.push_back( 15 );
+	dims.push_back( 2 );
+	size_t batch_count = 2;
+	layout::buffer_layout_t dst_layout = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	// input: strides (1, 16), distance 32; output: strides (1, 8), distance 16
+	std::vector<size_t> src_strides;
+	src_strides.push_back( 1 );
+	src_strides.push_back( 16 );
+	size_t src_distance = 32;
+	std::vector<size_t> dst_strides;
+	dst_strides.push_back( 1 );
+	dst_strides.push_back( 8 );
+	size_t dst_distance = 16;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, dst_layout, where );
+}
+
+TEST_F(accuracy_test_mixed_single, small_targeted_real_to_hermitian_transform)
+{
+	try { small_targeted_real_to_hermitian_transform< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_mixed_double, small_targeted_real_to_hermitian_transform)
+{
+	try { small_targeted_real_to_hermitian_transform< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void larger_targeted_real_to_hermitian_transform()
+{
+	// 2D, lengths (15, 4500), two batches, real -> hermitian_interleaved, in place
+	std::vector<size_t> dims;
+	dims.push_back( 15 );
+	dims.push_back( 4500 );
+	size_t batch_count = 2;
+	layout::buffer_layout_t dst_layout = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	// input: strides (1, 16), distance 72000; output: strides (1, 8), distance 36000
+	std::vector<size_t> src_strides;
+	src_strides.push_back( 1 );
+	src_strides.push_back( 16 );
+	size_t src_distance = 72000;
+	std::vector<size_t> dst_strides;
+	dst_strides.push_back( 1 );
+	dst_strides.push_back( 8 );
+	size_t dst_distance = 36000;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, dst_layout, where );
+}
+
+TEST_F(accuracy_test_mixed_single, larger_targeted_real_to_hermitian_transform)
+{
+	try { larger_targeted_real_to_hermitian_transform< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_mixed_double, larger_targeted_real_to_hermitian_transform)
+{
+	try { larger_targeted_real_to_hermitian_transform< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void another_targeted_real_to_hermitian_transform()
+{
+	// 2D, lengths (30, 10125), one batch, real -> hermitian_interleaved, in place
+	std::vector<size_t> dims;
+	dims.push_back( 30 );
+	dims.push_back( 10125 );
+	size_t batch_count = 1;
+	layout::buffer_layout_t dst_layout = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	// input: strides (1, 32), distance 324000; output: strides (1, 16), distance 162000
+	std::vector<size_t> src_strides;
+	src_strides.push_back( 1 );
+	src_strides.push_back( 32 );
+	size_t src_distance = 324000;
+	std::vector<size_t> dst_strides;
+	dst_strides.push_back( 1 );
+	dst_strides.push_back( 16 );
+	size_t dst_distance = 162000;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, dst_layout, where );
+}
+
+TEST_F(accuracy_test_mixed_single, another_targeted_real_to_hermitian_transform)
+{
+	try { another_targeted_real_to_hermitian_transform< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_mixed_double, another_targeted_real_to_hermitian_transform)
+{
+	try { another_targeted_real_to_hermitian_transform< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void possible_driver_bug_1D_length_375_fails()
+{
+	// 1D, length 375, one batch, real -> hermitian_planar, out of place
+	std::vector<size_t> dims;
+	dims.push_back( 375 );
+	size_t batch_count = 1;
+	layout::buffer_layout_t dst_layout = layout::hermitian_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	// no explicit strides, zero distances on both sides
+	std::vector<size_t> src_strides;
+	std::vector<size_t> dst_strides;
+	size_t src_distance = 0;
+	size_t dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, dst_layout, where );
+}
+
+TEST_F(accuracy_test_mixed_single, possible_driver_bug_1D_length_375_fails)
+{
+	try { possible_driver_bug_1D_length_375_fails< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_mixed_double, possible_driver_bug_1D_length_375_fails)
+{
+	try { possible_driver_bug_1D_length_375_fails< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
\ No newline at end of file
diff --git a/src/tests/accuracy_test_pow2.cpp b/src/tests/accuracy_test_pow2.cpp
new file mode 100644
index 00000000..56f6bfdf
--- /dev/null
+++ b/src/tests/accuracy_test_pow2.cpp
@@ -0,0 +1,7408 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include<math.h>
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "typedefs.h"
+#include "accuracy_test_common.h"
+#include <stdexcept>
+#include <vector>
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+class accuracy_test_pow2_single : public ::testing::Test
+{	// fixture named by the float-instantiated TEST_F cases in this file; adds no state or setup of its own
+protected:
+	accuracy_test_pow2_single() {}
+	virtual ~accuracy_test_pow2_single() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+class accuracy_test_pow2_double : public ::testing::Test
+{	// fixture named by the double-instantiated TEST_F cases in this file; adds no state or setup of its own
+protected:
+	accuracy_test_pow2_double() {}
+	virtual ~accuracy_test_pow2_double() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+namespace power2
+{
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// 1D, normal2 points, one batch, forward, in place, complex_planar -> complex_planar
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	direction::direction_t way = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// no explicit strides, zero distances on both sides
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, way, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// 1D, normal2 points, one batch, backward, in place, complex_planar -> complex_planar
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	direction::direction_t way = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// no explicit strides, zero distances on both sides
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, way, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_1D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_1D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// 1D, normal2 points, one batch, forward, in place, complex_interleaved -> complex_interleaved
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	direction::direction_t way = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// no explicit strides, zero distances on both sides
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, way, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void len65536_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// 1D, 65536 points, one batch, forward, in place, complex_interleaved -> complex_interleaved
+	std::vector<size_t> lengths;
+	lengths.push_back( 65536 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;	// left empty: no explicit strides
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::forward;	// was backward, which contradicted the test name
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, len65536_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { len65536_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single precision; previously ran the normal2-length case by mistake
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, len65536_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { len65536_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double precision; previously ran the normal2-length case by mistake
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// 1D, normal2 points, one batch, backward, in place, complex_interleaved -> complex_interleaved
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	direction::direction_t way = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// no explicit strides, zero distances on both sides
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, way, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double precision
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_out_of_place_complex_planar_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_out_of_place_complex_planar_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: backward, out-of-place, complex_planar in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_out_of_place_complex_planar_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_out_of_place_complex_planar_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: forward, out-of-place, complex_interleaved in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: backward, out-of-place, complex_interleaved in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: forward, out-of-place, complex_planar in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: backward, out-of-place, complex_planar in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: forward, out-of-place, complex_interleaved in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length normal2: backward, out-of-place, complex_interleaved in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D real_to_complex case, length normal2: in-place, layout hermitian_interleaved.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in real_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved; // the only layout argument real_to_complex receives
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_in_place_real_to_hermitian_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_in_place_real_to_hermitian_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex_to_real case, length normal2: in-place, layout hermitian_interleaved.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_real
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved; // the only layout argument complex_to_real receives
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_in_place_hermitian_interleaved_to_real) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_in_place_hermitian_interleaved_to_real) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D real_to_complex case, length normal2: out-of-place, layout hermitian_interleaved.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in real_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved; // the only layout argument real_to_complex receives
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_out_of_place_real_to_hermitian_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_out_of_place_real_to_hermitian_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex_to_real case, length normal2: out-of-place, layout hermitian_interleaved.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_real
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved; // the only layout argument complex_to_real receives
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_out_of_place_hermitian_interleaved_to_real) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_out_of_place_hermitian_interleaved_to_real) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D real_to_complex case, length normal2: out-of-place, layout hermitian_planar.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in real_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar; // the only layout argument real_to_complex receives
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_out_of_place_real_to_hermitian_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_out_of_place_real_to_hermitian_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex_to_real case, length normal2: out-of-place, layout hermitian_planar.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_real
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar; // the only layout argument complex_to_real receives
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_out_of_place_hermitian_planar_to_real) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { normal_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_out_of_place_hermitian_planar_to_real) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { normal_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 1D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: forward, in-place, complex_planar in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_forward_in_place_complex_planar_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_forward_in_place_complex_planar_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: backward, in-place, complex_planar in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_backward_in_place_complex_planar_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_backward_in_place_complex_planar_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: forward, in-place, complex_interleaved in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_forward_in_place_complex_interleaved_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_forward_in_place_complex_interleaved_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: backward, in-place, complex_interleaved in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_backward_in_place_complex_interleaved_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_backward_in_place_complex_interleaved_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: forward, out-of-place, complex_planar in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_forward_out_of_place_complex_planar_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_forward_out_of_place_complex_planar_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: backward, out-of-place, complex_planar in, complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_backward_out_of_place_complex_planar_to_complex_planar) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_backward_out_of_place_complex_planar_to_complex_planar) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: forward, out-of-place, complex_interleaved in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: backward, out-of-place, complex_interleaved in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// Helper: 1D complex-to-complex case, length small2: forward, out-of-place, complex_planar in, complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 ); // single entry: this is the "1D" in the test name
+	size_t batch = 1;
+	std::vector<size_t> input_strides; // left empty -- presumably "let the helper choose strides"; confirm in complex_to_complex
+	std::vector<size_t> output_strides; // left empty as well
+	size_t input_distance = 0; // 0 for both distances -- presumably "use the default"; confirm
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness ); // this function only forwards the settings above; T/cl_T/fftw_T pass through unchanged
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_forward_out_of_place_complex_planar_to_complex_interleaved) // single-precision run on the accuracy_test_pow2_single fixture
+{
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } // T=float, cl_T=cl_float, fftw_T=fftwf_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_forward_out_of_place_complex_planar_to_complex_interleaved) // double-precision run on the accuracy_test_pow2_double fixture
+{
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } // T=double, cl_T=cl_double, fftw_T=fftw_complex
+	catch( const std::exception& err ) { handle_exception(err);	} // any std::exception is handed to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Drives complex_to_complex: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Drives complex_to_complex: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_in_place_real_to_hermitian_interleaved()
+{
+	// Drives real_to_complex: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_in_place_real_to_hermitian_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_in_place_real_to_hermitian_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_in_place_hermitian_interleaved_to_real()
+{
+	// Drives complex_to_real: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_in_place_hermitian_interleaved_to_real)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_in_place_hermitian_interleaved_to_real)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Drives real_to_complex: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_out_of_place_real_to_hermitian_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_out_of_place_real_to_hermitian_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Drives complex_to_real: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_out_of_place_hermitian_interleaved_to_real)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_out_of_place_hermitian_interleaved_to_real)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_real_to_hermitian_planar()
+{
+	// Drives real_to_complex: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_out_of_place_real_to_hermitian_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_out_of_place_real_to_hermitian_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_hermitian_planar_to_real()
+{
+	// Drives complex_to_real: one small2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, small2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_1D_out_of_place_hermitian_planar_to_real)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_out_of_place_hermitian_planar_to_real)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { small_1D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 1D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_forward_in_place_complex_planar_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_forward_in_place_complex_planar_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_backward_in_place_complex_planar_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_backward_in_place_complex_planar_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Drives complex_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_in_place_real_to_hermitian_interleaved()
+{
+	// Drives real_to_complex: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_in_place_real_to_hermitian_interleaved)
+{	// Single-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_in_place_real_to_hermitian_interleaved)
+{	// Double-precision instantiation; a std::exception escaping the call is passed to handle_exception.
+	try { large_1D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_in_place_hermitian_interleaved_to_real()
+{
+	// Drives complex_to_real: one large2-length dimension, batch 1, sawtooth pattern, empty strides, zero distances.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	size_t batch = 1;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	std::vector<size_t> lengths( 1, large2 );
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_in_place_hermitian_interleaved_to_real)
+{
+	try { large_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_in_place_hermitian_interleaved_to_real)
+{
+	try { large_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** large 1D, out-of-place, real -> hermitian interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { large2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to real_to_complex with layout::hermitian_interleaved and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { large_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { large_1D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** large 1D, out-of-place, hermitian interleaved -> real *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { large2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_real with layout::hermitian_interleaved and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_out_of_place_hermitian_interleaved_to_real)
+{
+	try { large_1D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_out_of_place_hermitian_interleaved_to_real)
+{
+	try { large_1D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** large 1D, out-of-place, real -> hermitian planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { large2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to real_to_complex with layout::hermitian_planar and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_out_of_place_real_to_hermitian_planar)
+{
+	try { large_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_out_of_place_real_to_hermitian_planar)
+{
+	try { large_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** large 1D, out-of-place, hermitian planar -> real *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { large2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_real with layout::hermitian_planar and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_out_of_place_hermitian_planar_to_real)
+{
+	try { large_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_out_of_place_hermitian_planar_to_real)
+{
+	try { large_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// ***** normal 2D, forward, in-place, complex planar -> complex planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::forward, placeness::in_place and complex_planar input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, backward, in-place, complex planar -> complex planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::backward, placeness::in_place and complex_planar input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, forward, in-place, complex interleaved -> complex interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::forward, placeness::in_place and complex_interleaved input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, backward, in-place, complex interleaved -> complex interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::backward, placeness::in_place and complex_interleaved input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, forward, out-of-place, complex planar -> complex planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::forward, placeness::out_of_place and complex_planar input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, backward, out-of-place, complex planar -> complex planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::backward, placeness::out_of_place and complex_planar input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, forward, out-of-place, complex interleaved -> complex interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::forward, placeness::out_of_place and complex_interleaved input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, backward, out-of-place, complex interleaved -> complex interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::backward, placeness::out_of_place and complex_interleaved input and output layouts; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, forward, out-of-place, complex planar -> complex interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::forward, placeness::out_of_place, complex_planar input layout and complex_interleaved output layout; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, backward, out-of-place, complex planar -> complex interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::backward, placeness::out_of_place, complex_planar input layout and complex_interleaved output layout; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, forward, out-of-place, complex interleaved -> complex planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::forward, placeness::out_of_place, complex_interleaved input layout and complex_planar output layout; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, backward, out-of-place, complex interleaved -> complex planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_complex with direction::backward, placeness::out_of_place, complex_interleaved input layout and complex_planar output layout; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::backward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, in-place, real -> hermitian interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to real_to_complex with layout::hermitian_interleaved and placeness::in_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_in_place_real_to_hermitian_interleaved)
+{
+	try { normal_2D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_in_place_real_to_hermitian_interleaved)
+{
+	try { normal_2D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, in-place, hermitian interleaved -> real *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_real with layout::hermitian_interleaved and placeness::in_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_in_place_hermitian_interleaved_to_real)
+{
+	try { normal_2D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_in_place_hermitian_interleaved_to_real)
+{
+	try { normal_2D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, out-of-place, real -> hermitian interleaved *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to real_to_complex with layout::hermitian_interleaved and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { normal_2D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { normal_2D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, out-of-place, hermitian interleaved -> real *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to complex_to_real with layout::hermitian_interleaved and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_out_of_place_hermitian_interleaved_to_real)
+{
+	try { normal_2D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_out_of_place_hermitian_interleaved_to_real)
+{
+	try { normal_2D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// ***** normal 2D, out-of-place, real -> hermitian planar *****
+// Helper and its float / double TEST_F wrappers. The helper passes lengths { normal2, normal2 }, batch 1, empty stride vectors, zero distances and the sawtooth pattern to real_to_complex with layout::hermitian_planar and placeness::out_of_place; each wrapper hands any std::exception to handle_exception.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_out_of_place_real_to_hermitian_planar)
+{
+	try { normal_2D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_out_of_place_real_to_hermitian_planar)
+{
+	try { normal_2D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_out_of_place_hermitian_planar_to_real)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_out_of_place_hermitian_planar_to_real)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 2D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Forward, in-place complex_to_complex case: complex_planar in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Backward, in-place complex_to_complex case: complex_planar in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, in-place complex_to_complex case: complex_interleaved in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, in-place complex_to_complex case: complex_interleaved in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Forward, out-of-place complex_to_complex case: complex_planar in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Backward, out-of-place complex_to_complex case: complex_planar in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, out-of-place complex_to_complex case: complex_interleaved in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, out-of-place complex_to_complex case: complex_interleaved in and out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Forward, out-of-place complex_to_complex case: complex_planar in, complex_interleaved out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Backward, out-of-place complex_to_complex case: complex_planar in, complex_interleaved out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Forward, out-of-place complex_to_complex case: complex_interleaved in, complex_planar out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Backward, out-of-place complex_to_complex case: complex_interleaved in, complex_planar out, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_in_place_real_to_hermitian_interleaved()
+{
+	// In-place real_to_complex case with hermitian_interleaved layout, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_in_place_real_to_hermitian_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_in_place_real_to_hermitian_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_in_place_hermitian_interleaved_to_real()
+{
+	// In-place complex_to_real case with hermitian_interleaved layout, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_in_place_hermitian_interleaved_to_real)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_in_place_hermitian_interleaved_to_real)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Out-of-place real_to_complex case with hermitian_interleaved layout, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_out_of_place_real_to_hermitian_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_out_of_place_real_to_hermitian_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Out-of-place complex_to_real case with hermitian_interleaved layout, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_out_of_place_hermitian_interleaved_to_real)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_out_of_place_hermitian_interleaved_to_real)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_real_to_hermitian_planar()
+{
+	// Out-of-place real_to_complex case with hermitian_planar layout, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_out_of_place_real_to_hermitian_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_out_of_place_real_to_hermitian_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_hermitian_planar_to_real()
+{
+	// Out-of-place complex_to_real case with hermitian_planar layout, lengths { small2, small2 }.
+	// One batch of sawtooth data; strides are passed empty and both distances as 0.
+	std::vector<size_t> dims( 2, small2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_out_of_place_hermitian_planar_to_real)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_out_of_place_hermitian_planar_to_real)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { small_2D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 2D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Forward, in-place, complex_planar in and out; lengths { MaxLength2D<T>(2), normal2 }, one batch, sawtooth data.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0, dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, large_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { large_2D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { large_2D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Backward, in-place, complex_planar in and out; lengths { MaxLength2D<T>(2), normal2 }, one batch, sawtooth data.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0, dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, large_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { large_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { large_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, in-place, complex_interleaved in and out; lengths { MaxLength2D<T>(2), normal2 }, one batch, sawtooth data.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0, dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow2_single, large_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is passed to handle_exception.
+	try { large_2D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is passed to handle_exception.
+	try { large_2D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, in-place, complex_interleaved in and out; lengths { MaxLength2D<T>(2), normal2 }, one batch, sawtooth data.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;
+	size_t dist_in = 0, dist_out = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+	data_pattern input_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, buffer_mode );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: forward, out-of-place, planar -> planar, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: backward, out-of-place, planar -> planar, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: forward, out-of-place, interleaved -> interleaved, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: backward, out-of-place, interleaved -> interleaved, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: forward, out-of-place, planar -> interleaved, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: backward, out-of-place, planar -> interleaved, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: forward, out-of-place, interleaved -> planar, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-complex case: backward, out-of-place, interleaved -> planar, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D real-to-complex case: in-place, hermitian interleaved layout, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_in_place_real_to_hermitian_interleaved) {
+	try { large_2D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_in_place_real_to_hermitian_interleaved) {
+	try { large_2D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-real case: in-place, hermitian interleaved layout, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_in_place_hermitian_interleaved_to_real) {
+	try { large_2D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_in_place_hermitian_interleaved_to_real) {
+	try { large_2D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D real-to-complex case: out-of-place, hermitian interleaved layout, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_out_of_place_real_to_hermitian_interleaved) {
+	try { large_2D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_out_of_place_real_to_hermitian_interleaved) {
+	try { large_2D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-real case: out-of-place, hermitian interleaved layout, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_out_of_place_hermitian_interleaved_to_real) {
+	try { large_2D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_out_of_place_hermitian_interleaved_to_real) {
+	try { large_2D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D real-to-complex case: out-of-place, hermitian planar layout, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_out_of_place_real_to_hermitian_planar) {
+	try { large_2D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_out_of_place_real_to_hermitian_planar) {
+	try { large_2D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 2D complex-to-real case: out-of-place, hermitian planar layout, batch of 1.
+// Lengths are { MaxLength2D<T>(2), normal2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(2) );
+	dims.push_back( normal2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, hermitian_layout, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_2D_out_of_place_hermitian_planar_to_real) {
+	try { large_2D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, large_2D_out_of_place_hermitian_planar_to_real) {
+	try { large_2D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 3D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: forward, in-place, planar -> planar, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_forward_in_place_complex_planar_to_complex_planar) {
+	try { normal_3D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, normal_3D_forward_in_place_complex_planar_to_complex_planar) {
+	try { normal_3D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: backward, in-place, planar -> planar, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_backward_in_place_complex_planar_to_complex_planar) {
+	try { normal_3D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, normal_3D_backward_in_place_complex_planar_to_complex_planar) {
+	try { normal_3D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: forward, in-place, interleaved -> interleaved, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: backward, in-place, interleaved -> interleaved, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: forward, out-of-place, planar -> planar, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, normal_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: backward, out-of-place, planar -> planar, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_double, normal_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case: forward, out-of-place, interleaved -> interleaved, batch of 1.
+// Lengths are { normal2, small2, small2 }; strides stay empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, layout_in, layout_out, place );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction backward, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction forward, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction backward, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Calls complex_to_complex() with direction forward, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Calls complex_to_complex() with direction backward, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_in_place_real_to_hermitian_interleaved()
+{
+	// Calls real_to_complex() with layout hermitian_interleaved, placeness in_place and lengths { small2, normal2, small2 }.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = normal2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_in_place_real_to_hermitian_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_in_place_real_to_hermitian_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_in_place_hermitian_interleaved_to_real()
+{
+	// Calls complex_to_real() with layout hermitian_interleaved, placeness in_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_in_place_hermitian_interleaved_to_real)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_in_place_hermitian_interleaved_to_real)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Calls real_to_complex() with layout hermitian_interleaved, placeness out_of_place and lengths { small2, small2, normal2 }.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = normal2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_out_of_place_real_to_hermitian_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_out_of_place_real_to_hermitian_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Calls complex_to_real() with layout hermitian_interleaved, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_out_of_place_hermitian_interleaved_to_real)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_out_of_place_hermitian_interleaved_to_real)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_real_to_hermitian_planar()
+{
+	// Calls real_to_complex() with layout hermitian_planar, placeness out_of_place and lengths { normal2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = normal2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_out_of_place_real_to_hermitian_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_out_of_place_real_to_hermitian_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_hermitian_planar_to_real()
+{
+	// Calls complex_to_real() with layout hermitian_planar, placeness out_of_place and lengths { small2, normal2, small2 }.
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = normal2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_3D_out_of_place_hermitian_planar_to_real)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_3D_out_of_place_hermitian_planar_to_real)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { normal_3D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 3D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex() with direction forward, placeness in_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex() with direction backward, placeness in_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction forward, placeness in_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction backward, placeness in_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex() with direction forward, placeness out_of_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex() with direction backward, placeness out_of_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction forward, placeness out_of_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction backward, placeness out_of_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction forward, placeness out_of_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex() with direction backward, placeness out_of_place and lengths { small2, small2, small2 }.
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	std::vector<size_t> lengths( 3 );
+	lengths[0] = small2;
+	lengths[1] = small2;
+	lengths[2] = small2;
+	std::vector<size_t> input_strides, output_strides;	// both left empty
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double instantiation; a caught std::exception is passed to handle_exception()
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { small2, small2, small2 }, out of place,
+// complex_interleaved in -> complex_planar out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { small2, small2, small2 }, out of place,
+// complex_interleaved in -> complex_planar out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths { small2, small2, small2 }, in place,
+// hermitian_interleaved layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, _small_3D_in_place_real_to_hermitian_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, _small_3D_in_place_real_to_hermitian_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths { small2, small2, small2 }, in place,
+// hermitian_interleaved layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, _small_3D_in_place_hermitian_interleaved_to_real)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, _small_3D_in_place_hermitian_interleaved_to_real)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths { small2, small2, small2 }, out of place,
+// hermitian_interleaved layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, _small_3D_out_of_place_real_to_hermitian_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, _small_3D_out_of_place_real_to_hermitian_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths { small2, small2, small2 }, out of place,
+// hermitian_interleaved layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, _small_3D_out_of_place_hermitian_interleaved_to_real)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, _small_3D_out_of_place_hermitian_interleaved_to_real)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths { small2, small2, small2 }, out of place,
+// hermitian_planar layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t fmt = layout::hermitian_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, _small_3D_out_of_place_real_to_hermitian_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, _small_3D_out_of_place_real_to_hermitian_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths { small2, small2, small2 }, out of place,
+// hermitian_planar layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> dims;
+	for( size_t axis = 0; axis < 3; ++axis ) dims.push_back( small2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+
+	layout::buffer_layout_t fmt = layout::hermitian_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, _small_3D_out_of_place_hermitian_planar_to_real)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, _small_3D_out_of_place_hermitian_planar_to_real)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { small_3D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 3D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { large2, 2, 2 }, in place,
+// complex_planar in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { large2, 2, 2 }, in place,
+// complex_planar in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { large2, 2, 2 }, in place,
+// complex_interleaved in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { large2, 2, 2 }, in place,
+// complex_interleaved in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { large2, 2, 2 }, out of place,
+// complex_planar in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { large2, 2, 2 }, out of place,
+// complex_planar in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { large2, 2, 2 }, out of place,
+// complex_interleaved in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { large2, 2, 2 }, out of place,
+// complex_interleaved in and out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { large2, 2, 2 }, out of place,
+// complex_planar in -> complex_interleaved out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { 2, 2, large2 }, out of place,
+// complex_planar in -> complex_interleaved out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.resize( 2, 2 );	// two leading axes of length 2
+	dims.push_back( large2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: lengths { 2, large2, 2 }, out of place,
+// complex_interleaved in -> complex_planar out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.resize( 3, 2 );	// start from { 2, 2, 2 }
+	dims[ 1 ] = large2;	// only the middle axis is large
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward complex_to_complex case: lengths { large2, 2, 2 }, out of place,
+// complex_interleaved in -> complex_planar out, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths { 2, 2, large2 }, in place,
+// hermitian_interleaved layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.resize( 2, 2 );	// two leading axes of length 2
+	dims.push_back( large2 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_in_place_real_to_hermitian_interleaved)
+{	// Single precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_in_place_real_to_hermitian_interleaved)
+{	// Double precision: a std::exception thrown by the case is handed to handle_exception.
+	try { large_3D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths { large2, 2, 2 }, in place,
+// hermitian_interleaved layout, sawtooth data, empty strides, zero distances.
+template< class T, class cl_T, class fftw_T >
+void large_3D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.resize( 3, 2 );	// pad the two remaining axes with length 2
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_count = 1, in_dist = 0, out_dist = 0;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+	placeness::placeness_t where = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, in_strides, out_strides,
+		in_dist, out_dist, fmt, where );
+}
+
+TEST_F(accuracy_test_pow2_single, large_3D_in_place_hermitian_interleaved_to_real)
+{
+	try { large_3D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_in_place_hermitian_interleaved_to_real)
+{
+	try { large_3D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths {large2, 2, 2}, batch 1, hermitian_interleaved, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.push_back( 2 );
+	dims.push_back( 2 );
+	// Strides are left empty and both distances zero; real_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_3D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { large_3D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { large_3D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths {large2, 2, 2}, batch 1, hermitian_interleaved, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.push_back( 2 );
+	dims.push_back( 2 );
+	// Strides are left empty and both distances zero; complex_to_real receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_3D_out_of_place_hermitian_interleaved_to_real)
+{
+	try { large_3D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_out_of_place_hermitian_interleaved_to_real)
+{
+	try { large_3D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths {large2, 2, 2}, batch 1, hermitian_planar, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_real_to_hermitian_planar()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_planar;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	dims.push_back( 2 );
+	dims.push_back( 2 );
+	// Strides are left empty and both distances zero; real_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_3D_out_of_place_real_to_hermitian_planar)
+{
+	try { large_3D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_out_of_place_real_to_hermitian_planar)
+{
+	try { large_3D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths {2, large2, 2}, batch 1, hermitian_planar, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_hermitian_planar_to_real()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_planar;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( 2 );
+	dims.push_back( large2 );
+	dims.push_back( 2 );
+	// Strides are left empty and both distances zero; complex_to_real receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_3D_out_of_place_hermitian_planar_to_real)
+{
+	try { large_3D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_3D_out_of_place_hermitian_planar_to_real)
+{
+	try { large_3D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ special ^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: length normal2, batch 8, complex_interleaved both sides, in_place.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_complex_to_complex()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 8;
+	direction::direction_t fft_direction = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	// Strides are left empty and both distances zero; complex_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, transform_count, in_strides, out_strides, in_distance, out_distance, src_layout, dst_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_1D_array_complex_to_complex)
+{
+	try { normal_1D_array_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_array_complex_to_complex)
+{
+	try { normal_1D_array_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: length normal2, odd batch of 5, complex_interleaved both sides, in_place.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_complex_to_complex_with_odd_batch_size()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 5;
+	direction::direction_t fft_direction = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	// Strides are left empty and both distances zero; complex_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, transform_count, in_strides, out_strides, in_distance, out_distance, src_layout, dst_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_1D_array_complex_to_complex_with_odd_batch_size)
+{
+	try { normal_1D_array_complex_to_complex_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_array_complex_to_complex_with_odd_batch_size)
+{
+	try { normal_1D_array_complex_to_complex_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: length normal2, batch 8, hermitian_interleaved, in_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_real_to_hermitian()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 8;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	// Strides are left empty and both distances zero; real_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_1D_array_real_to_hermitian)
+{
+	try { normal_1D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_array_real_to_hermitian)
+{
+	try { normal_1D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: length normal2, odd batch of 5, hermitian_interleaved, in_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_real_to_hermitian_with_odd_batch_size()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 5;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	// Strides are left empty and both distances zero; real_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{
+	try { normal_1D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{
+	try { normal_1D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: length normal2, batch 8, hermitian_interleaved, in_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_hermitian_to_real()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 8;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	// Strides are left empty and both distances zero; complex_to_real receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_1D_array_hermitian_to_real)
+{
+	try { normal_1D_array_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_array_hermitian_to_real)
+{
+	try { normal_1D_array_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: length normal2, odd batch of 5, hermitian_planar, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_hermitian_to_real_with_odd_batch_size()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 5;
+	layout::buffer_layout_t complex_layout = layout::hermitian_planar;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	// Strides are left empty and both distances zero; complex_to_real receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, normal_1D_array_hermitian_to_real_with_odd_batch_size)
+{
+	try { normal_1D_array_hermitian_to_real_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_array_hermitian_to_real_with_odd_batch_size)
+{
+	try { normal_1D_array_hermitian_to_real_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths {small2, small2}, batch 8, hermitian_interleaved, in_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_real_to_hermitian()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 8;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	// Strides are left empty and both distances zero; real_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, small_2D_array_real_to_hermitian)
+{
+	try { small_2D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_array_real_to_hermitian)
+{
+	try { small_2D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: lengths {small2, small2}, odd batch of 5, hermitian_interleaved, in_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_real_to_hermitian_with_odd_batch_size()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 5;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	// Strides are left empty and both distances zero; real_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, small_2D_array_real_to_hermitian_with_odd_batch_size)
+{
+	try { small_2D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_array_real_to_hermitian_with_odd_batch_size)
+{
+	try { small_2D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths {small2, small2}, batch 8, hermitian_interleaved, in_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_hermitian_to_real()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 8;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	// Strides are left empty and both distances zero; complex_to_real receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, small_2D_array_hermitian_to_real)
+{
+	try { small_2D_array_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_array_hermitian_to_real)
+{
+	try { small_2D_array_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: lengths {small2, small2}, odd batch of 5, hermitian_planar, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_hermitian_to_real_with_odd_batch_size()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 5;
+	layout::buffer_layout_t complex_layout = layout::hermitian_planar;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	// Strides are left empty and both distances zero; complex_to_real receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, small_2D_array_hermitian_to_real_with_odd_batch_size)
+{
+	try { small_2D_array_hermitian_to_real_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_array_hermitian_to_real_with_odd_batch_size)
+{
+	try { small_2D_array_hermitian_to_real_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: length large2, batch 2, complex_interleaved both sides, in_place.
+template< class T, class cl_T, class fftw_T >
+void large_1D_array_complex_to_complex()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 2;
+	direction::direction_t fft_direction = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	// Strides are left empty and both distances zero; complex_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, transform_count, in_strides, out_strides, in_distance, out_distance, src_layout, dst_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, large_1D_array_complex_to_complex)
+{
+	try { large_1D_array_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_array_complex_to_complex)
+{
+	try { large_1D_array_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: length 1024 with a batch of 65536, complex_interleaved both sides, in_place.
+template< class T, class cl_T, class fftw_T >
+void astoundingly_large_1D_complex_to_complex()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 65536;
+	direction::direction_t fft_direction = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	std::vector<size_t> dims;
+	dims.push_back( 1024 );
+	// Strides are left empty and both distances zero; complex_to_complex receives them as-is.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, transform_count, in_strides, out_strides, in_distance, out_distance, src_layout, dst_layout, buffer_placement );
+}
+
+// DISABLED_ prefix: Google Test compiles these but skips them unless disabled tests are requested.
+TEST_F(accuracy_test_pow2_single, DISABLED_astoundingly_large_1D_complex_to_complex)
+{
+	try { astoundingly_large_1D_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, DISABLED_astoundingly_large_1D_complex_to_complex)
+{
+	try { astoundingly_large_1D_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward complex_to_complex case: length 4, batch 1, complex_planar in, complex_interleaved out, out_of_place.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_complex_to_complex()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	direction::direction_t fft_direction = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( 4 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, fft_direction, dims, transform_count, in_strides, out_strides, in_distance, out_distance, src_layout, dst_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, very_small_1D_non_unit_stride_complex_to_complex)
+{
+	try { very_small_1D_non_unit_stride_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_small_1D_non_unit_stride_complex_to_complex)
+{
+	try { very_small_1D_non_unit_stride_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: length small2, batch 1, hermitian_interleaved, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_1D_non_unit_stride_real_to_hermitian()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, small_1D_non_unit_stride_real_to_hermitian)
+{
+	try { small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_non_unit_stride_real_to_hermitian)
+{
+	try { small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: length small2, batch 1, hermitian_interleaved, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_1D_non_unit_stride_hermitian_to_real()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, small_1D_non_unit_stride_hermitian_to_real)
+{
+	try { small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_1D_non_unit_stride_hermitian_to_real)
+{
+	try { small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: length 8, batch 1, hermitian_interleaved, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_real_to_hermitian()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( 8 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, very_small_1D_non_unit_stride_real_to_hermitian)
+{
+	try { very_small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_small_1D_non_unit_stride_real_to_hermitian)
+{
+	try { very_small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: length 8, batch 1, hermitian_interleaved, out_of_place, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_hermitian_to_real()
+{
+	data_pattern input_pattern = sawtooth;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( 8 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, very_small_1D_non_unit_stride_hermitian_to_real)
+{
+	try { very_small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_small_1D_non_unit_stride_hermitian_to_real)
+{
+	try { very_small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// real_to_complex case: length 4, batch 1, hermitian_interleaved, out_of_place, impulse data.
+template< class T, class cl_T, class fftw_T >
+void very_very_small_1D_non_unit_stride_real_to_hermitian()
+{
+	data_pattern input_pattern = impulse;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( 4 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, very_very_small_1D_non_unit_stride_real_to_hermitian)
+{
+	try { very_very_small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_very_small_1D_non_unit_stride_real_to_hermitian)
+{
+	try { very_very_small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// complex_to_real case: length 4, batch 1, hermitian_interleaved, out_of_place, impulse data.
+template< class T, class cl_T, class fftw_T >
+void very_very_small_1D_non_unit_stride_hermitian_to_real()
+{
+	data_pattern input_pattern = impulse;
+	size_t transform_count = 1;
+	layout::buffer_layout_t complex_layout = layout::hermitian_interleaved;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	std::vector<size_t> dims;
+	dims.push_back( 4 );
+	// Input and output each get a single stride value of 3; both distances stay zero.
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 3 );
+	out_strides.push_back( 3 );
+	size_t in_distance = 0, out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, transform_count, in_strides, out_strides, in_distance, out_distance, complex_layout, buffer_placement );
+}
+
+// Instantiated for float and double; a std::exception from the case is passed to handle_exception.
+TEST_F(accuracy_test_pow2_single, very_very_small_1D_non_unit_stride_hermitian_to_real)
+{
+	try { very_very_small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_very_small_1D_non_unit_stride_hermitian_to_real)
+{
+	try { very_very_small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_complex_to_complex()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 2;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	input_strides.push_back( 42 );
+	output_strides.push_back( 42 );
+	size_t input_distance = lengths[lengths.size()-1] * input_strides[input_strides.size()-1] + 14;
+	size_t output_distance = lengths[lengths.size()-1] * output_strides[output_strides.size()-1] + 14;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	direction::direction_t direction = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_non_unit_stride_and_distance_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_non_unit_stride_and_distance_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_non_unit_stride_and_distance_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_non_unit_stride_and_distance_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_and_distance_real_to_complex()
+{
+	// Out-of-place real_to_complex case, hermitian_interleaved layout, impulse
+	// data: length 4, batch 2, and the same stride (2) and distance (4*2 + 2)
+	// on the input and output sides.
+	data_pattern shape = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 1, 4 );
+
+	std::vector<size_t> in_stride( 1, 2 );
+	size_t in_dist = dims.back() * in_stride.back() + 2;
+
+	// The output geometry is a straight copy of the input geometry.
+	std::vector<size_t> out_stride( in_stride );
+	size_t out_dist = in_dist;
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, very_small_1D_non_unit_stride_and_distance_real_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { very_small_1D_non_unit_stride_and_distance_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_small_1D_non_unit_stride_and_distance_real_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { very_small_1D_non_unit_stride_and_distance_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex()
+{
+	// Out-of-place real_to_complex case, hermitian_interleaved layout, impulse
+	// data: length 4, batch 2, with different geometry on each side
+	// (input stride 16 / pad 128, output stride 2 / pad 2).
+	data_pattern shape = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 1, 4 );
+
+	// Input side.
+	std::vector<size_t> in_stride( 1, 16 );
+	size_t in_dist = dims.back() * in_stride.back() + 128;
+
+	// Output side.
+	std::vector<size_t> out_stride( 1, 2 );
+	size_t out_dist = dims.back() * out_stride.back() + 2;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex()
+{
+	// In-place real_to_complex case, hermitian_interleaved layout, impulse
+	// data: length 4, batch 2, with different geometry on each side
+	// (input stride 16 / pad 128, output stride 2 / pad 2).
+	data_pattern shape = impulse;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 1, 4 );
+
+	// Input side.
+	std::vector<size_t> in_stride( 1, 16 );
+	size_t in_dist = dims.back() * in_stride.back() + 128;
+
+	// Output side.
+	std::vector<size_t> out_stride( 1, 2 );
+	size_t out_dist = dims.back() * out_stride.back() + 2;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_user_defined_scale_complex_to_complex()
+{
+	// Forward complex_to_complex case with an explicit trailing scale argument
+	// of 42.0f: length normal2, batch 1, planar in / interleaved out, out of
+	// place, sawtooth data, strides left empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode, 42.0f );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_forward_user_defined_scale_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_forward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_forward_user_defined_scale_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_forward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_user_defined_scale_complex_to_complex()
+{
+	// Backward complex_to_complex case with an explicit trailing scale argument
+	// of 42.5f: length normal2, batch 1, planar in / interleaved out, out of
+	// place, sawtooth data, strides left empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode, 42.5f );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_backward_user_defined_scale_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_backward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_backward_user_defined_scale_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_backward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_real_to_hermitian()
+{
+	// Out-of-place real_to_complex case, hermitian_interleaved layout, sawtooth
+	// data: length normal2, batch 2, stride 42 and distance normal2*42 + 14 on
+	// both the input and the output side.
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 1, normal2 );
+	std::vector<size_t> in_stride( 1, 42 ), out_stride( 1, 42 );
+	size_t in_dist = dims.back() * in_stride.back() + 14;
+	size_t out_dist = dims.back() * out_stride.back() + 14;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_non_unit_stride_and_distance_real_to_hermitian)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_non_unit_stride_and_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_non_unit_stride_and_distance_real_to_hermitian)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_non_unit_stride_and_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_user_defined_scale_real_to_hermitian()
+{
+	// Out-of-place real_to_complex case with an explicit trailing scale argument
+	// of 42.0f: length normal2, batch 1, hermitian_planar layout, impulse data,
+	// strides left empty and distances left at 0.
+	data_pattern shape = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_planar;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode, 42.0f );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_user_defined_scale_real_to_hermitian)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_user_defined_scale_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_user_defined_scale_real_to_hermitian)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_user_defined_scale_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	// Out-of-place complex_to_real case, hermitian_interleaved layout, sawtooth
+	// data: length normal2, batch 2, stride 42 and distance normal2*42 + 14 on
+	// both the input and the output side.
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 1, normal2 );
+	std::vector<size_t> in_stride( 1, 42 ), out_stride( 1, 42 );
+	size_t in_dist = dims.back() * in_stride.back() + 14;
+	size_t out_dist = dims.back() * out_stride.back() + 14;
+
+	complex_to_real<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_non_unit_stride_and_distance_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_non_unit_stride_and_distance_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_real_to_hermitian()
+{
+	// Out-of-place real_to_complex case, hermitian_interleaved layout, sawtooth
+	// data: 4x4 lengths, batch 2, padded strides on both sides and both
+	// distances left at 0.
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 2, 4 );
+
+	// Input strides: 5, then dims[0] * 5 + 1.
+	std::vector<size_t> in_stride( 1, 5 );
+	in_stride.push_back( dims.front() * in_stride.front() + 1 );
+
+	// Output strides: 2, then dims[0] * 2 + 2.
+	std::vector<size_t> out_stride( 1, 2 );
+	out_stride.push_back( dims.front() * out_stride.front() + 2 );
+
+	size_t in_dist = 0, out_dist = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_non_unit_stride_real_to_hermitian)
+{	// single-precision (float) instantiation of the shared case
+	try { small_2D_non_unit_stride_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_non_unit_stride_real_to_hermitian)
+{	// double-precision (double) instantiation of the shared case
+	try { small_2D_non_unit_stride_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_distance_real_to_hermitian()
+{
+	// Out-of-place real_to_complex case, hermitian_interleaved layout, sawtooth
+	// data: 4x4 lengths, batch 2, strides left empty, and distances of
+	// 4*4 + 4 (input) and 4*4 + 5 (output).
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 2, 4 );
+	std::vector<size_t> in_stride, out_stride;
+
+	size_t points = dims[0] * dims[1];
+	size_t in_dist = points + 4;
+	size_t out_dist = points + 5;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_non_unit_distance_real_to_hermitian)
+{	// single-precision (float) instantiation of the shared case
+	try { small_2D_non_unit_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_non_unit_distance_real_to_hermitian)
+{	// double-precision (double) instantiation of the shared case
+	try { small_2D_non_unit_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_real_to_hermitian()
+{
+	// Out-of-place real_to_complex case, hermitian_interleaved layout, sawtooth
+	// data: 4x4 lengths, batch 2, padded strides and padded distances that
+	// differ between the input and the output side.
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 2, 4 );
+
+	// Input: strides 5 and dims[0] * 5 + 1; distance = last length * last stride + 30.
+	std::vector<size_t> in_stride( 1, 5 );
+	in_stride.push_back( dims.front() * in_stride.front() + 1 );
+	size_t in_dist = dims.back() * in_stride.back() + 30;
+
+	// Output: strides 2 and dims[0] * 2 + 2; distance = last length * last stride + 42.
+	std::vector<size_t> out_stride( 1, 2 );
+	out_stride.push_back( dims.front() * out_stride.front() + 2 );
+	size_t out_dist = dims.back() * out_stride.back() + 42;
+
+	real_to_complex<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_non_unit_stride_and_distance_real_to_hermitian)
+{	// single-precision (float) instantiation of the shared case
+	try { small_2D_non_unit_stride_and_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_non_unit_stride_and_distance_real_to_hermitian)
+{	// double-precision (double) instantiation of the shared case
+	try { small_2D_non_unit_stride_and_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	// Out-of-place complex_to_real case, hermitian_interleaved layout, sawtooth
+	// data: 4x4 lengths, batch 2, padded strides and padded distances that
+	// differ between the input and the output side.
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_interleaved;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 2, 4 );
+
+	// Input: strides 12 and dims[0] * 12 + 9; distance = last length * last stride + 50.
+	std::vector<size_t> in_stride( 1, 12 );
+	in_stride.push_back( dims.front() * in_stride.front() + 9 );
+	size_t in_dist = dims.back() * in_stride.back() + 50;
+
+	// Output: strides 7 and dims[0] * 7 + 32; distance = last length * last stride + 60.
+	std::vector<size_t> out_stride( 1, 7 );
+	out_stride.push_back( dims.front() * out_stride.front() + 32 );
+	size_t out_dist = dims.back() * out_stride.back() + 60;
+
+	complex_to_real<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_non_unit_stride_and_distance_hermitian_to_real)
+{	// single-precision (float) instantiation of the shared case
+	try { small_2D_non_unit_stride_and_distance_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_non_unit_stride_and_distance_hermitian_to_real)
+{	// double-precision (double) instantiation of the shared case
+	try { small_2D_non_unit_stride_and_distance_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_user_defined_scale_hermitian_to_real()
+{
+	// Out-of-place complex_to_real case with an explicit trailing scale argument
+	// of 42.0f: length normal2, batch 1, hermitian_planar layout, sawtooth data,
+	// strides left empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t fmt = layout::hermitian_planar;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_real<T, cl_T, fftw_T>( shape, dims, howmany, in_stride, out_stride, in_dist, out_dist, fmt, mode, 42.0f );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_user_defined_scale_hermitian_to_real)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_1D_user_defined_scale_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_user_defined_scale_hermitian_to_real)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_1D_user_defined_scale_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_1D_forward_complex_to_complex()
+{
+	// Forward complex_to_complex case on a length-1 transform with an explicit
+	// trailing scale argument of 0.42f: batch 1, planar in / interleaved out,
+	// out of place, impulse data, strides left empty and distances left at 0.
+	data_pattern shape = impulse;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, 1 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode, 0.42f );
+}
+
+TEST_F(accuracy_test_pow2_single, single_point_1D_forward_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { single_point_1D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, single_point_1D_forward_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { single_point_1D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_1D_backward_complex_to_complex()
+{
+	// Backward complex_to_complex case on a length-1 transform with an explicit
+	// trailing scale argument of 0.42f: batch 1, interleaved in / planar out,
+	// out of place, impulse data, strides left empty and distances left at 0.
+	data_pattern shape = impulse;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_interleaved;
+	layout::buffer_layout_t dst_fmt = layout::complex_planar;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, 1 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode, 0.42f );
+}
+
+TEST_F(accuracy_test_pow2_single, single_point_1D_backward_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { single_point_1D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, single_point_1D_backward_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { single_point_1D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_complex_to_complex()
+{
+	// Forward complex_to_complex case, planar in / interleaved out, out of
+	// place, sawtooth data: small2 x small2 lengths, batch 1, strides of 3 and
+	// small2 * 3 + 20 on both sides, distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 1, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 2, small2 );
+
+	std::vector<size_t> in_stride( 1, 3 );
+	in_stride.push_back( dims.front() * in_stride.front() + 20 );
+
+	std::vector<size_t> out_stride( 1, 3 );
+	out_stride.push_back( dims.front() * out_stride.front() + 20 );
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_non_unit_stride_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { small_2D_non_unit_stride_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_non_unit_stride_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { small_2D_non_unit_stride_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_complex_to_complex()
+{
+	// Forward complex_to_complex case, interleaved in / planar out, out of
+	// place, sawtooth data: small2 x small2 lengths, batch 2, strides of 42 and
+	// small2 * 42 + 19, distance = last length * last stride + 14 on both sides.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_interleaved;
+	layout::buffer_layout_t dst_fmt = layout::complex_planar;
+
+	size_t howmany = 2;
+	std::vector<size_t> dims( 2, small2 );
+	std::vector<size_t> in_stride( 1, 42 );
+	in_stride.push_back( dims.front() * in_stride.front() + 19 );
+	size_t in_dist = dims.back() * in_stride.back() + 14;
+	std::vector<size_t> out_stride( 1, 42 );
+	out_stride.push_back( dims.front() * out_stride.front() + 19 );
+	size_t out_dist = dims.back() * out_stride.back() + 14;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, small_2D_non_unit_stride_and_distance_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { small_2D_non_unit_stride_and_distance_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_2D_non_unit_stride_and_distance_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { small_2D_non_unit_stride_and_distance_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_user_defined_scale_complex_to_complex()
+{
+	// Forward complex_to_complex case with an explicit trailing scale argument
+	// of 42.0f: normal2 x normal2 lengths, batch 1, planar in / interleaved out,
+	// out of place, sawtooth data, strides left empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 1;
+	size_t in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 2, normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode, 42.0f );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_forward_user_defined_scale_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_2D_forward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_forward_user_defined_scale_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_2D_forward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_user_defined_scale_complex_to_complex()
+{
+	// Backward complex_to_complex case with an explicit trailing scale argument
+	// of 42.5f: normal2 x normal2 lengths, batch 1, planar in / interleaved out,
+	// out of place, sawtooth data, strides left empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 1;
+	size_t in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 2, normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode, 42.5f );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_backward_user_defined_scale_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_2D_backward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_backward_user_defined_scale_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_2D_backward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void rectangular_2D_array_complex_to_complex()
+{
+	// Forward in-place complex_to_complex case, interleaved layout on both
+	// sides, sawtooth data: small2 x normal2 lengths, batch 8, strides left
+	// empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t src_fmt = layout::complex_interleaved;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 8, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, small2 );
+	dims.push_back( normal2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, rectangular_2D_array_complex_to_complex)
+{	// single-precision (float) instantiation of the shared case
+	try { rectangular_2D_array_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, rectangular_2D_array_complex_to_complex)
+{	// double-precision (double) instantiation of the shared case
+	try { rectangular_2D_array_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_array_complex_to_complex_with_odd_batch_size()
+{
+	// Forward in-place complex_to_complex case, planar layout on both sides,
+	// sawtooth data: normal2 x small2 lengths, batch 5, strides left empty and
+	// distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t src_fmt = layout::complex_planar;
+	layout::buffer_layout_t dst_fmt = layout::complex_planar;
+
+	size_t howmany = 5, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, normal2 );
+	dims.push_back( small2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_array_complex_to_complex_with_odd_batch_size)
+{	// single-precision (float) instantiation of the shared case
+	try { normal_2D_array_complex_to_complex_with_odd_batch_size<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_array_complex_to_complex_with_odd_batch_size)
+{	// double-precision (double) instantiation of the shared case
+	try { normal_2D_array_complex_to_complex_with_odd_batch_size<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_array_forward_complex_to_complex()
+{
+	// Forward in-place complex_to_complex case, interleaved layout on both
+	// sides, sawtooth data: small2 x large2 lengths, batch 8, strides left
+	// empty and distances left at 0.
+	data_pattern shape = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t src_fmt = layout::complex_interleaved;
+	layout::buffer_layout_t dst_fmt = layout::complex_interleaved;
+
+	size_t howmany = 8, in_dist = 0, out_dist = 0;
+	std::vector<size_t> dims( 1, small2 );
+	dims.push_back( large2 );
+	std::vector<size_t> in_stride, out_stride;
+
+	complex_to_complex<T, cl_T, fftw_T>( shape, dir, dims, howmany, in_stride, out_stride, in_dist, out_dist, src_fmt, dst_fmt, mode );
+}
+
+TEST_F(accuracy_test_pow2_single, large_2D_array_forward_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { large_2D_array_forward_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_2D_array_forward_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { large_2D_array_forward_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward in-place interleaved -> interleaved transform of a small2 x large2 array, batch of 8, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void large_2D_array_backward_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( large2 );
+	size_t batch_count = 8;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow2_single, large_2D_array_backward_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { large_2D_array_backward_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_2D_array_backward_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { large_2D_array_backward_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward out-of-place planar -> interleaved transform of a 1 x 1 array, batch of 1, impulse input.
+template< class T, class cl_T, class fftw_T >
+void single_point_2D_forward_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; 0.42f goes in as the last argument (NOTE(review): presumably a scale - confirm).
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = impulse;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow2_single, single_point_2D_forward_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_2D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, single_point_2D_forward_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_2D_forward_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward out-of-place interleaved -> planar transform of a 1 x 1 array, batch of 1, impulse input.
+template< class T, class cl_T, class fftw_T >
+void single_point_2D_backward_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; 0.42f goes in as the last argument (NOTE(review): presumably a scale - confirm).
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = impulse;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow2_single, single_point_2D_backward_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_2D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, single_point_2D_backward_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_2D_backward_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward out-of-place planar -> interleaved transform of a 1 x 1 x 1 array, batch of 1, impulse input.
+template< class T, class cl_T, class fftw_T >
+void single_point_3D_forward_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; 0.42f goes in as the last argument (NOTE(review): presumably a scale - confirm).
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = impulse;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow2_single, single_point_3D_forward_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_3D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, single_point_3D_forward_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_3D_forward_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward out-of-place interleaved -> planar transform of a 1 x 1 x 1 array, batch of 1, impulse input.
+template< class T, class cl_T, class fftw_T >
+void single_point_3D_backward_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; 0.42f goes in as the last argument (NOTE(review): presumably a scale - confirm).
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = impulse;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow2_single, single_point_3D_backward_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_3D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, single_point_3D_backward_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { single_point_3D_backward_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward out-of-place planar -> interleaved transform of a small2^3 array with padded strides, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void small_3D_non_unit_stride_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	size_t batch_count = 1;
+
+	// First stride is 2; each following stride is the previous length times the previous stride, plus padding (20, then 17).
+	std::vector<size_t> src_strides;
+	src_strides.push_back( 2 );
+	src_strides.push_back( dims[0] * src_strides[0] + 20 );
+	src_strides.push_back( dims[1] * src_strides[1] + 17 );
+	std::vector<size_t> dst_strides( src_strides );
+
+	size_t src_distance = 0;
+	size_t dst_distance = src_distance;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_non_unit_stride_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_non_unit_stride_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_non_unit_stride_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_non_unit_stride_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward out-of-place interleaved -> planar transform of a small2^3 array with padded strides and distance, batch of 2, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void small_3D_non_unit_stride_and_distance_complex_to_complex()
+{
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	size_t batch_count = 2;
+
+	// First stride is 2; each following stride is the previous length times the previous stride, plus padding (19, then 3).
+	std::vector<size_t> src_strides;
+	src_strides.push_back( 2 );
+	src_strides.push_back( dims[0] * src_strides[0] + 19 );
+	src_strides.push_back( dims[1] * src_strides[1] + 3 );
+	// Distance is the last length times the last stride, plus 14; the output side copies both strides and distance.
+	size_t src_distance = dims.back() * src_strides.back() + 14;
+	std::vector<size_t> dst_strides( src_strides );
+	size_t dst_distance = src_distance;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_non_unit_stride_and_distance_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_non_unit_stride_and_distance_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_non_unit_stride_and_distance_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_non_unit_stride_and_distance_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls complex_to_complex_round_trip() for one normal2-length interleaved buffer (batch of 1) with sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_round_trip_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_interleaved;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_round_trip_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_round_trip_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_round_trip_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls complex_to_complex_round_trip() for one normal2 x normal2 planar buffer (batch of 1) with sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_round_trip_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( normal2 );
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count, buffer_layout );
+}
+
+// Calls complex_to_complex_round_trip() for one l0 x l1 planar buffer (batch of 1) with sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void testcase_2D_round_trip_complex_to_complex(size_t l0, size_t l1)
+{
+	data_pattern fill = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( l0 );
+	dims.push_back( l1 );
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count, buffer_layout );
+}
+
+// Regression test (1024 x 16 round trip) added to catch failures seen in transposes.
+TEST_F(accuracy_test_pow2_single, testcase1_2D_round_trip_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation with lengths 1024 and 16; a std::exception is handed to handle_exception().
+	try { testcase_2D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(1024, 16); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_round_trip_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_2D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_round_trip_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_2D_round_trip_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls complex_to_complex_round_trip() for one small2 x small2 x small2 planar buffer (batch of 1) with sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void small_3D_round_trip_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_round_trip_complex_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_round_trip_complex_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_round_trip_complex_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls real_to_complex_round_trip() for one normal2-length buffer (batch of 1) with impulse input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_round_trip_real_to_complex()
+{
+	data_pattern fill = impulse;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	real_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_1D_round_trip_real_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_1D_round_trip_real_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls real_to_complex_round_trip() for one large2-length buffer (batch of 1) with impulse input.
+template< class T, class cl_T, class fftw_T >
+void large_1D_round_trip_real_to_complex()
+{
+	data_pattern fill = impulse;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( large2 );
+	real_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count );
+}
+
+TEST_F(accuracy_test_pow2_single, large_1D_round_trip_real_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { large_1D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, large_1D_round_trip_real_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { large_1D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls real_to_complex_round_trip() for one normal2 x normal2 buffer (batch of 1) with impulse input.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_round_trip_real_to_complex()
+{
+	data_pattern fill = impulse;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal2 );
+	dims.push_back( normal2 );
+	real_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count );
+}
+
+TEST_F(accuracy_test_pow2_single, normal_2D_round_trip_real_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_2D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, normal_2D_round_trip_real_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_2D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Calls real_to_complex_round_trip() for one small2 x small2 x small2 buffer (batch of 1) with impulse input.
+template< class T, class cl_T, class fftw_T >
+void small_3D_round_trip_real_to_complex()
+{
+	data_pattern fill = impulse;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	dims.push_back( small2 );
+	real_to_complex_round_trip<T, cl_T, fftw_T>( fill, dims, batch_count );
+}
+
+TEST_F(accuracy_test_pow2_single, small_3D_round_trip_real_to_complex)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow2_double, small_3D_round_trip_real_to_complex)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { small_3D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+} //namespace
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
new file mode 100644
index 00000000..844e2158
--- /dev/null
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -0,0 +1,7356 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include<math.h>
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "typedefs.h"
+#include "accuracy_test_common.h"
+#include <stdexcept>
+#include <vector>
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Fixture named by the float-instantiated TEST_F cases in this file; it adds no state, setup or teardown of its own.
+class accuracy_test_pow3_single : public ::testing::Test {
+protected:
+	accuracy_test_pow3_single() {}
+	virtual ~accuracy_test_pow3_single() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Fixture named by the double-instantiated TEST_F cases in this file; it adds no state, setup or teardown of its own.
+class accuracy_test_pow3_double : public ::testing::Test {
+protected:
+	accuracy_test_pow3_double() {}
+	virtual ~accuracy_test_pow3_double() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+namespace power2
+{
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Forward in-place planar -> planar transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward in-place planar -> planar transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward in-place interleaved -> interleaved transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward in-place interleaved -> interleaved transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward out-of-place planar -> planar transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward out-of-place planar -> planar transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_planar, dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Forward out-of-place interleaved -> interleaved transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// float / cl_float / fftwf_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// double / cl_double / fftw_complex instantiation; a std::exception is handed to handle_exception().
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); } catch (const std::exception& caught) { handle_exception(caught); }
+}
+
+// *****************************************************
+// *****************************************************
+// Backward out-of-place interleaved -> interleaved transform of a normal3-length array, batch of 1, sawtooth input.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+
+	// Strides stay empty and distances zero; they are forwarded as-is.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	layout::buffer_layout_t src_layout = layout::complex_interleaved, dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length normal3 (batch 1):
+// forward, out_of_place, complex_planar in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length normal3 (batch 1):
+// backward, out_of_place, complex_planar in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length normal3 (batch 1):
+// forward, out_of_place, complex_interleaved in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try
+	{
+		normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try
+	{
+		normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length normal3 (batch 1):
+// backward, out_of_place, complex_interleaved in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try
+	{
+		normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try
+	{
+		normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 1D transform of length normal3 (batch 1):
+// in_place, hermitian_interleaved layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_in_place_real_to_hermitian_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	real_to_complex<T, cl_T, fftw_T>(
+		pattern, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_in_place_real_to_hermitian_interleaved)
+{
+	try
+	{
+		normal_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_in_place_real_to_hermitian_interleaved)
+{
+	try
+	{
+		normal_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one 1D transform of length normal3 (batch 1):
+// in_place, hermitian_interleaved layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_in_place_hermitian_interleaved_to_real()
+{
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_real<T, cl_T, fftw_T>(
+		pattern, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_in_place_hermitian_interleaved_to_real)
+{
+	try
+	{
+		normal_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_in_place_hermitian_interleaved_to_real)
+{
+	try
+	{
+		normal_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 1D transform of length normal3 (batch 1):
+// out_of_place, hermitian_interleaved layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	real_to_complex<T, cl_T, fftw_T>(
+		pattern, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	try
+	{
+		normal_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	try
+	{
+		normal_1D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one 1D transform of length normal3 (batch 1):
+// out_of_place, hermitian_interleaved layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_real<T, cl_T, fftw_T>(
+		pattern, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_out_of_place_hermitian_interleaved_to_real)
+{
+	try
+	{
+		normal_1D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_out_of_place_hermitian_interleaved_to_real)
+{
+	try
+	{
+		normal_1D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 1D transform of length normal3 (batch 1):
+// out_of_place, hermitian_planar layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_real_to_hermitian_planar()
+{
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	real_to_complex<T, cl_T, fftw_T>(
+		pattern, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_out_of_place_real_to_hermitian_planar)
+{
+	try
+	{
+		normal_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_out_of_place_real_to_hermitian_planar)
+{
+	try
+	{
+		normal_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one 1D transform of length normal3 (batch 1):
+// out_of_place, hermitian_planar layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_hermitian_planar_to_real()
+{
+	data_pattern pattern = sawtooth;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+
+	complex_to_real<T, cl_T, fftw_T>(
+		pattern, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_out_of_place_hermitian_planar_to_real)
+{
+	try
+	{
+		normal_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_out_of_place_hermitian_planar_to_real)
+{
+	try
+	{
+		normal_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 1D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// forward, in_place, complex_planar in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// backward, in_place, complex_planar in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// forward, in_place, complex_interleaved in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// backward, in_place, complex_interleaved in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// forward, out_of_place, complex_planar in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// backward, out_of_place, complex_planar in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	try
+	{
+		small_1D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// forward, out_of_place, complex_interleaved in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// backward, out_of_place, complex_interleaved in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// forward, out_of_place, complex_planar in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// backward, out_of_place, complex_planar in -> complex_interleaved out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	try
+	{
+		small_1D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// forward, out_of_place, complex_interleaved in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// double / cl_double / fftw_complex run; a std::exception is handed to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	try
+	{
+		small_1D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >();
+	}
+	catch( const std::exception& ex )
+	{
+		handle_exception( ex );
+	}
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 1D transform of length small3 (batch 1):
+// backward, out_of_place, complex_interleaved in -> complex_planar out.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	data_pattern pattern = sawtooth;
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+
+	// strides are left empty and distances zero; only the length is set
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch = 1;
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		pattern, direction, lengths, batch,
+		input_strides, output_strides, input_distance, output_distance,
+		in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_in_place_real_to_hermitian_interleaved()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (small3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_in_place_real_to_hermitian_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_in_place_real_to_hermitian_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_in_place_hermitian_interleaved_to_real()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (small3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_in_place_hermitian_interleaved_to_real)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_in_place_hermitian_interleaved_to_real)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (small3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_out_of_place_real_to_hermitian_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_out_of_place_real_to_hermitian_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (small3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_out_of_place_hermitian_interleaved_to_real)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_out_of_place_hermitian_interleaved_to_real)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_real_to_hermitian_planar()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (small3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_out_of_place_real_to_hermitian_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_out_of_place_real_to_hermitian_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_hermitian_planar_to_real()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (small3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_1D_out_of_place_hermitian_planar_to_real)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_1D_out_of_place_hermitian_planar_to_real)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 1D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_forward_in_place_complex_planar_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_forward_in_place_complex_planar_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::backward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_backward_in_place_complex_planar_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_backward_in_place_complex_planar_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::backward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::backward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::backward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::backward;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::forward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// What to run: direction, input/output buffer layouts, placement, data pattern.
+	direction::direction_t dir = direction::backward;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_in_place_real_to_hermitian_interleaved()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_in_place_real_to_hermitian_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_in_place_real_to_hermitian_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_in_place_hermitian_interleaved_to_real()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_in_place_hermitian_interleaved_to_real)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_in_place_hermitian_interleaved_to_real)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_out_of_place_real_to_hermitian_interleaved)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_out_of_place_real_to_hermitian_interleaved)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_out_of_place_hermitian_interleaved_to_real)
+{	// float instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_out_of_place_hermitian_interleaved_to_real)
+{	// double instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_real_to_hermitian_planar()
+{
+	// What to run: hermitian buffer layout, placement, data pattern.
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern input_pattern = sawtooth;
+
+	// Problem size: one length entry (large3), a batch of one, empty strides, zero distances.
+	std::vector<size_t> dims( 1, large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, place );
+}
+
+TEST_F( accuracy_test_pow3_single, large_1D_out_of_place_real_to_hermitian_planar )
+{
+	try { large_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, large_1D_out_of_place_real_to_hermitian_planar )
+{
+	try { large_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_hermitian_planar_to_real()
+{
+	// complex_to_real case: 1D of length large3, batch of 1, hermitian_planar layout, out_of_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( large3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, large_1D_out_of_place_hermitian_planar_to_real )
+{
+	try { large_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, large_1D_out_of_place_hermitian_planar_to_real )
+{
+	try { large_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, forward, in_place, complex_planar in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, backward, in_place, complex_planar in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, forward, in_place, complex_interleaved in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+	placeness::placeness_t placement = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, backward, in_place, complex_interleaved in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+	placeness::placeness_t placement = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, forward, out_of_place, complex_planar in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, backward, out_of_place, complex_planar in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, forward, out_of_place, complex_interleaved in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, backward, out_of_place, complex_interleaved in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, forward, out_of_place, complex_planar in, complex_interleaved out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, backward, out_of_place, complex_planar in, complex_interleaved out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_interleaved;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, forward, out_of_place, complex_interleaved in, complex_planar out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// complex_to_complex case: normal3 x normal3, batch of 1, backward, out_of_place, complex_interleaved in, complex_planar out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_interleaved;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_in_place_real_to_hermitian_interleaved()
+{
+	// real_to_complex case: normal3 x normal3, batch of 1, hermitian_interleaved layout, in_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placement = placeness::in_place;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_in_place_real_to_hermitian_interleaved )
+{
+	try { normal_2D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_in_place_real_to_hermitian_interleaved )
+{
+	try { normal_2D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_in_place_hermitian_interleaved_to_real()
+{
+	// complex_to_real case: normal3 x normal3, batch of 1, hermitian_interleaved layout, in_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placement = placeness::in_place;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_in_place_hermitian_interleaved_to_real )
+{
+	try { normal_2D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_in_place_hermitian_interleaved_to_real )
+{
+	try { normal_2D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	// real_to_complex case: normal3 x normal3, batch of 1, hermitian_interleaved layout, out_of_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placement = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_out_of_place_real_to_hermitian_interleaved )
+{
+	try { normal_2D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_out_of_place_real_to_hermitian_interleaved )
+{
+	try { normal_2D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	// complex_to_real case: normal3 x normal3, batch of 1, hermitian_interleaved layout, out_of_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placement = placeness::out_of_place;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_out_of_place_hermitian_interleaved_to_real )
+{
+	try { normal_2D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_out_of_place_hermitian_interleaved_to_real )
+{
+	try { normal_2D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_real_to_hermitian_planar()
+{
+	// real_to_complex case: normal3 x normal3, batch of 1, hermitian_planar layout, out_of_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_out_of_place_real_to_hermitian_planar )
+{
+	try { normal_2D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_out_of_place_real_to_hermitian_planar )
+{
+	try { normal_2D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_hermitian_planar_to_real()
+{
+	// complex_to_real case: normal3 x normal3, batch of 1, hermitian_planar layout, out_of_place, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( normal3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_planar;
+	placeness::placeness_t placement = placeness::out_of_place;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, distance_in, distance_out, buffer_layout, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, normal_2D_out_of_place_hermitian_planar_to_real )
+{
+	try { normal_2D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, normal_2D_out_of_place_hermitian_planar_to_real )
+{
+	try { normal_2D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 2D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: small3 x small3, batch of 1, forward, in_place, complex_planar in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::in_place;
+	direction::direction_t fft_direction = direction::forward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, small_2D_forward_in_place_complex_planar_to_complex_planar )
+{
+	try { small_2D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, small_2D_forward_in_place_complex_planar_to_complex_planar )
+{
+	try { small_2D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: small3 x small3, batch of 1, backward, in_place, complex_planar in and out, sawtooth pattern.
+	data_pattern fill = sawtooth;
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0;
+	size_t distance_out = 0;
+	layout::buffer_layout_t layout_in = layout::complex_planar;
+	layout::buffer_layout_t layout_out = layout::complex_planar;
+	placeness::placeness_t placement = placeness::in_place;
+	direction::direction_t fft_direction = direction::backward;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, fft_direction, dims, batch_count, strides_in, strides_out, distance_in, distance_out, layout_in, layout_out, placement );
+}
+
+TEST_F( accuracy_test_pow3_single, small_2D_backward_in_place_complex_planar_to_complex_planar )
+{
+	try { small_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }	// single-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+TEST_F( accuracy_test_pow3_double, small_2D_backward_in_place_complex_planar_to_complex_planar )
+{
+	try { small_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }	// double-precision instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// std::exception goes to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: forward, interleaved -> interleaved, in place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: backward, interleaved -> interleaved, in place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: forward, planar -> planar, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: backward, planar -> planar, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: forward, interleaved -> interleaved, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: backward, interleaved -> interleaved, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: forward, planar -> interleaved, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: backward, planar -> interleaved, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: forward, interleaved -> planar, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on small3 x small3 sawtooth data: backward, interleaved -> planar, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs real_to_complex on small3 x small3 sawtooth data: hermitian_interleaved layout, in place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_in_place_real_to_hermitian_interleaved()
+{
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_in_place_real_to_hermitian_interleaved) {
+	try { small_2D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_in_place_real_to_hermitian_interleaved) {
+	try { small_2D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_real on small3 x small3 sawtooth data: hermitian_interleaved layout, in place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_in_place_hermitian_interleaved_to_real()
+{
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_in_place_hermitian_interleaved_to_real) {
+	try { small_2D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_in_place_hermitian_interleaved_to_real) {
+	try { small_2D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs real_to_complex on small3 x small3 sawtooth data: hermitian_interleaved layout, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_out_of_place_real_to_hermitian_interleaved) {
+	try { small_2D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_out_of_place_real_to_hermitian_interleaved) {
+	try { small_2D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_real on small3 x small3 sawtooth data: hermitian_interleaved layout, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_out_of_place_hermitian_interleaved_to_real) {
+	try { small_2D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_out_of_place_hermitian_interleaved_to_real) {
+	try { small_2D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs real_to_complex on small3 x small3 sawtooth data: hermitian_planar layout, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_real_to_hermitian_planar()
+{
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_out_of_place_real_to_hermitian_planar) {
+	try { small_2D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_out_of_place_real_to_hermitian_planar) {
+	try { small_2D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_real on small3 x small3 sawtooth data: hermitian_planar layout, out of place.
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_hermitian_planar_to_real()
+{
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_out_of_place_hermitian_planar_to_real) {
+	try { small_2D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_out_of_place_hermitian_planar_to_real) {
+	try { small_2D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 2D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on MaxLength2D<T>(3) x normal3 sawtooth data: forward, planar -> planar, in place.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(3) );
+	dims.push_back( normal3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_2D_forward_in_place_complex_planar_to_complex_planar) {
+	try { large_2D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_2D_forward_in_place_complex_planar_to_complex_planar) {
+	try { large_2D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on MaxLength2D<T>(3) x normal3 sawtooth data: backward, planar -> planar, in place.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(3) );
+	dims.push_back( normal3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_2D_backward_in_place_complex_planar_to_complex_planar) {
+	try { large_2D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_2D_backward_in_place_complex_planar_to_complex_planar) {
+	try { large_2D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on MaxLength2D<T>(3) x normal3 sawtooth data: forward, interleaved -> interleaved, in place.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(3) );
+	dims.push_back( normal3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_2D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_2D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on MaxLength2D<T>(3) x normal3 sawtooth data: backward, interleaved -> interleaved, in place.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(3) );
+	dims.push_back( normal3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_2D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_2D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_2D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on MaxLength2D<T>(3) x normal3 sawtooth data: forward, planar -> planar, out of place.
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(3) );
+	dims.push_back( normal3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_2D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double precision (double, cl_double, fftw_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_2D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Runs complex_to_complex on MaxLength2D<T>(3) x normal3 sawtooth data: backward, planar -> planar, out of place.
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// One batch; stride vectors are left empty and both distances are zero.
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(3) );
+	dims.push_back( normal3 );
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+// Single precision (float, cl_float, fftwf_complex); std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_2D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: forward, out_of_place, complex_interleaved in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: backward, out_of_place, complex_interleaved in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// complex_to_complex case: forward, out_of_place, complex_planar in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// complex_to_complex case: backward, out_of_place, complex_planar in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// complex_to_complex case: forward, out_of_place, complex_interleaved in / complex_planar out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// complex_to_complex case: backward, out_of_place, complex_interleaved in / complex_planar out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_in_place_real_to_hermitian_interleaved()
+{
+	// real_to_complex case: hermitian_interleaved layout, in_place, sawtooth pattern
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_in_place_real_to_hermitian_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_in_place_real_to_hermitian_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_in_place_hermitian_interleaved_to_real()
+{
+	// complex_to_real case: hermitian_interleaved layout, in_place, sawtooth pattern
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_in_place_hermitian_interleaved_to_real)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_in_place_hermitian_interleaved_to_real)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	// real_to_complex case: hermitian_interleaved layout, out_of_place, sawtooth pattern
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_out_of_place_real_to_hermitian_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_out_of_place_real_to_hermitian_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	// complex_to_real case: hermitian_interleaved layout, out_of_place, sawtooth pattern
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_out_of_place_hermitian_interleaved_to_real)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_out_of_place_hermitian_interleaved_to_real)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_real_to_hermitian_planar()
+{
+	// real_to_complex case: hermitian_planar layout, out_of_place, sawtooth pattern
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_out_of_place_real_to_hermitian_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_out_of_place_real_to_hermitian_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_hermitian_planar_to_real()
+{
+	// complex_to_real case: hermitian_planar layout, out_of_place, sawtooth pattern
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { MaxLength2D<T>(3), normal3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( MaxLength2D<T>(3) );
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, large_2D_out_of_place_hermitian_planar_to_real)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_2D_out_of_place_hermitian_planar_to_real)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { large_2D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 3D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: forward, in_place, complex_planar in / complex_planar out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: backward, in_place, complex_planar in / complex_planar out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: forward, in_place, complex_interleaved in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: backward, in_place, complex_interleaved in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::in_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: forward, out_of_place, complex_planar in / complex_planar out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// complex_to_complex case: backward, out_of_place, complex_planar in / complex_planar out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: forward, out_of_place, complex_interleaved in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// complex_to_complex case: backward, out_of_place, complex_interleaved in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// complex_to_complex case: forward, out_of_place, complex_planar in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::forward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; std::exception is handed to handle_exception
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// complex_to_complex case: backward, out_of_place, complex_planar in / complex_interleaved out, sawtooth pattern
+	direction::direction_t direction = direction::backward;
+	placeness::placeness_t placeness = placeness::out_of_place;
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	// lengths = { normal3, small3, small3 }, batch 1; stride vectors stay empty, distances are 0
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	lengths.push_back( small3 );
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, out_of_place, complex_interleaved in, complex_planar out; lengths normal3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, out_of_place, complex_interleaved in, complex_planar out; lengths normal3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real_to_complex case: in_place, hermitian_interleaved layout; lengths small3 x normal3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_in_place_real_to_hermitian_interleaved()
+{
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to real_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_in_place_real_to_hermitian_interleaved) {
+	try { normal_3D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_in_place_real_to_hermitian_interleaved) {
+	try { normal_3D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_real case: in_place, hermitian_interleaved layout; lengths normal3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_in_place_hermitian_interleaved_to_real()
+{
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_real unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_in_place_hermitian_interleaved_to_real) {
+	try { normal_3D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_in_place_hermitian_interleaved_to_real) {
+	try { normal_3D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real_to_complex case: out_of_place, hermitian_interleaved layout; lengths small3 x small3 x normal3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to real_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( normal3 );
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_out_of_place_real_to_hermitian_interleaved) {
+	try { normal_3D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_out_of_place_real_to_hermitian_interleaved) {
+	try { normal_3D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_real case: out_of_place, hermitian_interleaved layout; lengths normal3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_real unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_out_of_place_hermitian_interleaved_to_real) {
+	try { normal_3D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_out_of_place_hermitian_interleaved_to_real) {
+	try { normal_3D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real_to_complex case: out_of_place, hermitian_planar layout; lengths normal3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_real_to_hermitian_planar()
+{
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to real_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_out_of_place_real_to_hermitian_planar) {
+	try { normal_3D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_out_of_place_real_to_hermitian_planar) {
+	try { normal_3D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_real case: out_of_place, hermitian_planar layout; lengths small3 x normal3 x small3.
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_hermitian_planar_to_real()
+{
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_real unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( normal3 );
+	dims.push_back( small3 );
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_3D_out_of_place_hermitian_planar_to_real) {
+	try { normal_3D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_3D_out_of_place_hermitian_planar_to_real) {
+	try { normal_3D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 3D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, in_place, complex_planar in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_forward_in_place_complex_planar_to_complex_planar) {
+	try { small_3D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_forward_in_place_complex_planar_to_complex_planar) {
+	try { small_3D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, in_place, complex_planar in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_backward_in_place_complex_planar_to_complex_planar) {
+	try { small_3D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_backward_in_place_complex_planar_to_complex_planar) {
+	try { small_3D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, in_place, complex_interleaved in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, in_place, complex_interleaved in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, out_of_place, complex_planar in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, out_of_place, complex_planar in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, out_of_place, complex_interleaved in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, out_of_place, complex_interleaved in and out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, out_of_place, complex_planar in, complex_interleaved out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_3D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, out_of_place, complex_planar in, complex_interleaved out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { small_3D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: forward, out_of_place, complex_interleaved in, complex_planar out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_3D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex_to_complex case: backward, out_of_place, complex_interleaved in, complex_planar out; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to complex_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// float / cl_float / fftwf_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// double / cl_double / fftw_complex run; any std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { small_3D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real_to_complex case: in_place, hermitian_interleaved layout; lengths small3 x small3 x small3.
+template< class T, class cl_T, class fftw_T >
+void small_3D_in_place_real_to_hermitian_interleaved()
+{
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern fill = sawtooth;
+
+	// Strides are left empty and distances zero; they are passed to real_to_complex unchanged.
+	std::vector<size_t> in_strides, out_strides;
+	size_t in_dist = 0, out_dist = 0, batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	dims.push_back( small3 );
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+// Single-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_single, _small_3D_in_place_real_to_hermitian_interleaved) {
+	try { small_3D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_double, _small_3D_in_place_real_to_hermitian_interleaved) {
+	try { small_3D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-real case on a small3 x small3 x small3 problem with a batch of 1:
+// in place, hermitian interleaved layout, sawtooth data pattern.
+// Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void small_3D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims( 3, small3 );
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_single, _small_3D_in_place_hermitian_interleaved_to_real) {
+	try { small_3D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_double, _small_3D_in_place_hermitian_interleaved_to_real) {
+	try { small_3D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real-to-complex case on a small3 x small3 x small3 problem with a batch of 1:
+// out of place, hermitian interleaved layout, sawtooth data pattern.
+// Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims( 3, small3 );
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_single, _small_3D_out_of_place_real_to_hermitian_interleaved) {
+	try { small_3D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_double, _small_3D_out_of_place_real_to_hermitian_interleaved) {
+	try { small_3D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-real case on a small3 x small3 x small3 problem with a batch of 1:
+// out of place, hermitian interleaved layout, sawtooth data pattern.
+// Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims( 3, small3 );
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_single, _small_3D_out_of_place_hermitian_interleaved_to_real) {
+	try { small_3D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_double, _small_3D_out_of_place_hermitian_interleaved_to_real) {
+	try { small_3D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real-to-complex case on a small3 x small3 x small3 problem with a batch of 1:
+// out of place, hermitian planar layout, sawtooth data pattern.
+// Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> dims( 3, small3 );
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern test_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_single, _small_3D_out_of_place_real_to_hermitian_planar) {
+	try { small_3D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_double, _small_3D_out_of_place_real_to_hermitian_planar) {
+	try { small_3D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-real case on a small3 x small3 x small3 problem with a batch of 1:
+// out of place, hermitian planar layout, sawtooth data pattern.
+// Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> dims( 3, small3 );
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_single, _small_3D_out_of_place_hermitian_planar_to_real) {
+	try { small_3D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation. NOTE(review): the leading '_' in the test name puts "__" into the class name gtest generates, which C++ reserves -- confirm it is intentional.
+TEST_F(accuracy_test_pow3_double, _small_3D_out_of_place_hermitian_planar_to_real) {
+	try { small_3D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 3D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: forward, in place, complex planar
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t input_layout = layout::complex_planar;
+	layout::buffer_layout_t output_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_forward_in_place_complex_planar_to_complex_planar) {
+	try { large_3D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_forward_in_place_complex_planar_to_complex_planar) {
+	try { large_3D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: backward, in place, complex planar
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t input_layout = layout::complex_planar;
+	layout::buffer_layout_t output_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_backward_in_place_complex_planar_to_complex_planar) {
+	try { large_3D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_backward_in_place_complex_planar_to_complex_planar) {
+	try { large_3D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: forward, in place, complex interleaved
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t input_layout = layout::complex_interleaved;
+	layout::buffer_layout_t output_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: backward, in place, complex interleaved
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t input_layout = layout::complex_interleaved;
+	layout::buffer_layout_t output_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: forward, out of place, complex planar
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_planar;
+	layout::buffer_layout_t output_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: backward, out of place, complex planar
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_planar;
+	layout::buffer_layout_t output_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: forward, out of place, complex interleaved
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_interleaved;
+	layout::buffer_layout_t output_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: backward, out of place, complex interleaved
+// input and output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_interleaved;
+	layout::buffer_layout_t output_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: forward, out of place, complex planar input
+// to complex interleaved output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_planar;
+	layout::buffer_layout_t output_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, 3 x 3 x large3 with a batch of 1: backward, out of place, complex planar input
+// to complex interleaved output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.back() = large3;	// ... then make the last dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_planar;
+	layout::buffer_layout_t output_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, 3 x large3 x 3 with a batch of 1: forward, out of place, complex interleaved input
+// to complex planar output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims[1] = large3;	// ... then make the middle dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_interleaved;
+	layout::buffer_layout_t output_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-complex case, large3 x 3 x 3 with a batch of 1: backward, out of place, complex interleaved input
+// to complex planar output, sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t input_layout = layout::complex_interleaved;
+	layout::buffer_layout_t output_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, transform_direction, dims, batch_size, input_strides, output_strides, input_distance, output_distance, input_layout, output_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real-to-complex case, 3 x 3 x large3 with a batch of 1: in place, hermitian interleaved layout,
+// sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.back() = large3;	// ... then make the last dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_in_place_real_to_hermitian_interleaved) {
+	try { large_3D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_in_place_real_to_hermitian_interleaved) {
+	try { large_3D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-real case, large3 x 3 x 3 with a batch of 1: in place, hermitian interleaved layout,
+// sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_in_place_hermitian_interleaved_to_real) {
+	try { large_3D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_in_place_hermitian_interleaved_to_real) {
+	try { large_3D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real-to-complex case, large3 x 3 x 3 with a batch of 1: out of place, hermitian interleaved layout,
+// sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_out_of_place_real_to_hermitian_interleaved) {
+	try { large_3D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_out_of_place_real_to_hermitian_interleaved) {
+	try { large_3D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D complex-to-real case, large3 x 3 x 3 with a batch of 1: out of place, hermitian interleaved layout,
+// sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision instantiation (float, cl_float, fftwf_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_out_of_place_hermitian_interleaved_to_real) {
+	try { large_3D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision instantiation (double, cl_double, fftw_complex); a std::exception is passed to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_out_of_place_hermitian_interleaved_to_real) {
+	try { large_3D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 3D real-to-complex case, large3 x 3 x 3 with a batch of 1: out of place, hermitian planar layout,
+// sawtooth data pattern. Strides are left empty and both distances are 0.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> dims( 3, size_t( 3 ) );	// start as 3 x 3 x 3 ...
+	dims.front() = large3;	// ... then make the first dimension large
+
+	std::vector<size_t> input_strides, output_strides;
+	size_t input_distance = 0, output_distance = 0;
+	size_t batch_size = 1;
+
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern test_pattern = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_size, input_strides, output_strides, input_distance, output_distance, hermitian_layout, buffer_placeness );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_out_of_place_real_to_hermitian_planar) {
+	try { large_3D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_out_of_place_real_to_hermitian_planar) {
+	try { large_3D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: lengths {3, large3, 3} (large3 is the second entry here), batch 1, empty stride vectors,
+// zero distances, hermitian_planar layout, out-of-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_hermitian_planar_to_real() {
+	std::vector<size_t> extents;
+	extents.push_back( 3 );
+	extents.push_back( large3 );
+	extents.push_back( 3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_3D_out_of_place_hermitian_planar_to_real) {
+	try { large_3D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_3D_out_of_place_hermitian_planar_to_real) {
+	try { large_3D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ special ^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 8, empty stride vectors, zero distances, complex_interleaved
+// input and output layouts, in-place, forward direction, sawtooth pattern; handed to complex_to_complex.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_complex_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 8;
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_complex.
+	complex_to_complex<T, cl_T, fftw_T>( waveform, fft_direction, extents, transform_count, strides_in, strides_out, distance_in, distance_out, source_layout, result_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_array_complex_to_complex) {
+	try { normal_1D_array_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_array_complex_to_complex) {
+	try { normal_1D_array_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 5, empty stride vectors, zero distances, complex_interleaved
+// input and output layouts, in-place, forward direction, sawtooth pattern; handed to complex_to_complex.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_complex_to_complex_with_odd_batch_size() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 5;
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_complex.
+	complex_to_complex<T, cl_T, fftw_T>( waveform, fft_direction, extents, transform_count, strides_in, strides_out, distance_in, distance_out, source_layout, result_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_array_complex_to_complex_with_odd_batch_size) {
+	try { normal_1D_array_complex_to_complex_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_array_complex_to_complex_with_odd_batch_size) {
+	try { normal_1D_array_complex_to_complex_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 8, empty stride vectors, zero distances,
+// hermitian_interleaved layout, in-place, sawtooth pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_real_to_hermitian() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 8;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_array_real_to_hermitian) {
+	try { normal_1D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_array_real_to_hermitian) {
+	try { normal_1D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 5, empty stride vectors, zero distances,
+// hermitian_interleaved layout, in-place, sawtooth pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_real_to_hermitian_with_odd_batch_size() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 5;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_array_real_to_hermitian_with_odd_batch_size) {
+	try { normal_1D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_array_real_to_hermitian_with_odd_batch_size) {
+	try { normal_1D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 8, empty stride vectors, zero distances,
+// hermitian_interleaved layout, in-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_hermitian_to_real() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 8;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_array_hermitian_to_real) {
+	try { normal_1D_array_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_array_hermitian_to_real) {
+	try { normal_1D_array_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 5, empty stride vectors, zero distances,
+// hermitian_planar layout, out-of-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_hermitian_to_real_with_odd_batch_size() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 5;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_array_hermitian_to_real_with_odd_batch_size) {
+	try { normal_1D_array_hermitian_to_real_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_array_hermitian_to_real_with_odd_batch_size) {
+	try { normal_1D_array_hermitian_to_real_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: lengths {small3, small3}, batch 8, empty stride vectors, zero distances,
+// hermitian_interleaved layout, in-place, sawtooth pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_real_to_hermitian() {
+	std::vector<size_t> extents;
+	extents.push_back( small3 );
+	extents.push_back( small3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 8;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_array_real_to_hermitian) {
+	try { small_2D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_array_real_to_hermitian) {
+	try { small_2D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: lengths {small3, small3}, batch 5, empty stride vectors, zero distances,
+// hermitian_interleaved layout, in-place, sawtooth pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_real_to_hermitian_with_odd_batch_size() {
+	std::vector<size_t> extents;
+	extents.push_back( small3 );
+	extents.push_back( small3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 5;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_array_real_to_hermitian_with_odd_batch_size) {
+	try { small_2D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_array_real_to_hermitian_with_odd_batch_size) {
+	try { small_2D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: lengths {small3, small3}, batch 8, empty stride vectors, zero distances,
+// hermitian_interleaved layout, in-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_hermitian_to_real() {
+	std::vector<size_t> extents;
+	extents.push_back( small3 );
+	extents.push_back( small3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 8;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_array_hermitian_to_real) {
+	try { small_2D_array_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_array_hermitian_to_real) {
+	try { small_2D_array_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: lengths {small3, small3}, batch 5, empty stride vectors, zero distances,
+// hermitian_planar layout, out-of-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_hermitian_to_real_with_odd_batch_size() {
+	std::vector<size_t> extents;
+	extents.push_back( small3 );
+	extents.push_back( small3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 5;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_2D_array_hermitian_to_real_with_odd_batch_size) {
+	try { small_2D_array_hermitian_to_real_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_2D_array_hermitian_to_real_with_odd_batch_size) {
+	try { small_2D_array_hermitian_to_real_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (large3), batch 2, empty stride vectors, zero distances, complex_interleaved
+// input and output layouts, in-place, forward direction, sawtooth pattern; handed to complex_to_complex.
+template< class T, class cl_T, class fftw_T >
+void large_1D_array_complex_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( large3 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 2;
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_complex.
+	complex_to_complex<T, cl_T, fftw_T>( waveform, fft_direction, extents, transform_count, strides_in, strides_out, distance_in, distance_out, source_layout, result_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, large_1D_array_complex_to_complex) {
+	try { large_1D_array_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, large_1D_array_complex_to_complex) {
+	try { large_1D_array_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (2187), batch 65536, empty stride vectors, zero distances, complex_interleaved
+// input and output layouts, in-place, forward direction, sawtooth pattern; handed to complex_to_complex.
+template< class T, class cl_T, class fftw_T >
+void astoundingly_large_1D_complex_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( 2187 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 65536;
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t buffer_placement = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_complex.
+	complex_to_complex<T, cl_T, fftw_T>( waveform, fft_direction, extents, transform_count, strides_in, strides_out, distance_in, distance_out, source_layout, result_layout, buffer_placement );
+}
+
+// Carries the DISABLED_ name prefix; single-precision run (float, cl_float, fftwf_complex), exceptions go to handle_exception.
+TEST_F(accuracy_test_pow3_single, DISABLED_astoundingly_large_1D_complex_to_complex) {
+	try { astoundingly_large_1D_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Carries the DISABLED_ name prefix; double-precision run (double, cl_double, fftw_complex), exceptions go to handle_exception.
+TEST_F(accuracy_test_pow3_double, DISABLED_astoundingly_large_1D_complex_to_complex) {
+	try { astoundingly_large_1D_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (9), batch 1, stride 3 on input and output, zero distances, complex_planar input,
+// complex_interleaved output, out-of-place, forward direction, sawtooth pattern; handed to complex_to_complex.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_complex_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( 9 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_complex.
+	complex_to_complex<T, cl_T, fftw_T>( waveform, fft_direction, extents, transform_count, strides_in, strides_out, distance_in, distance_out, source_layout, result_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_small_1D_non_unit_stride_complex_to_complex) {
+	try { very_small_1D_non_unit_stride_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_small_1D_non_unit_stride_complex_to_complex) {
+	try { very_small_1D_non_unit_stride_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (small3), batch 1, stride 3 on input and output, zero distances,
+// hermitian_interleaved layout, out-of-place, sawtooth pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void small_1D_non_unit_stride_real_to_hermitian() {
+	std::vector<size_t> extents;
+	extents.push_back( small3 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_non_unit_stride_real_to_hermitian) {
+	try { small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_non_unit_stride_real_to_hermitian) {
+	try { small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (small3), batch 1, stride 3 on input and output, zero distances,
+// hermitian_interleaved layout, out-of-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void small_1D_non_unit_stride_hermitian_to_real() {
+	std::vector<size_t> extents;
+	extents.push_back( small3 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, small_1D_non_unit_stride_hermitian_to_real) {
+	try { small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, small_1D_non_unit_stride_hermitian_to_real) {
+	try { small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (27), batch 1, stride 3 on input and output, zero distances,
+// hermitian_interleaved layout, out-of-place, sawtooth pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_real_to_hermitian() {
+	std::vector<size_t> extents;
+	extents.push_back( 27 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_small_1D_non_unit_stride_real_to_hermitian) {
+	try { very_small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_small_1D_non_unit_stride_real_to_hermitian) {
+	try { very_small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (27), batch 1, stride 3 on input and output, zero distances,
+// hermitian_interleaved layout, out-of-place, sawtooth pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_hermitian_to_real() {
+	std::vector<size_t> extents;
+	extents.push_back( 27 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = sawtooth;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_small_1D_non_unit_stride_hermitian_to_real) {
+	try { very_small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_small_1D_non_unit_stride_hermitian_to_real) {
+	try { very_small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (9), batch 1, stride 3 on input and output, zero distances,
+// hermitian_interleaved layout, out-of-place, impulse pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void very_very_small_1D_non_unit_stride_real_to_hermitian() {
+	std::vector<size_t> extents;
+	extents.push_back( 9 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = impulse;
+
+	// Forward the configuration to real_to_complex.
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_very_small_1D_non_unit_stride_real_to_hermitian) {
+	try { very_very_small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_very_small_1D_non_unit_stride_real_to_hermitian) {
+	try { very_very_small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (9), batch 1, stride 3 on input and output, zero distances,
+// hermitian_interleaved layout, out-of-place, impulse pattern; handed to complex_to_real.
+template< class T, class cl_T, class fftw_T >
+void very_very_small_1D_non_unit_stride_hermitian_to_real() {
+	std::vector<size_t> extents;
+	extents.push_back( 9 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 3 );
+	strides_out.push_back( 3 );
+	size_t distance_in = 0, distance_out = 0;
+	size_t transform_count = 1;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = impulse;
+
+	// Forward the configuration to complex_to_real.
+	complex_to_real<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_very_small_1D_non_unit_stride_hermitian_to_real) {
+	try { very_very_small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_very_small_1D_non_unit_stride_hermitian_to_real) {
+	try { very_very_small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (normal3), batch 2, stride 42 on input and output, computed distances, complex_planar
+// input, complex_interleaved output, out-of-place, forward, sawtooth pattern; handed to complex_to_complex.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_complex_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( normal3 );
+	std::vector<size_t> strides_in, strides_out;
+	strides_in.push_back( 42 );
+	strides_out.push_back( 42 );
+	// Each distance is (last length * last stride of that side) + 14.
+	size_t distance_in = extents.back() * strides_in.back() + 14;
+	size_t distance_out = extents.back() * strides_out.back() + 14;
+	size_t transform_count = 2;
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	data_pattern waveform = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( waveform, fft_direction, extents, transform_count, strides_in, strides_out, distance_in, distance_out, source_layout, result_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, normal_1D_non_unit_stride_and_distance_complex_to_complex) {
+	try { normal_1D_non_unit_stride_and_distance_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, normal_1D_non_unit_stride_and_distance_complex_to_complex) {
+	try { normal_1D_non_unit_stride_and_distance_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (9), batch 2, identical non-unit stride and distance on both sides,
+// hermitian_interleaved layout, out-of-place, impulse pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_and_distance_real_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( 9 );
+	size_t transform_count = 2;
+
+	// Input side: stride 2, distance = last length * last stride + 2.
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 2 );
+	size_t distance_in = extents.back() * strides_in.back() + 2;
+
+	// Output side is a copy of the input stride vector and distance.
+	std::vector<size_t> strides_out( strides_in );
+	size_t distance_out = distance_in;
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = impulse;
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_small_1D_non_unit_stride_and_distance_real_to_complex) {
+	try { very_small_1D_non_unit_stride_and_distance_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_small_1D_non_unit_stride_and_distance_real_to_complex) {
+	try { very_small_1D_non_unit_stride_and_distance_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Case setup: one length (9), batch 2, different stride and distance for input and output,
+// hermitian_interleaved layout, out-of-place, impulse pattern; handed to real_to_complex.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex() {
+	std::vector<size_t> extents;
+	extents.push_back( 9 );
+	size_t transform_count = 2;
+
+	// Input side: stride 16, distance = last length * last stride + 128.
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 16 );
+	size_t distance_in = extents.back() * strides_in.back() + 128;
+
+	// Output side: stride 2, distance = last length * last stride + 2.
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 2 );
+	size_t distance_out = extents.back() * strides_out.back() + 2;
+
+	placeness::placeness_t buffer_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern waveform = impulse;
+	real_to_complex<T, cl_T, fftw_T>( waveform, extents, transform_count, strides_in, strides_out, distance_in, distance_out, hermitian_layout, buffer_placement );
+}
+
+// Single-precision run (float, cl_float, fftwf_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_single, very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex) {
+	try { very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// Double-precision run (double, cl_double, fftw_complex); a caught std::exception goes to handle_exception.
+TEST_F(accuracy_test_pow3_double, very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex) {
+	try { very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Length-9 1-D case, batch of 2, in place, hermitian_interleaved layout,
+// impulse pattern. Input and output use different strides and distances;
+// the configuration is handed to real_to_complex<T, cl_T, fftw_T>.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex()
+{
+	std::vector<size_t> dims( 1, 9 );
+	size_t batch_count = 2;
+
+	// Input side: stride 16, distance = length * stride + 128.
+	std::vector<size_t> in_strides( 1, 16 );
+	size_t in_dist = dims.back() * in_strides.back() + 128;
+
+	// Output side: stride 2, distance = length * stride + 2.
+	std::vector<size_t> out_strides( 1, 2 );
+	size_t out_dist = dims.back() * out_strides.back() + 2;
+
+	data_pattern signal = impulse;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+
+	real_to_complex<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex ) {
+	try { very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex ) {
+	try { very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length normal3, single batch, forward, out of place: complex_planar in,
+// complex_interleaved out, sawtooth pattern, 42.0f as the final argument.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_user_defined_scale_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 42.0f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_1D_forward_user_defined_scale_complex_to_complex ) {
+	try { normal_1D_forward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_1D_forward_user_defined_scale_complex_to_complex ) {
+	try { normal_1D_forward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length normal3, single batch, backward, out of place: complex_planar in,
+// complex_interleaved out, sawtooth pattern, 42.5f as the final argument.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_user_defined_scale_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 42.5f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_1D_backward_user_defined_scale_complex_to_complex ) {
+	try { normal_1D_backward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_1D_backward_user_defined_scale_complex_to_complex ) {
+	try { normal_1D_backward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length normal3, two batches, out of place. Both sides use stride 42 and
+// a distance of length * stride + 14; calls real_to_complex on a sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_real_to_hermitian()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	size_t batch_count = 2;
+
+	std::vector<size_t> in_strides( 1, 42 ), out_strides( 1, 42 );
+	size_t in_dist = dims.back() * in_strides.back() + 14;
+	size_t out_dist = dims.back() * out_strides.back() + 14;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_1D_non_unit_stride_and_distance_real_to_hermitian ) {
+	try { normal_1D_non_unit_stride_and_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_1D_non_unit_stride_and_distance_real_to_hermitian ) {
+	try { normal_1D_non_unit_stride_and_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length normal3, single batch, out of place, hermitian_planar layout,
+// impulse pattern; real_to_complex receives 42.0f as its final argument.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_user_defined_scale_real_to_hermitian()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = impulse;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place, 42.0f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_1D_user_defined_scale_real_to_hermitian ) {
+	try { normal_1D_user_defined_scale_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_1D_user_defined_scale_real_to_hermitian ) {
+	try { normal_1D_user_defined_scale_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length normal3, two batches, out of place. Both sides use stride 42 and
+// a distance of length * stride + 14; calls complex_to_real on a sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	size_t batch_count = 2;
+
+	std::vector<size_t> in_strides( 1, 42 ), out_strides( 1, 42 );
+	size_t in_dist = dims.back() * in_strides.back() + 14;
+	size_t out_dist = dims.back() * out_strides.back() + 14;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	complex_to_real<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_1D_non_unit_stride_and_distance_hermitian_to_real ) {
+	try { normal_1D_non_unit_stride_and_distance_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_1D_non_unit_stride_and_distance_hermitian_to_real ) {
+	try { normal_1D_non_unit_stride_and_distance_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 9x9 2-D case, batch of 2, out of place, hermitian_interleaved layout and a
+// sawtooth pattern. Strides are non-unit on both sides (first stride 5 in,
+// 2 out); both distances are passed to real_to_complex as 0.
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_real_to_hermitian()
+{
+	std::vector<size_t> dims( 2, 9 );
+	size_t batch_count = 2;
+
+	// Second stride = first extent * first stride, plus 1.
+	std::vector<size_t> in_strides( 1, 5 );
+	in_strides.push_back( dims[0] * in_strides[0] + 1 );
+
+	// Same construction for the output, plus 2.
+	std::vector<size_t> out_strides( 1, 2 );
+	out_strides.push_back( dims[0] * out_strides[0] + 2 );
+
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, small_2D_non_unit_stride_real_to_hermitian ) {
+	try { small_2D_non_unit_stride_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, small_2D_non_unit_stride_real_to_hermitian ) {
+	try { small_2D_non_unit_stride_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 9x9 2-D case, batch of 2, out of place, hermitian_interleaved layout and a
+// sawtooth pattern. Strides stay empty; distances are 9*9 + 4 in, 9*9 + 5 out.
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_distance_real_to_hermitian()
+{
+	std::vector<size_t> dims( 2, 9 );
+	size_t batch_count = 2;
+
+	std::vector<size_t> in_strides, out_strides;	// left empty
+
+	size_t plane = dims[0] * dims[1];
+	size_t in_dist = plane + 4;
+	size_t out_dist = plane + 5;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, small_2D_non_unit_distance_real_to_hermitian ) {
+	try { small_2D_non_unit_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, small_2D_non_unit_distance_real_to_hermitian ) {
+	try { small_2D_non_unit_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 9x9 2-D case, batch of 2, out of place, hermitian_interleaved layout and a
+// sawtooth pattern, with non-unit strides (first stride 5 in, 2 out) and
+// distances of last extent * last stride + 30 (input) / + 42 (output).
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_real_to_hermitian()
+{
+	std::vector<size_t> dims( 2, 9 );
+	size_t batch_count = 2;
+
+	// Input side.
+	std::vector<size_t> in_strides( 1, 5 );
+	in_strides.push_back( dims[0] * in_strides[0] + 1 );
+	size_t in_dist = dims.back() * in_strides.back() + 30;
+
+	// Output side.
+	std::vector<size_t> out_strides( 1, 2 );
+	out_strides.push_back( dims[0] * out_strides[0] + 2 );
+	size_t out_dist = dims.back() * out_strides.back() + 42;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	real_to_complex<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, small_2D_non_unit_stride_and_distance_real_to_hermitian ) {
+	try { small_2D_non_unit_stride_and_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, small_2D_non_unit_stride_and_distance_real_to_hermitian ) {
+	try { small_2D_non_unit_stride_and_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 9x9 2-D case, batch of 2, out of place, hermitian_interleaved layout and a
+// sawtooth pattern, with non-unit strides (first stride 12 in, 7 out) and
+// distances of last extent * last stride + 50 (input) / + 60 (output).
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	std::vector<size_t> dims( 2, 9 );
+	size_t batch_count = 2;
+
+	// Input side.
+	std::vector<size_t> in_strides( 1, 12 );
+	in_strides.push_back( dims[0] * in_strides[0] + 9 );
+	size_t in_dist = dims.back() * in_strides.back() + 50;
+
+	// Output side.
+	std::vector<size_t> out_strides( 1, 7 );
+	out_strides.push_back( dims[0] * out_strides[0] + 32 );
+	size_t out_dist = dims.back() * out_strides.back() + 60;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	complex_to_real<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, small_2D_non_unit_stride_and_distance_hermitian_to_real ) {
+	try { small_2D_non_unit_stride_and_distance_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, small_2D_non_unit_stride_and_distance_hermitian_to_real ) {
+	try { small_2D_non_unit_stride_and_distance_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length normal3, single batch, out of place, hermitian_planar layout,
+// sawtooth pattern; complex_to_real receives 42.0f as its final argument.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_user_defined_scale_hermitian_to_real()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_fmt = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	complex_to_real<T, cl_T, fftw_T>( signal, dims, batch_count, in_strides, out_strides, in_dist, out_dist, buffer_fmt, place, 42.0f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_1D_user_defined_scale_hermitian_to_real ) {
+	try { normal_1D_user_defined_scale_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_1D_user_defined_scale_hermitian_to_real ) {
+	try { normal_1D_user_defined_scale_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length 1, single batch, forward, out of place: complex_planar in,
+// complex_interleaved out, impulse pattern, 0.42f as the final argument.
+template< class T, class cl_T, class fftw_T >
+void single_point_1D_forward_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, 1 );	// one dimension of length 1
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = impulse;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 0.42f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, single_point_1D_forward_complex_to_complex ) {
+	try { single_point_1D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, single_point_1D_forward_complex_to_complex ) {
+	try { single_point_1D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1-D, length 1, single batch, backward, out of place: complex_interleaved in,
+// complex_planar out, impulse pattern, 0.42f as the final argument.
+template< class T, class cl_T, class fftw_T >
+void single_point_1D_backward_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, 1 );	// one dimension of length 1
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = impulse;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 0.42f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, single_point_1D_backward_complex_to_complex ) {
+	try { single_point_1D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, single_point_1D_backward_complex_to_complex ) {
+	try { single_point_1D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// small3 x small3 2-D case, single batch, forward, out of place, complex_planar
+// in and complex_interleaved out, sawtooth pattern. Both sides use strides
+// { 3, first extent * 3 + 20 }; both distances are passed as 0.
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_complex_to_complex()
+{
+	std::vector<size_t> dims( 2, small3 );
+	size_t batch_count = 1;
+
+	std::vector<size_t> in_strides( 1, 3 ), out_strides( 1, 3 );
+	in_strides.push_back( dims[0] * in_strides[0] + 20 );
+	out_strides.push_back( dims[0] * out_strides[0] + 20 );
+
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, small_2D_non_unit_stride_complex_to_complex ) {
+	try { small_2D_non_unit_stride_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, small_2D_non_unit_stride_complex_to_complex ) {
+	try { small_2D_non_unit_stride_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// small3 x small3 2-D case, batch of 2, forward, out of place, complex_interleaved
+// in and complex_planar out, sawtooth pattern. Both sides use strides
+// { 42, first extent * 42 + 19 } and a distance of last extent * last stride + 14.
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_complex_to_complex()
+{
+	std::vector<size_t> dims( 2, small3 );
+	size_t batch_count = 2;
+
+	std::vector<size_t> in_strides( 1, 42 ), out_strides( 1, 42 );
+	in_strides.push_back( dims[0] * in_strides[0] + 19 );
+	out_strides.push_back( dims[0] * out_strides[0] + 19 );
+	size_t in_dist = dims.back() * in_strides.back() + 14;
+	size_t out_dist = dims.back() * out_strides.back() + 14;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, small_2D_non_unit_stride_and_distance_complex_to_complex ) {
+	try { small_2D_non_unit_stride_and_distance_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, small_2D_non_unit_stride_and_distance_complex_to_complex ) {
+	try { small_2D_non_unit_stride_and_distance_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// normal3 x normal3 2-D case, single batch, forward direction, out of place,
+// complex_planar layout in and complex_interleaved layout out, sawtooth
+// pattern; complex_to_complex receives 42.0f as its final argument.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_user_defined_scale_complex_to_complex()
+{
+	std::vector<size_t> dims( 2, normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 42.0f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_2D_forward_user_defined_scale_complex_to_complex ) {
+	try { normal_2D_forward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_2D_forward_user_defined_scale_complex_to_complex ) {
+	try { normal_2D_forward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// normal3 x normal3 2-D case, single batch, backward direction, out of place,
+// complex_planar layout in and complex_interleaved layout out, sawtooth
+// pattern; complex_to_complex receives 42.5f as its final argument.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_user_defined_scale_complex_to_complex()
+{
+	std::vector<size_t> dims( 2, normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 42.5f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_2D_backward_user_defined_scale_complex_to_complex ) {
+	try { normal_2D_backward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_2D_backward_user_defined_scale_complex_to_complex ) {
+	try { normal_2D_backward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// small3 x normal3 2-D case, batch of 8, forward, in place, complex_interleaved
+// on both sides, sawtooth pattern; strides left empty and distances 0.
+template< class T, class cl_T, class fftw_T >
+void rectangular_2D_array_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, small3 );
+	dims.push_back( normal3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 8;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, rectangular_2D_array_complex_to_complex ) {
+	try { rectangular_2D_array_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, rectangular_2D_array_complex_to_complex ) {
+	try { rectangular_2D_array_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// normal3 x small3 2-D case, batch of 5, forward, in place, complex_planar on
+// both sides, sawtooth pattern; strides left empty and distances 0.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_array_complex_to_complex_with_odd_batch_size()
+{
+	std::vector<size_t> dims( 1, normal3 );
+	dims.push_back( small3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 5;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, normal_2D_array_complex_to_complex_with_odd_batch_size ) {
+	try { normal_2D_array_complex_to_complex_with_odd_batch_size<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, normal_2D_array_complex_to_complex_with_odd_batch_size ) {
+	try { normal_2D_array_complex_to_complex_with_odd_batch_size<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// small3 x large3 2-D case, batch of 8, forward, in place, complex_interleaved
+// on both sides, sawtooth pattern; strides left empty and distances 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_array_forward_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, small3 );
+	dims.push_back( large3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 8;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, large_2D_array_forward_complex_to_complex ) {
+	try { large_2D_array_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, large_2D_array_forward_complex_to_complex ) {
+	try { large_2D_array_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// small3 x large3 2-D case, batch of 8, backward, in place, complex_interleaved
+// on both sides, sawtooth pattern; strides left empty and distances 0.
+template< class T, class cl_T, class fftw_T >
+void large_2D_array_backward_complex_to_complex()
+{
+	std::vector<size_t> dims( 1, small3 );
+	dims.push_back( large3 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 8;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, large_2D_array_backward_complex_to_complex ) {
+	try { large_2D_array_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// Double-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_double, large_2D_array_backward_complex_to_complex ) {
+	try { large_2D_array_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1 x 1 2-D case, single batch, forward direction, out of place,
+// complex_planar layout in and complex_interleaved layout out, impulse
+// pattern; complex_to_complex receives 0.42f as its final argument.
+template< class T, class cl_T, class fftw_T >
+void single_point_2D_forward_complex_to_complex()
+{
+	std::vector<size_t> dims( 2, 1 );	// two dimensions, each of length 1
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t batch_count = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	data_pattern signal = impulse;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 0.42f );
+}
+
+// Single-precision instantiation; a caught std::exception goes to handle_exception().
+TEST_F( accuracy_test_pow3_single, single_point_2D_forward_complex_to_complex ) {
+	try { single_point_2D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow3_double, single_point_2D_forward_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_2D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with an impulse pattern on one 1x1 transform: backward,
+// out of place, complex_interleaved input to complex_planar output. Strides stay empty and
+// distances zero; 0.42f is forwarded as the last argument (presumably a scale -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void single_point_2D_backward_complex_to_complex()
+{
+	data_pattern signal = impulse;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 2, size_t( 1 ) );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow3_single, single_point_2D_backward_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_2D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, single_point_2D_backward_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_2D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with an impulse pattern on one 1x1x1 transform: forward,
+// out of place, complex_planar input to complex_interleaved output. Strides stay empty and
+// distances zero; 0.42f is forwarded as the last argument (presumably a scale -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void single_point_3D_forward_complex_to_complex()
+{
+	data_pattern signal = impulse;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 3, size_t( 1 ) );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0;
+	size_t out_dist = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow3_single, single_point_3D_forward_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_3D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, single_point_3D_forward_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_3D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with an impulse pattern on one 1x1x1 transform: backward,
+// out of place, complex_interleaved input to complex_planar output. Strides stay empty and
+// distances zero; 0.42f is forwarded as the last argument (presumably a scale -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void single_point_3D_backward_complex_to_complex()
+{
+	data_pattern signal = impulse;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 3, size_t( 1 ) );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0;
+	size_t out_dist = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place, 0.42f );
+}
+
+TEST_F(accuracy_test_pow3_single, single_point_3D_backward_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_3D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, single_point_3D_backward_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { single_point_3D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one small3 x small3 x small3 transform
+// laid out with padded, non-unit strides: forward, out of place, complex_planar input to
+// complex_interleaved output. Output strides and distance copy the input values.
+template< class T, class cl_T, class fftw_T >
+void small_3D_non_unit_stride_complex_to_complex()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 3, small3 );
+	size_t batch_size = 1;
+
+	// Innermost stride is 2; each following stride covers the previous dimension plus extra padding.
+	std::vector<size_t> in_strides( 1, size_t( 2 ) );
+	in_strides.push_back( fft_lengths[0] * in_strides[0] + 20 );
+	in_strides.push_back( fft_lengths[1] * in_strides[1] + 17 );
+	std::vector<size_t> out_strides( in_strides );
+
+	size_t in_dist = 0;
+	size_t out_dist = in_dist;
+
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_3D_non_unit_stride_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_non_unit_stride_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_3D_non_unit_stride_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_non_unit_stride_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on a batch of two small3 x small3 x small3
+// transforms with padded strides and a padded batch distance: forward, out of place,
+// complex_interleaved input to complex_planar output. Output strides and distance copy the input values.
+template< class T, class cl_T, class fftw_T >
+void small_3D_non_unit_stride_and_distance_complex_to_complex()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 3, small3 );
+	size_t batch_size = 2;
+
+	// Innermost stride is 2; each following stride covers the previous dimension plus extra padding.
+	std::vector<size_t> in_strides( 1, size_t( 2 ) );
+	in_strides.push_back( fft_lengths[0] * in_strides[0] + 19 );
+	in_strides.push_back( fft_lengths[1] * in_strides[1] + 3 );
+	std::vector<size_t> out_strides( in_strides );
+
+	// Batch distance: last length times last stride, plus 14 elements of padding.
+	size_t in_dist = fft_lengths.back() * in_strides.back() + 14;
+	size_t out_dist = in_dist;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow3_single, small_3D_non_unit_stride_and_distance_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_non_unit_stride_and_distance_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_3D_non_unit_stride_and_distance_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_non_unit_stride_and_distance_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs complex_to_complex_round_trip with a sawtooth pattern on one normal3-length
+// transform whose buffers use the complex_interleaved layout.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_round_trip_complex_to_complex()
+{
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_interleaved;
+	std::vector<size_t> fft_lengths( 1, normal3 );
+	size_t batch_size = 1;
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_round_trip_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_round_trip_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_round_trip_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs complex_to_complex_round_trip with a sawtooth pattern on one normal3 x normal3
+// transform whose buffers use the complex_planar layout.
+template< class T, class cl_T, class fftw_T >
+void normal_2D_round_trip_complex_to_complex()
+{
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+	std::vector<size_t> fft_lengths( 2, normal3 );
+	size_t batch_size = 1;
+
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_2D_round_trip_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_2D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_2D_round_trip_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_2D_round_trip_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs complex_to_complex_round_trip with a sawtooth pattern on one small3 x small3 x small3
+// transform whose buffers use the complex_planar layout.
+template< class T, class cl_T, class fftw_T >
+void small_3D_round_trip_complex_to_complex()
+{
+	data_pattern signal = sawtooth;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 3, small3 );
+	size_t batch_size = 1;
+
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow3_single, small_3D_round_trip_complex_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_round_trip_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_3D_round_trip_complex_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_round_trip_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs real_to_complex_round_trip with an impulse pattern on a single
+// normal3-length transform (batch of one).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_round_trip_real_to_complex()
+{
+	data_pattern signal = impulse;
+	std::vector<size_t> fft_lengths( 1, normal3 );
+	size_t batch_size = 1;
+	real_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_1D_round_trip_real_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_1D_round_trip_real_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs real_to_complex_round_trip with an impulse pattern on a single
+// large3-length transform (batch of one).
+template< class T, class cl_T, class fftw_T >
+void large_1D_round_trip_real_to_complex()
+{
+	data_pattern signal = impulse;
+	std::vector<size_t> fft_lengths( 1, large3 );
+	size_t batch_size = 1;
+	real_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size );
+}
+
+TEST_F(accuracy_test_pow3_single, large_1D_round_trip_real_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { large_1D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, large_1D_round_trip_real_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { large_1D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs real_to_complex_round_trip with an impulse pattern on a single
+// normal3 x normal3 transform (batch of one).
+template< class T, class cl_T, class fftw_T >
+void normal_2D_round_trip_real_to_complex()
+{
+	data_pattern signal = impulse;
+	std::vector<size_t> fft_lengths( 2, normal3 );
+	size_t batch_size = 1;
+
+	real_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size );
+}
+
+TEST_F(accuracy_test_pow3_single, normal_2D_round_trip_real_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_2D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, normal_2D_round_trip_real_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_2D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+ // *****************************************************
+ // *****************************************************
+// Runs real_to_complex_round_trip with an impulse pattern on a single
+// small3 x small3 x small3 transform (batch of one).
+template< class T, class cl_T, class fftw_T >
+void small_3D_round_trip_real_to_complex()
+{
+	data_pattern signal = impulse;
+
+	std::vector<size_t> fft_lengths( 3, small3 );
+	size_t batch_size = 1;
+
+	real_to_complex_round_trip<T, cl_T, fftw_T>( signal, fft_lengths, batch_size );
+}
+
+TEST_F(accuracy_test_pow3_single, small_3D_round_trip_real_to_complex) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_round_trip_real_to_complex<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow3_double, small_3D_round_trip_real_to_complex) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { small_3D_round_trip_real_to_complex<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+} //namespace
diff --git a/src/tests/accuracy_test_pow5.cpp b/src/tests/accuracy_test_pow5.cpp
new file mode 100644
index 00000000..c73f5244
--- /dev/null
+++ b/src/tests/accuracy_test_pow5.cpp
@@ -0,0 +1,7356 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include<math.h>
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "typedefs.h"
+#include "accuracy_test_common.h"
+#include <stdexcept>
+#include <vector>
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Fixture for the pow5 accuracy tests instantiated with float: stateless, every hook is a no-op.
+class accuracy_test_pow5_single : public ::testing::Test {
+protected:
+	accuracy_test_pow5_single() {}
+	virtual ~accuracy_test_pow5_single() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Fixture for the pow5 accuracy tests instantiated with double: stateless, every hook is a no-op.
+class accuracy_test_pow5_double : public ::testing::Test {
+protected:
+	accuracy_test_pow5_double() {}
+	virtual ~accuracy_test_pow5_double() {}
+	virtual void SetUp() {}
+	virtual void TearDown() {}
+};
+
+namespace power2
+{
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// forward, in place, complex_planar input and complex_planar output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_in_place_complex_planar_to_complex_planar) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_in_place_complex_planar_to_complex_planar) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// backward, in place, complex_planar input and complex_planar output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_in_place_complex_planar_to_complex_planar) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_in_place_complex_planar_to_complex_planar) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// forward, in place, complex_interleaved input and complex_interleaved output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// backward, in place, complex_interleaved input and complex_interleaved output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// forward, out of place, complex_planar input and complex_planar output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_out_of_place_complex_planar_to_complex_planar) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_out_of_place_complex_planar_to_complex_planar) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// backward, out of place, complex_planar input and complex_planar output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_out_of_place_complex_planar_to_complex_planar) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_out_of_place_complex_planar_to_complex_planar) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// forward, out of place, complex_interleaved input and complex_interleaved output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// backward, out of place, complex_interleaved input and complex_interleaved output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// forward, out of place, complex_planar input to complex_interleaved output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// backward, out of place, complex_planar input to complex_interleaved output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	// Single-precision (float) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	// Double-precision (double) instantiation; std::exception failures are forwarded to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch (const std::exception& failure) { handle_exception(failure); }
+}
+
+// *****************************************************
+// *****************************************************
+// Drives complex_to_complex with a sawtooth pattern on one normal5-length transform:
+// forward, out of place, complex_interleaved input to complex_planar output.
+// Strides stay empty and distances zero (presumably "use defaults" -- confirm in complex_to_complex).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	data_pattern signal = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> fft_lengths( 1, normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	size_t batch_size = 1;
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( signal, dir, fft_lengths, batch_size, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length normal5, batch 1, backward, out of place,
+// complex_interleaved input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D real-to-complex: length normal5, batch 1, in place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_in_place_real_to_hermitian_interleaved()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_in_place_real_to_hermitian_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_in_place_real_to_hermitian_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-real: length normal5, batch 1, in place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_in_place_hermitian_interleaved_to_real()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_in_place_hermitian_interleaved_to_real)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_in_place_hermitian_interleaved_to_real)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D real-to-complex: length normal5, batch 1, out of place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-real: length normal5, batch 1, out of place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_out_of_place_hermitian_interleaved_to_real)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_out_of_place_hermitian_interleaved_to_real)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D real-to-complex: length normal5, batch 1, out of place,
+// hermitian_planar layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_real_to_hermitian_planar()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_out_of_place_real_to_hermitian_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_out_of_place_real_to_hermitian_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-real: length normal5, batch 1, out of place,
+// hermitian_planar layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_out_of_place_hermitian_planar_to_real()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, normal5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_out_of_place_hermitian_planar_to_real)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_out_of_place_hermitian_planar_to_real)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { normal_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 1D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, forward, in place,
+// complex_planar input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, backward, in place,
+// complex_planar input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, forward, in place,
+// complex_interleaved input, complex_interleaved output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, backward, in place,
+// complex_interleaved input, complex_interleaved output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, forward, out of place,
+// complex_planar input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, backward, out of place,
+// complex_planar input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, forward, out of place,
+// complex_interleaved input, complex_interleaved output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, backward, out of place,
+// complex_interleaved input, complex_interleaved output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, forward, out of place,
+// complex_planar input, complex_interleaved output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, backward, out of place,
+// complex_planar input, complex_interleaved output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, forward, out of place,
+// complex_interleaved input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-complex: length small5, batch 1, backward, out of place,
+// complex_interleaved input, complex_planar output, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D real-to-complex: length small5, batch 1, in place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_in_place_real_to_hermitian_interleaved()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_in_place_real_to_hermitian_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_in_place_real_to_hermitian_interleaved)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D complex-to-real: length small5, batch 1, in place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_in_place_hermitian_interleaved_to_real()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_in_place_hermitian_interleaved_to_real)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_in_place_hermitian_interleaved_to_real)
+{
+	// double, cl_double, fftw_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+// 1D real-to-complex: length small5, batch 1, out of place,
+// hermitian_interleaved layout, sawtooth pattern.
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	layout::buffer_layout_t herm_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+
+	// Strides stay empty and distances zero.
+	std::vector<size_t> dims( 1, small5 );
+	size_t howmany = 1;
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_dist = 0, dst_dist = 0;
+	data_pattern fill = sawtooth;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, howmany, src_strides, dst_strides, src_dist, dst_dist, herm_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	// float, cl_float, fftwf_complex instantiation; std::exception goes to handle_exception.
+	try { small_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_out_of_place_real_to_hermitian_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Out-of-place 1D complex_to_real case: one small5-length dimension, batch of 1, hermitian_interleaved layout.
+	std::vector<size_t> dims( 1, small5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_out_of_place_hermitian_interleaved_to_real)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_out_of_place_hermitian_interleaved_to_real)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_real_to_hermitian_planar()
+{
+	// Out-of-place 1D real_to_complex case: one small5-length dimension, batch of 1, hermitian_planar layout.
+	std::vector<size_t> dims( 1, small5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_out_of_place_real_to_hermitian_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_out_of_place_real_to_hermitian_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_1D_out_of_place_hermitian_planar_to_real()
+{
+	// Out-of-place 1D complex_to_real case: one small5-length dimension, batch of 1, hermitian_planar layout.
+	std::vector<size_t> dims( 1, small5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_out_of_place_hermitian_planar_to_real)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_out_of_place_hermitian_planar_to_real)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { small_1D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 1D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Forward, in-place 1D complex_to_complex case: one large5-length dimension, batch of 1, planar in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_forward_in_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_forward_in_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Backward, in-place 1D complex_to_complex case: one large5-length dimension, batch of 1, planar in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_backward_in_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_backward_in_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, in-place 1D complex_to_complex case: one large5-length dimension, batch of 1, interleaved in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, in-place 1D complex_to_complex case: one large5-length dimension, batch of 1, interleaved in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Forward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, planar in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Backward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, planar in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, interleaved in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, interleaved in and out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Forward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, planar in, interleaved out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Backward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, planar in, interleaved out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Forward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, interleaved in, planar out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Backward, out-of-place 1D complex_to_complex case: one large5-length dimension, batch of 1, interleaved in, planar out.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_in_place_real_to_hermitian_interleaved()
+{
+	// In-place 1D real_to_complex case: one large5-length dimension, batch of 1, hermitian_interleaved layout.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_in_place_real_to_hermitian_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_in_place_real_to_hermitian_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_in_place_hermitian_interleaved_to_real()
+{
+	// In-place 1D complex_to_real case: one large5-length dimension, batch of 1, hermitian_interleaved layout.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::in_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_in_place_hermitian_interleaved_to_real)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_in_place_hermitian_interleaved_to_real)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Out-of-place 1D real_to_complex case: one large5-length dimension, batch of 1, hermitian_interleaved layout.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_out_of_place_real_to_hermitian_interleaved)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_out_of_place_real_to_hermitian_interleaved)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Out-of-place 1D complex_to_real case: one large5-length dimension, batch of 1, hermitian_interleaved layout.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_interleaved;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_out_of_place_hermitian_interleaved_to_real)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_out_of_place_hermitian_interleaved_to_real)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_real_to_hermitian_planar()
+{
+	// Out-of-place 1D real_to_complex case: one large5-length dimension, batch of 1, hermitian_planar layout.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	real_to_complex<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_out_of_place_real_to_hermitian_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_out_of_place_real_to_hermitian_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_out_of_place_hermitian_planar_to_real()
+{
+	// Out-of-place 1D complex_to_real case: one large5-length dimension, batch of 1, hermitian_planar layout.
+	std::vector<size_t> dims( 1, large5 );
+	std::vector<size_t> strides_in, strides_out;	// both left empty
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t data_layout = layout::hermitian_planar;
+	placeness::placeness_t place = placeness::out_of_place;
+	data_pattern fill = sawtooth;
+
+	complex_to_real<T, cl_T, fftw_T>(
+		fill, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, data_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_out_of_place_hermitian_planar_to_real)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_out_of_place_hermitian_planar_to_real)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { large_1D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Forward, in-place 2D complex_to_complex case: two normal5-length dimensions, batch of 1;
+	// planar in and out, both stride vectors left empty, both distances zero.
+	std::vector<size_t> dims( 2, normal5 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::forward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_in_place_complex_planar_to_complex_planar)
+{	// float / cl_float / fftwf_complex instantiation; a std::exception goes to handle_exception()
+	try { normal_2D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_in_place_complex_planar_to_complex_planar)
+{	// double / cl_double / fftw_complex instantiation; a std::exception goes to handle_exception()
+	try { normal_2D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Backward, in-place 2D complex_to_complex case: two normal5-length dimensions, batch of 1;
+	// planar in and out, both stride vectors left empty, both distances zero.
+	std::vector<size_t> dims( 2, normal5 );
+	std::vector<size_t> strides_in, strides_out;
+	size_t batch_count = 1;
+	size_t dist_in = 0, dist_out = 0;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	placeness::placeness_t place = placeness::in_place;
+	direction::direction_t dir = direction::backward;
+	data_pattern fill = sawtooth;
+
+	complex_to_complex<T, cl_T, fftw_T>(
+		fill, dir, dims, batch_count, strides_in, strides_out,
+		dist_in, dist_out, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: forward in-place run, complex_interleaved -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: backward in-place run, complex_interleaved -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Two lengths of normal5 each, sawtooth pattern: forward out-of-place run, complex_planar -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Two lengths of normal5 each, sawtooth pattern: backward out-of-place run, complex_planar -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: forward out-of-place run, complex_interleaved -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: backward out-of-place run, complex_interleaved -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: forward out-of-place run, complex_planar -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: backward out-of-place run, complex_planar -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Two lengths of normal5 each, sawtooth pattern: forward out-of-place run, complex_interleaved -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Two lengths of normal5 each, sawtooth pattern: backward out-of-place run, complex_interleaved -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_in_place_real_to_hermitian_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: in-place real_to_complex run with the hermitian_interleaved layout.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_in_place_real_to_hermitian_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_in_place_real_to_hermitian_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_in_place_hermitian_interleaved_to_real()
+{
+	// Two lengths of normal5 each, sawtooth pattern: in-place complex_to_real run with the hermitian_interleaved layout.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_in_place_hermitian_interleaved_to_real)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_in_place_hermitian_interleaved_to_real)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Two lengths of normal5 each, sawtooth pattern: out-of-place real_to_complex run with the hermitian_interleaved layout.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_out_of_place_real_to_hermitian_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_out_of_place_real_to_hermitian_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Two lengths of normal5 each, sawtooth pattern: out-of-place complex_to_real run with the hermitian_interleaved layout.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_out_of_place_hermitian_interleaved_to_real)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_out_of_place_hermitian_interleaved_to_real)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_real_to_hermitian_planar()
+{
+	// Two lengths of normal5 each, sawtooth pattern: out-of-place real_to_complex run with the hermitian_planar layout.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_out_of_place_real_to_hermitian_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_out_of_place_real_to_hermitian_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_out_of_place_hermitian_planar_to_real()
+{
+	// Two lengths of normal5 each, sawtooth pattern: out-of-place complex_to_real run with the hermitian_planar layout.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_out_of_place_hermitian_planar_to_real)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_out_of_place_hermitian_planar_to_real)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { normal_2D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 2D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Two lengths of small5 each, sawtooth pattern: forward in-place run, complex_planar -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_forward_in_place_complex_planar_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Two lengths of small5 each, sawtooth pattern: backward in-place run, complex_planar -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_backward_in_place_complex_planar_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Two lengths of small5 each, sawtooth pattern: forward in-place run, complex_interleaved -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Two lengths of small5 each, sawtooth pattern: backward in-place run, complex_interleaved -> complex_interleaved.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t where = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Two lengths of small5 each, sawtooth pattern: forward out-of-place run, complex_planar -> complex_planar.
+	data_pattern test_pattern = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t where = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	// Both stride vectors stay empty and both distances are zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t src_distance = 0, dst_distance = 0;
+	size_t batch_count = 1;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// Single-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{
+	// Double-precision instantiation; a caught std::exception is passed to handle_exception.
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); } catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// small5 x small5, batch of 1: backward, out-of-place, complex_planar -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// small5 x small5, batch of 1: forward, out-of-place, complex_interleaved -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// small5 x small5, batch of 1: backward, out-of-place, complex_interleaved -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// small5 x small5, batch of 1: forward, out-of-place, complex_planar -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// small5 x small5, batch of 1: backward, out-of-place, complex_planar -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// small5 x small5, batch of 1: forward, out-of-place, complex_interleaved -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// small5 x small5, batch of 1: backward, out-of-place, complex_interleaved -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_in_place_real_to_hermitian_interleaved()
+{
+	// small5 x small5, batch of 1: real_to_complex, in-place, hermitian_interleaved layout, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, buffer_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_in_place_real_to_hermitian_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_in_place_real_to_hermitian_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_in_place_hermitian_interleaved_to_real()
+{
+	// small5 x small5, batch of 1: complex_to_real, in-place, hermitian_interleaved layout, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, buffer_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_in_place_hermitian_interleaved_to_real)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_in_place_hermitian_interleaved_to_real)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	// small5 x small5, batch of 1: real_to_complex, out-of-place, hermitian_interleaved layout, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, buffer_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_out_of_place_real_to_hermitian_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_out_of_place_real_to_hermitian_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	// small5 x small5, batch of 1: complex_to_real, out-of-place, hermitian_interleaved layout, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, buffer_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_out_of_place_hermitian_interleaved_to_real)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_out_of_place_hermitian_interleaved_to_real)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_real_to_hermitian_planar()
+{
+	// small5 x small5, batch of 1: real_to_complex, out-of-place, hermitian_planar layout, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, buffer_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_out_of_place_real_to_hermitian_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_out_of_place_real_to_hermitian_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_out_of_place_hermitian_planar_to_real()
+{
+	// small5 x small5, batch of 1: complex_to_real, out-of-place, hermitian_planar layout, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t buffer_layout = layout::hermitian_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill_pattern, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, buffer_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_out_of_place_hermitian_planar_to_real)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { small_2D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_out_of_place_hermitian_planar_to_real)
+{	// instantiated with double / cl_double / fftw_complex
+	try { small_2D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 2D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: forward, in-place, complex_planar -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_forward_in_place_complex_planar_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_forward_in_place_complex_planar_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: backward, in-place, complex_planar -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_backward_in_place_complex_planar_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_backward_in_place_complex_planar_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: forward, in-place, complex_interleaved -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: backward, in-place, complex_interleaved -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: forward, out-of-place, complex_planar -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: backward, out-of-place, complex_planar -> complex_planar, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: forward, out-of-place, complex_interleaved -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: backward, out-of-place, complex_interleaved -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::backward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// instantiated with double / cl_double / fftw_complex
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// MaxLength2D<T>(5) x normal5, batch of 1: forward, out-of-place, complex_planar -> complex_interleaved, sawtooth data pattern.
+	data_pattern fill_pattern = sawtooth;
+	direction::direction_t xform_direction = direction::forward;
+	placeness::placeness_t buffer_mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( MaxLength2D<T>(5) );
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> src_strides, dst_strides;	// both left empty
+	size_t src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill_pattern, xform_direction, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, buffer_mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// instantiated with float / cl_float / fftwf_complex
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_forward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { large_2D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex: backward, out_of_place, complex_planar -> complex_interleaved, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_backward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_backward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { large_2D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Calls complex_to_complex: forward, out_of_place, complex_interleaved -> complex_planar, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_forward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_forward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { large_2D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Calls complex_to_complex: backward, out_of_place, complex_interleaved -> complex_planar, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_backward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_backward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { large_2D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_in_place_real_to_hermitian_interleaved()
+{
+	// Calls real_to_complex: in_place, hermitian_interleaved, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t result_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_in_place_real_to_hermitian_interleaved )
+{
+	try { large_2D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_in_place_real_to_hermitian_interleaved )
+{
+	try { large_2D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_in_place_hermitian_interleaved_to_real()
+{
+	// Calls complex_to_real: in_place, hermitian_interleaved, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t result_placement = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_in_place_hermitian_interleaved_to_real )
+{
+	try { large_2D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_in_place_hermitian_interleaved_to_real )
+{
+	try { large_2D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Calls real_to_complex: out_of_place, hermitian_interleaved, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_out_of_place_real_to_hermitian_interleaved )
+{
+	try { large_2D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_out_of_place_real_to_hermitian_interleaved )
+{
+	try { large_2D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Calls complex_to_real: out_of_place, hermitian_interleaved, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_out_of_place_hermitian_interleaved_to_real )
+{
+	try { large_2D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_out_of_place_hermitian_interleaved_to_real )
+{
+	try { large_2D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_real_to_hermitian_planar()
+{
+	// Calls real_to_complex: out_of_place, hermitian_planar, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_out_of_place_real_to_hermitian_planar )
+{
+	try { large_2D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_out_of_place_real_to_hermitian_planar )
+{
+	try { large_2D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_out_of_place_hermitian_planar_to_real()
+{
+	// Calls complex_to_real: out_of_place, hermitian_planar, lengths { MaxLength2D<T>(5), normal5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	std::vector<size_t> dims( 2 );
+	dims[0] = MaxLength2D<T>(5);
+	dims[1] = normal5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, hermitian_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, large_2D_out_of_place_hermitian_planar_to_real )
+{
+	try { large_2D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, large_2D_out_of_place_hermitian_planar_to_real )
+{
+	try { large_2D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ normal 3D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex: forward, in_place, complex_planar -> complex_planar, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_forward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_forward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex: backward, in_place, complex_planar -> complex_planar, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_backward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_backward_in_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex: forward, in_place, complex_interleaved -> complex_interleaved, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex: backward, in_place, complex_interleaved -> complex_interleaved, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex: forward, out_of_place, complex_planar -> complex_planar, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_forward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_forward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Calls complex_to_complex: backward, out_of_place, complex_planar -> complex_planar, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_backward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_backward_out_of_place_complex_planar_to_complex_planar )
+{
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex: forward, out_of_place, complex_interleaved -> complex_interleaved, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Calls complex_to_complex: backward, out_of_place, complex_interleaved -> complex_interleaved, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved )
+{
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex: forward, out_of_place, complex_planar -> complex_interleaved, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_3D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Calls complex_to_complex: backward, out_of_place, complex_planar -> complex_interleaved, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved )
+{
+	try { normal_3D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Calls complex_to_complex: forward, out_of_place, complex_interleaved -> complex_planar, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::forward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_3D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Calls complex_to_complex: backward, out_of_place, complex_interleaved -> complex_planar, lengths { normal5, small5, small5 }, batch 1, sawtooth.
+	data_pattern fill = sawtooth;
+	direction::direction_t xform_dir = direction::backward;
+	placeness::placeness_t result_placement = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims( 3 );
+	dims[0] = normal5;
+	dims[1] = small5;
+	dims[2] = small5;
+	// Both stride vectors are passed empty and both distances as zero.
+	std::vector<size_t> src_strides, dst_strides;
+	size_t batch_count = 1, src_distance = 0, dst_distance = 0;
+
+	complex_to_complex<T, cl_T, fftw_T>( fill, xform_dir, dims, batch_count, src_strides, dst_strides, src_distance, dst_distance, src_layout, dst_layout, result_placement );
+}
+
+TEST_F( accuracy_test_pow5_single, normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }	// float instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+TEST_F( accuracy_test_pow5_double, normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar )
+{
+	try { normal_3D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }	// double instantiation
+	catch( const std::exception& failure ) { handle_exception( failure ); }	// hand any std::exception to handle_exception()
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_in_place_real_to_hermitian_interleaved()
+{
+	// In-place real_to_complex run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( normal5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_3D_in_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ normal_3D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, normal_3D_in_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ normal_3D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_in_place_hermitian_interleaved_to_real()
+{
+	// In-place complex_to_real run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_3D_in_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ normal_3D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, normal_3D_in_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ normal_3D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Out-of-place real_to_complex run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( normal5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_3D_out_of_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ normal_3D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, normal_3D_out_of_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ normal_3D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Out-of-place complex_to_real run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_3D_out_of_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ normal_3D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, normal_3D_out_of_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ normal_3D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_real_to_hermitian_planar()
+{
+	// Out-of-place real_to_complex run with the hermitian_planar layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_3D_out_of_place_real_to_hermitian_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ normal_3D_out_of_place_real_to_hermitian_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, normal_3D_out_of_place_real_to_hermitian_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ normal_3D_out_of_place_real_to_hermitian_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_3D_out_of_place_hermitian_planar_to_real()
+{
+	// Out-of-place complex_to_real run with the hermitian_planar layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( normal5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_3D_out_of_place_hermitian_planar_to_real)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ normal_3D_out_of_place_hermitian_planar_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, normal_3D_out_of_place_hermitian_planar_to_real)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ normal_3D_out_of_place_hermitian_planar_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ small 3D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_in_place_complex_planar_to_complex_planar()
+{
+	// Forward, in-place complex_to_complex run: complex_planar input, complex_planar output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_forward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_forward_in_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_forward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_in_place_complex_planar_to_complex_planar()
+{
+	// Backward, in-place complex_to_complex run: complex_planar input, complex_planar output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_backward_in_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_backward_in_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_backward_in_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, in-place complex_to_complex run: complex_interleaved input, complex_interleaved output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_forward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_forward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_in_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, in-place complex_to_complex run: complex_interleaved input, complex_interleaved output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_backward_in_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_backward_in_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_backward_in_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Forward, out-of-place complex_to_complex run: complex_planar input, complex_planar output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_forward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_forward_out_of_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_forward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_planar_to_complex_planar()
+{
+	// Backward, out-of-place complex_to_complex run: complex_planar input, complex_planar output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_backward_out_of_place_complex_planar_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_backward_out_of_place_complex_planar_to_complex_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_backward_out_of_place_complex_planar_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Forward, out-of-place complex_to_complex run: complex_interleaved input, complex_interleaved output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved()
+{
+	// Backward, out-of-place complex_to_complex run: complex_interleaved input, complex_interleaved output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Forward, out-of-place complex_to_complex run: complex_planar input, complex_interleaved output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_forward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_forward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_forward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_planar_to_complex_interleaved()
+{
+	// Backward, out-of-place complex_to_complex run: complex_planar input, complex_interleaved output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_backward_out_of_place_complex_planar_to_complex_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_backward_out_of_place_complex_planar_to_complex_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_backward_out_of_place_complex_planar_to_complex_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_forward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Forward, out-of-place complex_to_complex run: complex_interleaved input, complex_planar output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_forward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_forward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_forward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_backward_out_of_place_complex_interleaved_to_complex_planar()
+{
+	// Backward, out-of-place complex_to_complex run: complex_interleaved input, complex_planar output, sawtooth data.
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, one_batch, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex.
+	try	{ small_3D_backward_out_of_place_complex_interleaved_to_complex_planar<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, small_3D_backward_out_of_place_complex_interleaved_to_complex_planar)
+{	// Instantiates the scenario with double, cl_double, fftw_complex.
+	try	{ small_3D_backward_out_of_place_complex_interleaved_to_complex_planar<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_in_place_real_to_hermitian_interleaved()
+{
+	// In-place real_to_complex run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, _small_3D_in_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_in_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, _small_3D_in_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_in_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_in_place_hermitian_interleaved_to_real()
+{
+	// In-place complex_to_real run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, _small_3D_in_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_in_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, _small_3D_in_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with double, cl_double, fftw_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_in_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_real_to_hermitian_interleaved()
+{
+	// Out-of-place real_to_complex run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, _small_3D_out_of_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_out_of_place_real_to_hermitian_interleaved<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, _small_3D_out_of_place_real_to_hermitian_interleaved)
+{	// Instantiates the scenario with double, cl_double, fftw_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_out_of_place_real_to_hermitian_interleaved<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_hermitian_interleaved_to_real()
+{
+	// Out-of-place complex_to_real run with the hermitian_interleaved layout and sawtooth data.
+	data_pattern fill = sawtooth;
+	placeness::placeness_t place = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// One batch; both stride vectors are left empty and both distances are 0.
+	std::vector<size_t> in_strides, out_strides;
+	size_t one_batch = 1, in_dist = 0, out_dist = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, one_batch, in_strides, out_strides, in_dist, out_dist, hermitian_layout, place );
+}
+
+TEST_F(accuracy_test_pow5_single, _small_3D_out_of_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with float, cl_float, fftwf_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_out_of_place_hermitian_interleaved_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+TEST_F(accuracy_test_pow5_double, _small_3D_out_of_place_hermitian_interleaved_to_real)
+{	// Instantiates the scenario with double, cl_double, fftw_complex. NOTE(review): the leading '_' in the test name puts "__" into the gtest-generated class name (reserved in C++); kept as-is because renaming changes the test ID.
+	try	{ small_3D_out_of_place_hermitian_interleaved_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& caught )	{ handle_exception( caught ); }	// hand any std::exception to handle_exception
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_real_to_hermitian_planar() {
+	// Out-of-place real_to_complex run: hermitian_planar layout, lengths small5 x small5 x small5.
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, _small_3D_out_of_place_real_to_hermitian_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { small_3D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, _small_3D_out_of_place_real_to_hermitian_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { small_3D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_out_of_place_hermitian_planar_to_real() {
+	// Out-of-place complex_to_real run: hermitian_planar layout, lengths small5 x small5 x small5.
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, _small_3D_out_of_place_hermitian_planar_to_real) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { small_3D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, _small_3D_out_of_place_hermitian_planar_to_real) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { small_3D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ large 3D ^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_in_place_complex_planar_to_complex_planar() {
+	// Forward, in-place complex_to_complex run: complex_planar in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_planar;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_forward_in_place_complex_planar_to_complex_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_forward_in_place_complex_planar_to_complex_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_in_place_complex_planar_to_complex_planar() {
+	// Backward, in-place complex_to_complex run: complex_planar in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_planar;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_backward_in_place_complex_planar_to_complex_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_backward_in_place_complex_planar_to_complex_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_in_place_complex_interleaved_to_complex_interleaved() {
+	// Forward, in-place complex_to_complex run: complex_interleaved in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_forward_in_place_complex_interleaved_to_complex_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_in_place_complex_interleaved_to_complex_interleaved() {
+	// Backward, in-place complex_to_complex run: complex_interleaved in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_backward_in_place_complex_interleaved_to_complex_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_planar_to_complex_planar() {
+	// Forward, out-of-place complex_to_complex run: complex_planar in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_planar;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_forward_out_of_place_complex_planar_to_complex_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_planar_to_complex_planar() {
+	// Backward, out-of-place complex_to_complex run: complex_planar in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_planar;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_backward_out_of_place_complex_planar_to_complex_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved() {
+	// Forward, out-of-place complex_to_complex run: complex_interleaved in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved() {
+	// Backward, out-of-place complex_to_complex run: complex_interleaved in and out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_planar_to_complex_interleaved() {
+	// Forward, out-of-place complex_to_complex run: complex_planar in, complex_interleaved out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_forward_out_of_place_complex_planar_to_complex_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_planar_to_complex_interleaved() {
+	// Backward, out-of-place complex_to_complex run: complex_planar in, complex_interleaved out, lengths (5, 5, large5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_planar;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	dims.push_back( large5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_backward_out_of_place_complex_planar_to_complex_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_planar_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_forward_out_of_place_complex_interleaved_to_complex_planar() {
+	// Forward, out-of-place complex_to_complex run: complex_interleaved in, complex_planar out, lengths (5, large5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_planar;
+	std::vector<size_t> dims;
+	dims.push_back( 5 );
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_forward_out_of_place_complex_interleaved_to_complex_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_forward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_backward_out_of_place_complex_interleaved_to_complex_planar() {
+	// Backward, out-of-place complex_to_complex run: complex_interleaved in, complex_planar out, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::backward;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_planar;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_backward_out_of_place_complex_interleaved_to_complex_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_backward_out_of_place_complex_interleaved_to_complex_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_in_place_real_to_hermitian_interleaved() {
+	// In-place real_to_complex run: hermitian_interleaved layout, lengths (5, 5, large5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	dims.push_back( large5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_in_place_real_to_hermitian_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_in_place_real_to_hermitian_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_in_place_hermitian_interleaved_to_real() {
+	// In-place complex_to_real run: hermitian_interleaved layout, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_in_place_hermitian_interleaved_to_real) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_in_place_hermitian_interleaved_to_real) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_real_to_hermitian_interleaved() {
+	// Out-of-place real_to_complex run: hermitian_interleaved layout, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_out_of_place_real_to_hermitian_interleaved) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_out_of_place_real_to_hermitian_interleaved) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_hermitian_interleaved_to_real() {
+	// Out-of-place complex_to_real run: hermitian_interleaved layout, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_out_of_place_hermitian_interleaved_to_real) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_out_of_place_hermitian_interleaved_to_real) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_real_to_hermitian_planar() {
+	// Out-of-place real_to_complex run: hermitian_planar layout, lengths (large5, 5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	real_to_complex<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_out_of_place_real_to_hermitian_planar) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_out_of_place_real_to_hermitian_planar) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_3D_out_of_place_hermitian_planar_to_real() {
+	// Out-of-place complex_to_real run: hermitian_planar layout, lengths (5, large5, 5).
+	// Sawtooth input, batch of 1; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	placeness::placeness_t buffer_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	std::vector<size_t> dims;
+	dims.push_back( 5 );
+	dims.push_back( large5 );
+	dims.push_back( 5 );
+	size_t batch_count = 1;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_real<T, cl_T, fftw_T>( input_pattern, dims, batch_count, in_strides, out_strides, in_distance, out_distance, hermitian_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_3D_out_of_place_hermitian_planar_to_real) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_3D_out_of_place_hermitian_planar_to_real) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { large_3D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ special ^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_complex_to_complex() {
+	// Forward, in-place complex_to_complex run: complex_interleaved in and out, single length normal5.
+	// Sawtooth input, batch of 8; stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 8;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_array_complex_to_complex) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { normal_1D_array_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_array_complex_to_complex) {
+	// double / cl_double / fftw_complex instantiation; any std::exception goes to handle_exception().
+	try { normal_1D_array_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_complex_to_complex_with_odd_batch_size() {
+	// Forward, in-place complex_to_complex run: complex_interleaved in and out, single length normal5.
+	// Sawtooth input, batch of 5 (odd); stride vectors are left empty and both distances are 0.
+	data_pattern input_pattern = sawtooth;
+	direction::direction_t transform_direction = direction::forward;
+	placeness::placeness_t buffer_placeness = placeness::in_place;
+	layout::buffer_layout_t source_layout = layout::complex_interleaved;
+	layout::buffer_layout_t result_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 5;
+	std::vector<size_t> in_strides;
+	std::vector<size_t> out_strides;
+	size_t in_distance = 0;
+	size_t out_distance = 0;
+	complex_to_complex<T, cl_T, fftw_T>( input_pattern, transform_direction, dims, batch_count, in_strides, out_strides, in_distance, out_distance, source_layout, result_layout, buffer_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_array_complex_to_complex_with_odd_batch_size) {
+	// float / cl_float / fftwf_complex instantiation; any std::exception goes to handle_exception().
+	try { normal_1D_array_complex_to_complex_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& caught ) { handle_exception( caught ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_array_complex_to_complex_with_odd_batch_size)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_complex_to_complex_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one normal5-point dimension, batch 8: in_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_real_to_hermitian()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 8;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_array_real_to_hermitian)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_array_real_to_hermitian)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one normal5-point dimension, batch 5: in_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_real_to_hermitian_with_odd_batch_size()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 5;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one normal5-point dimension, batch 8: in_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_hermitian_to_real()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 8;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_array_hermitian_to_real)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_array_hermitian_to_real)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one normal5-point dimension, batch 5: out_of_place, hermitian_planar, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_array_hermitian_to_real_with_odd_batch_size()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_planar;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 5;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_array_hermitian_to_real_with_odd_batch_size)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_hermitian_to_real_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_array_hermitian_to_real_with_odd_batch_size)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_array_hermitian_to_real_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for two small5-point dimensions, batch 8: in_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_real_to_hermitian()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 8;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_array_real_to_hermitian)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_array_real_to_hermitian)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for two small5-point dimensions, batch 5: in_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_real_to_hermitian_with_odd_batch_size()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 5;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_array_real_to_hermitian_with_odd_batch_size)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_array_real_to_hermitian_with_odd_batch_size)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for two small5-point dimensions, batch 8: in_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_hermitian_to_real()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 8;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_array_hermitian_to_real)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_array_hermitian_to_real)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for two small5-point dimensions, batch 5: out_of_place, hermitian_planar, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_2D_array_hermitian_to_real_with_odd_batch_size()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_planar;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+	size_t batch_count = 5;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_array_hermitian_to_real_with_odd_batch_size)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_hermitian_to_real_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_array_hermitian_to_real_with_odd_batch_size)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_2D_array_hermitian_to_real_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one large5-point dimension, batch 2: in_place, complex_interleaved in and out, direction::forward, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void large_1D_array_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( large5 );
+	size_t batch_count = 2;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_array_complex_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_array_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_1D_array_complex_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { large_1D_array_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 3125-point dimension, batch 65536: in_place, complex_interleaved in and out, direction::forward, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void astoundingly_large_1D_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 3125 );
+	size_t batch_count = 65536;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, DISABLED_astoundingly_large_1D_complex_to_complex)
+{	// DISABLED_ prefix: GoogleTest skips this by default. Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { astoundingly_large_1D_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, DISABLED_astoundingly_large_1D_complex_to_complex)
+{	// DISABLED_ prefix: GoogleTest skips this by default. Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { astoundingly_large_1D_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one 25-point dimension, batch 1, stride 3 in and out: out_of_place, complex_planar in, complex_interleaved out, direction::forward, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_small_1D_non_unit_stride_complex_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_small_1D_non_unit_stride_complex_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one small5-point dimension, batch 1, stride 3 in and out: out_of_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_1D_non_unit_stride_real_to_hermitian()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_non_unit_stride_real_to_hermitian)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_non_unit_stride_real_to_hermitian)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one small5-point dimension, batch 1, stride 3 in and out: out_of_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void small_1D_non_unit_stride_hermitian_to_real()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, small_1D_non_unit_stride_hermitian_to_real)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_1D_non_unit_stride_hermitian_to_real)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 25-point dimension, batch 1, stride 3 in and out: out_of_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_real_to_hermitian()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_small_1D_non_unit_stride_real_to_hermitian)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_small_1D_non_unit_stride_real_to_hermitian)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one 25-point dimension, batch 1, stride 3 in and out: out_of_place, hermitian_interleaved, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_hermitian_to_real()
+{
+	data_pattern fill = sawtooth;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_small_1D_non_unit_stride_hermitian_to_real)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_small_1D_non_unit_stride_hermitian_to_real)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 25-point dimension, batch 1, stride 3 in and out: out_of_place, hermitian_interleaved, impulse data. NOTE(review): same length as the very_small variant - confirm 25 is intended.
+template< class T, class cl_T, class fftw_T >
+void very_very_small_1D_non_unit_stride_real_to_hermitian()
+{
+	data_pattern fill = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_very_small_1D_non_unit_stride_real_to_hermitian)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_very_small_1D_non_unit_stride_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_very_small_1D_non_unit_stride_real_to_hermitian)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_very_small_1D_non_unit_stride_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_real for one 25-point dimension, batch 1, stride 3 in and out: out_of_place, hermitian_interleaved, impulse data. NOTE(review): same length as the very_small variant - confirm 25 is intended.
+template< class T, class cl_T, class fftw_T >
+void very_very_small_1D_non_unit_stride_hermitian_to_real()
+{
+	data_pattern fill = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 3 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 3 );
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_real<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_very_small_1D_non_unit_stride_hermitian_to_real)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_very_small_1D_non_unit_stride_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_very_small_1D_non_unit_stride_hermitian_to_real)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_very_small_1D_non_unit_stride_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one normal5-point dimension, batch 2, stride 42 and distance (length * stride + 14) on both sides: out_of_place, complex_planar in, complex_interleaved out, direction::forward, sawtooth data.
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 2;
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 42 );
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 42 );
+	size_t dist_in = dims.back() * strides_in.back() + 14;
+	size_t dist_out = dims.back() * strides_out.back() + 14;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_non_unit_stride_and_distance_complex_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_non_unit_stride_and_distance_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_non_unit_stride_and_distance_complex_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_non_unit_stride_and_distance_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 25-point dimension, batch 2, input stride 2 and distance (length * stride + 2), output stride/distance copied from the input: out_of_place, hermitian_interleaved, impulse data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_non_unit_stride_and_distance_real_to_complex()
+{
+	data_pattern fill = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 2;
+
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 2 );
+	size_t dist_in = dims.back() * strides_in.back() + 2;
+
+	std::vector<size_t> strides_out( strides_in );
+	size_t dist_out = dist_in;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_small_1D_non_unit_stride_and_distance_real_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_and_distance_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_small_1D_non_unit_stride_and_distance_real_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_non_unit_stride_and_distance_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 25-point dimension, batch 2: out_of_place, hermitian_interleaved, impulse data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex()
+{
+	data_pattern fill = impulse;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 2;
+
+	// Input stride 16 with 128 added to the distance; output stride 2 with 2 added.
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 16 );
+	size_t dist_in = dims.back() * strides_in.back() + 128;
+
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 2 );
+	size_t dist_out = dims.back() * strides_out.back() + 2;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_out_of_place_different_input_output_non_unit_stride_and_distance_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls real_to_complex for one 25-point dimension, batch 2: in_place, hermitian_interleaved, impulse data.
+template< class T, class cl_T, class fftw_T >
+void very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex()
+{
+	data_pattern fill = impulse;
+	placeness::placeness_t mode = placeness::in_place;
+	layout::buffer_layout_t buf_layout = layout::hermitian_interleaved;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	size_t batch_count = 2;
+
+	// Input stride 16 with 128 added to the distance; output stride 2 with 2 added.
+	std::vector<size_t> strides_in;
+	strides_in.push_back( 16 );
+	size_t dist_in = dims.back() * strides_in.back() + 128;
+
+	std::vector<size_t> strides_out;
+	strides_out.push_back( 2 );
+	size_t dist_out = dims.back() * strides_out.back() + 2;
+
+	real_to_complex<T, cl_T, fftw_T>( fill, dims, batch_count, strides_in, strides_out, dist_in, dist_out, buf_layout, mode );
+}
+
+TEST_F(accuracy_test_pow5_single, very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { very_small_1D_in_place_different_input_output_non_unit_stride_and_distance_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one normal5-point dimension, batch 1: out_of_place, complex_planar in, complex_interleaved out, direction::forward, sawtooth data, with 42.0f as the extra trailing argument (presumably the scale - confirm).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_forward_user_defined_scale_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::forward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, mode, 42.0f );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_forward_user_defined_scale_complex_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_forward_user_defined_scale_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_forward_user_defined_scale_complex_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_forward_user_defined_scale_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+// Calls complex_to_complex for one normal5-point dimension, batch 1: out_of_place, complex_planar in, complex_interleaved out, direction::backward, sawtooth data, with 42.5f as the extra trailing argument (presumably the scale - confirm).
+template< class T, class cl_T, class fftw_T >
+void normal_1D_backward_user_defined_scale_complex_to_complex()
+{
+	data_pattern fill = sawtooth;
+	direction::direction_t dir = direction::backward;
+	placeness::placeness_t mode = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	size_t batch_count = 1;
+	std::vector<size_t> strides_in;	// left empty
+	std::vector<size_t> strides_out;	// left empty
+	size_t dist_in = 0;
+	size_t dist_out = 0;
+	complex_to_complex<T, cl_T, fftw_T>( fill, dir, dims, batch_count, strides_in, strides_out, dist_in, dist_out, src_layout, dst_layout, mode, 42.5f );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_backward_user_defined_scale_complex_to_complex)
+{	// Single-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_backward_user_defined_scale_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_backward_user_defined_scale_complex_to_complex)
+{	// Double-precision instantiation; a std::exception is handed to handle_exception.
+	try { normal_1D_backward_user_defined_scale_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& ex ) { handle_exception( ex ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_real_to_hermitian()
+{
+	// Batch of 2, length normal5, stride 42 on both buffers, distance = length * stride + 14.
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 42 );
+	out_strides.push_back( 42 );
+	size_t in_dist = dims.back() * in_strides.back() + 14;
+	size_t out_dist = dims.back() * out_strides.back() + 14;
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_non_unit_stride_and_distance_real_to_hermitian)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_1D_non_unit_stride_and_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_non_unit_stride_and_distance_real_to_hermitian)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_1D_non_unit_stride_and_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_user_defined_scale_real_to_hermitian()
+{
+	// Single 1D transform of length normal5 on an impulse pattern; hermitian_planar layout, out of place.
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 42.0f is passed explicitly as the final argument.
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness, 42.0f );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_user_defined_scale_real_to_hermitian)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_1D_user_defined_scale_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_user_defined_scale_real_to_hermitian)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_1D_user_defined_scale_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	// Batch of 2, length normal5, stride 42 on both buffers, distance = length * stride + 14.
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	data_pattern test_pattern = sawtooth;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;
+	in_strides.push_back( 42 );
+	out_strides.push_back( 42 );
+	size_t in_dist = dims.back() * in_strides.back() + 14;
+	size_t out_dist = dims.back() * out_strides.back() + 14;
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_1D_non_unit_stride_and_distance_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_1D_non_unit_stride_and_distance_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_real_to_hermitian()
+{
+	// 25 x 25 transform, batch of 2, padded strides on both buffers, zero distances.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	dims.push_back( 25 );
+
+	// Input strides: 5, then 25 * 5 + 1.
+	std::vector<size_t> in_strides;
+	in_strides.push_back( 5 );
+	in_strides.push_back( dims.front() * in_strides.front() + 1 );
+
+	// Output strides: 2, then 25 * 2 + 2.
+	std::vector<size_t> out_strides;
+	out_strides.push_back( 2 );
+	out_strides.push_back( dims.front() * out_strides.front() + 2 );
+	size_t in_dist = 0, out_dist = 0;
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_non_unit_stride_real_to_hermitian)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_non_unit_stride_real_to_hermitian)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_distance_real_to_hermitian()
+{
+	// 25 x 25 transform, batch of 2, empty stride vectors, distances of 25 * 25 plus 4 (in) / 5 (out).
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	dims.push_back( 25 );
+	std::vector<size_t> in_strides, out_strides;
+
+	const size_t plane_size = dims[0] * dims[1];
+	size_t in_dist = plane_size + 4;
+	size_t out_dist = plane_size + 5;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_non_unit_distance_real_to_hermitian)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_non_unit_distance_real_to_hermitian)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_real_to_hermitian()
+{
+	// 25 x 25 transform, batch of 2, with padded strides and padded distances on both buffers.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	dims.push_back( 25 );
+
+	std::vector<size_t> in_strides;
+	in_strides.push_back( 5 );
+	in_strides.push_back( dims.front() * in_strides.front() + 1 );
+	size_t in_dist = dims.back() * in_strides.back() + 30;
+
+	std::vector<size_t> out_strides;
+	out_strides.push_back( 2 );
+	out_strides.push_back( dims.front() * out_strides.front() + 2 );
+	size_t out_dist = dims.back() * out_strides.back() + 42;
+
+	real_to_complex<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_non_unit_stride_and_distance_real_to_hermitian)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_and_distance_real_to_hermitian<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_non_unit_stride_and_distance_real_to_hermitian)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_and_distance_real_to_hermitian<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	// 25 x 25 transform, batch of 2, with padded strides and padded distances on both buffers.
+	data_pattern test_pattern = sawtooth;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_interleaved;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( 25 );
+	dims.push_back( 25 );
+
+	std::vector<size_t> in_strides;
+	in_strides.push_back( 12 );
+	in_strides.push_back( dims.front() * in_strides.front() + 9 );
+	size_t in_dist = dims.back() * in_strides.back() + 50;
+
+	std::vector<size_t> out_strides;
+	out_strides.push_back( 7 );
+	out_strides.push_back( dims.front() * out_strides.front() + 32 );
+	size_t out_dist = dims.back() * out_strides.back() + 60;
+
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_non_unit_stride_and_distance_hermitian_to_real)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_and_distance_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_non_unit_stride_and_distance_hermitian_to_real)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_and_distance_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_user_defined_scale_hermitian_to_real()
+{
+	// Single 1D transform of length normal5 on a sawtooth pattern; hermitian_planar layout, out of place.
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t hermitian_layout = layout::hermitian_planar;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 42.0f is passed explicitly as the final argument.
+	complex_to_real<T, cl_T, fftw_T>( test_pattern, dims, batch_count, in_strides, out_strides, in_dist, out_dist, hermitian_layout, fft_placeness, 42.0f );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_user_defined_scale_hermitian_to_real)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_1D_user_defined_scale_hermitian_to_real<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_1D_user_defined_scale_hermitian_to_real)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_1D_user_defined_scale_hermitian_to_real<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_1D_forward_complex_to_complex()
+{
+	// Length-1 forward transform on an impulse pattern: planar input, interleaved output, out of place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 0.42f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 0.42f );
+}
+
+TEST_F(accuracy_test_pow5_single, single_point_1D_forward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { single_point_1D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, single_point_1D_forward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { single_point_1D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_1D_backward_complex_to_complex()
+{
+	// Length-1 backward transform on an impulse pattern: interleaved input, planar output, out of place.
+	direction::direction_t fft_direction = direction::backward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 0.42f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 0.42f );
+}
+
+TEST_F(accuracy_test_pow5_single, single_point_1D_backward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { single_point_1D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, single_point_1D_backward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { single_point_1D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_complex_to_complex()
+{
+	// small5 x small5 forward transform, planar input, interleaved output, out of place, padded strides.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+	size_t batch_count = 1;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	// Both buffers get the same strides: 3, then small5 * 3 + 20.
+	std::vector<size_t> in_strides;
+	in_strides.push_back( 3 );
+	in_strides.push_back( dims.front() * in_strides.front() + 20 );
+	std::vector<size_t> out_strides( in_strides );
+	size_t in_dist = 0, out_dist = 0;
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_non_unit_stride_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_non_unit_stride_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_2D_non_unit_stride_and_distance_complex_to_complex()
+{
+	// small5 x small5 forward transform, batch of 2, interleaved input, planar output, padded strides and distances.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+	size_t batch_count = 2;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( small5 );
+
+	std::vector<size_t> in_strides;
+	in_strides.push_back( 42 );
+	in_strides.push_back( dims.front() * in_strides.front() + 19 );
+	std::vector<size_t> out_strides( in_strides );	// output uses the same stride values
+	size_t in_dist = dims.back() * in_strides.back() + 14;
+	size_t out_dist = dims.back() * out_strides.back() + 14;
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, small_2D_non_unit_stride_and_distance_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_and_distance_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, small_2D_non_unit_stride_and_distance_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { small_2D_non_unit_stride_and_distance_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_forward_user_defined_scale_complex_to_complex()
+{
+	// normal5 x normal5 forward transform: planar input, interleaved output, out of place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 42.0f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 42.0f );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_forward_user_defined_scale_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_2D_forward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_forward_user_defined_scale_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_2D_forward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_backward_user_defined_scale_complex_to_complex()
+{
+	// normal5 x normal5 backward transform: planar input, interleaved output, out of place.
+	direction::direction_t fft_direction = direction::backward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 42.5f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 42.5f );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_backward_user_defined_scale_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_2D_backward_user_defined_scale_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_backward_user_defined_scale_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_2D_backward_user_defined_scale_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void rectangular_2D_array_complex_to_complex()
+{
+	// small5 x normal5 forward transform, batch of 8, interleaved on both sides, in place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( normal5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 8;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, rectangular_2D_array_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { rectangular_2D_array_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, rectangular_2D_array_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { rectangular_2D_array_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_array_complex_to_complex_with_odd_batch_size()
+{
+	// normal5 x small5 forward transform, batch of 5, planar on both sides, in place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( normal5 );
+	dims.push_back( small5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 5;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_array_complex_to_complex_with_odd_batch_size)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { normal_2D_array_complex_to_complex_with_odd_batch_size<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, normal_2D_array_complex_to_complex_with_odd_batch_size)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { normal_2D_array_complex_to_complex_with_odd_batch_size<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_array_forward_complex_to_complex()
+{
+	// small5 x large5 forward transform, batch of 8, interleaved on both sides, in place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( large5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 8;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_array_forward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { large_2D_array_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_array_forward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { large_2D_array_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_2D_array_backward_complex_to_complex()
+{
+	// small5 x large5 backward transform, batch of 8, interleaved on both sides, in place.
+	direction::direction_t fft_direction = direction::backward;
+	placeness::placeness_t fft_placeness = placeness::in_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = sawtooth;
+
+	std::vector<size_t> dims;
+	dims.push_back( small5 );
+	dims.push_back( large5 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 8;
+
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness );
+}
+
+TEST_F(accuracy_test_pow5_single, large_2D_array_backward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { large_2D_array_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, large_2D_array_backward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { large_2D_array_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_2D_forward_complex_to_complex()
+{
+	// 1 x 1 forward transform on an impulse pattern: planar input, interleaved output, out of place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 0.42f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 0.42f );
+}
+
+TEST_F(accuracy_test_pow5_single, single_point_2D_forward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { single_point_2D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, single_point_2D_forward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { single_point_2D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_2D_backward_complex_to_complex()
+{
+	// 1 x 1 backward transform on an impulse pattern: interleaved input, planar output, out of place.
+	direction::direction_t fft_direction = direction::backward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 0.42f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 0.42f );
+}
+
+TEST_F(accuracy_test_pow5_single, single_point_2D_backward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { single_point_2D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, single_point_2D_backward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { single_point_2D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_3D_forward_complex_to_complex()
+{
+	// 1 x 1 x 1 forward transform on an impulse pattern: planar input, interleaved output, out of place.
+	direction::direction_t fft_direction = direction::forward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_planar;
+	layout::buffer_layout_t dst_layout = layout::complex_interleaved;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 0.42f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 0.42f );
+}
+
+TEST_F(accuracy_test_pow5_single, single_point_3D_forward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { single_point_3D_forward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, single_point_3D_forward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { single_point_3D_forward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void single_point_3D_backward_complex_to_complex()
+{
+	// 1 x 1 x 1 backward transform on an impulse pattern: interleaved input, planar output, out of place.
+	direction::direction_t fft_direction = direction::backward;
+	placeness::placeness_t fft_placeness = placeness::out_of_place;
+	layout::buffer_layout_t src_layout = layout::complex_interleaved;
+	layout::buffer_layout_t dst_layout = layout::complex_planar;
+	data_pattern test_pattern = impulse;
+
+	std::vector<size_t> dims;
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	dims.push_back( 1 );
+	std::vector<size_t> in_strides, out_strides;	// left empty
+	size_t in_dist = 0, out_dist = 0;
+	size_t batch_count = 1;
+	// 0.42f is passed explicitly as the final argument.
+	complex_to_complex<T, cl_T, fftw_T>( test_pattern, fft_direction, dims, batch_count, in_strides, out_strides, in_dist, out_dist, src_layout, dst_layout, fft_placeness, 0.42f );
+}
+
+TEST_F(accuracy_test_pow5_single, single_point_3D_backward_complex_to_complex)
+{	// float / cl_float / fftwf_complex; std::exception is passed to handle_exception
+	try { single_point_3D_backward_complex_to_complex<float, cl_float, fftwf_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+TEST_F(accuracy_test_pow5_double, single_point_3D_backward_complex_to_complex)
+{	// double / cl_double / fftw_complex; std::exception is passed to handle_exception
+	try { single_point_3D_backward_complex_to_complex<double, cl_double, fftw_complex>(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_non_unit_stride_complex_to_complex()
+{
+	std::vector<size_t> lengths( 3, small5 );
+	size_t batch = 1;
+	// every stride above x spans the extent below it plus arbitrary padding (20, then 17)
+	const size_t stride_x = 2;
+	const size_t stride_y = lengths[0] * stride_x + 20;
+	const size_t stride_z = lengths[1] * stride_y + 17;
+	std::vector<size_t> input_strides;
+	input_strides.push_back( stride_x );
+	input_strides.push_back( stride_y );
+	input_strides.push_back( stride_z );
+	std::vector<size_t> output_strides( input_strides );
+
+	size_t input_distance = 0;
+	size_t output_distance = input_distance;
+
+	layout::buffer_layout_t in_layout = layout::complex_planar;
+	layout::buffer_layout_t out_layout = layout::complex_interleaved;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t which_way = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, which_way, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_non_unit_stride_complex_to_complex)
+{
+	try { small_3D_non_unit_stride_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, small_3D_non_unit_stride_complex_to_complex)
+{
+	try { small_3D_non_unit_stride_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+// *****************************************************
+// *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_non_unit_stride_and_distance_complex_to_complex()
+{
+	std::vector<size_t> lengths( 3, small5 );
+	size_t batch = 2;
+	// every stride above x spans the extent below it plus arbitrary padding (19, then 3)
+	const size_t stride_x = 2;
+	const size_t stride_y = lengths[0] * stride_x + 19;
+	const size_t stride_z = lengths[1] * stride_y + 3;
+	std::vector<size_t> input_strides;
+	input_strides.push_back( stride_x );
+	input_strides.push_back( stride_y );
+	input_strides.push_back( stride_z );
+	std::vector<size_t> output_strides( input_strides );
+	// batch members are spaced by the last length times the last stride, plus 14 elements
+	size_t input_distance = lengths.back() * input_strides.back() + 14;
+	size_t output_distance = input_distance;
+
+	layout::buffer_layout_t in_layout = layout::complex_interleaved;
+	layout::buffer_layout_t out_layout = layout::complex_planar;
+	placeness::placeness_t where = placeness::out_of_place;
+	direction::direction_t which_way = direction::forward;
+
+	data_pattern pattern = sawtooth;
+	complex_to_complex<T, cl_T, fftw_T>( pattern, which_way, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, where );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_non_unit_stride_and_distance_complex_to_complex)
+{
+	try { small_3D_non_unit_stride_and_distance_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, small_3D_non_unit_stride_and_distance_complex_to_complex)
+{
+	try { small_3D_non_unit_stride_and_distance_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_round_trip_complex_to_complex()
+{
+	// one dimension of normal5 points, a single batch, interleaved complex data
+	std::vector<size_t> lengths( 1, normal5 );
+	size_t batch = 1;
+	layout::buffer_layout_t buffer_layout = layout::complex_interleaved;
+	data_pattern pattern = sawtooth;
+
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_round_trip_complex_to_complex)
+{
+	try { normal_1D_round_trip_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, normal_1D_round_trip_complex_to_complex)
+{
+	try { normal_1D_round_trip_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_round_trip_complex_to_complex()
+{
+	// normal5 x normal5 points, a single batch, planar complex data
+	std::vector<size_t> lengths( 2, normal5 );
+	size_t batch = 1;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// no strides or distances are passed for this case
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_round_trip_complex_to_complex)
+{
+	try { normal_2D_round_trip_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, normal_2D_round_trip_complex_to_complex)
+{
+	try { normal_2D_round_trip_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_round_trip_complex_to_complex()
+{
+	// a small5 x small5 x small5 volume, a single batch, planar complex data
+	std::vector<size_t> lengths( 3, small5 );
+	size_t batch = 1;
+	layout::buffer_layout_t buffer_layout = layout::complex_planar;
+	data_pattern pattern = sawtooth;
+
+	// forwarded as-is to the round-trip helper
+	// (no strides or distances are passed for this case)
+	complex_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch, buffer_layout );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_round_trip_complex_to_complex)
+{
+	try { small_3D_round_trip_complex_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, small_3D_round_trip_complex_to_complex)
+{
+	try { small_3D_round_trip_complex_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_1D_round_trip_real_to_complex()
+{
+	// one dimension of normal5 points, a single batch, impulse input
+	std::vector<size_t> lengths( 1, normal5 );
+	size_t batch = 1;
+	data_pattern pattern = impulse;
+
+	real_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_1D_round_trip_real_to_complex)
+{
+	try { normal_1D_round_trip_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, normal_1D_round_trip_real_to_complex)
+{
+	try { normal_1D_round_trip_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void large_1D_round_trip_real_to_complex()
+{
+	// one dimension of large5 points, a single batch, impulse input
+	std::vector<size_t> lengths( 1, large5 );
+	size_t batch = 1;
+	data_pattern pattern = impulse;
+
+	real_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch );
+}
+
+TEST_F(accuracy_test_pow5_single, large_1D_round_trip_real_to_complex)
+{
+	try { large_1D_round_trip_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, large_1D_round_trip_real_to_complex)
+{
+	try { large_1D_round_trip_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void normal_2D_round_trip_real_to_complex()
+{
+	// normal5 x normal5 points, a single batch, impulse input
+	std::vector<size_t> lengths( 2, normal5 );
+	size_t batch = 1;
+	data_pattern pattern = impulse;
+
+	// no strides or distances are passed for this case
+	real_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch );
+}
+
+TEST_F(accuracy_test_pow5_single, normal_2D_round_trip_real_to_complex)
+{
+	try { normal_2D_round_trip_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, normal_2D_round_trip_real_to_complex)
+{
+	try { normal_2D_round_trip_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+ // *****************************************************
+ // *****************************************************
+template< class T, class cl_T, class fftw_T >
+void small_3D_round_trip_real_to_complex()
+{
+	// a small5 x small5 x small5 volume, a single batch, impulse input
+	std::vector<size_t> lengths( 3, small5 );
+	size_t batch = 1;
+	data_pattern pattern = impulse;
+
+	// forwarded as-is to the round-trip helper
+	// (no strides or distances are passed for this case)
+	real_to_complex_round_trip<T, cl_T, fftw_T>( pattern, lengths, batch );
+}
+
+TEST_F(accuracy_test_pow5_single, small_3D_round_trip_real_to_complex)
+{
+	try { small_3D_round_trip_real_to_complex< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+// double precision counterpart of the test above
+TEST_F(accuracy_test_pow5_double, small_3D_round_trip_real_to_complex)
+{
+	try { small_3D_round_trip_real_to_complex< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& failure ) { handle_exception( failure ); }
+}
+
+} //namespace
diff --git a/src/tests/accuracy_test_random.cpp b/src/tests/accuracy_test_random.cpp
new file mode 100644
index 00000000..377776fd
--- /dev/null
+++ b/src/tests/accuracy_test_random.cpp
@@ -0,0 +1,725 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <algorithm>
+#include <vector>
+#include <numeric>
+#include <memory>
+#include <time.h>
+#include <gtest/gtest.h>
+#include <boost/lexical_cast.hpp>
+#include <boost/random.hpp>
+
+#include "clFFT.h"
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+#include "typedefs.h"
+#include "accuracy_test_common.h"
+
+size_t super_duper_global_seed;
+
+namespace ParameterizedTest {
+	//TODO this is pasted from cl_transform.h
+	// it should be put in one place for everybody to use
+	/*****************************************************/
+	layout::buffer_layout_t cl_layout_to_buffer_layout( clfftLayout cl_layout )
+	{
+		// translate a clFFT layout enumerator into the matching layout:: value;
+		// anything that is not one of the five known layouts is rejected
+		switch( cl_layout )
+		{
+		case CLFFT_REAL:                  return layout::real;
+		case CLFFT_HERMITIAN_PLANAR:      return layout::hermitian_planar;
+		case CLFFT_COMPLEX_PLANAR:        return layout::complex_planar;
+		case CLFFT_HERMITIAN_INTERLEAVED: return layout::hermitian_interleaved;
+		case CLFFT_COMPLEX_INTERLEAVED:   return layout::complex_interleaved;
+		default:
+			throw std::runtime_error( "invalid cl_layout" );
+		}
+	}
+
+	const size_t one_gb = 1024 * 1024 * 1024;
+
+	size_t size_of_one_point( clfftPrecision precision, clfftLayout layout )
+	{
+		// returns the size in bytes of one point: 1 value if real, 2 if complex or hermitian
+		size_t values_per_point;
+		if( layout == CLFFT_REAL )
+			values_per_point = 1;
+		else if( layout == CLFFT_COMPLEX_INTERLEAVED || layout == CLFFT_COMPLEX_PLANAR || layout == CLFFT_HERMITIAN_PLANAR || layout == CLFFT_HERMITIAN_INTERLEAVED )
+			values_per_point = 2;
+		else
+			throw std::invalid_argument("size_of_one_point: invalid layout provided");
+
+		// each value in a point is a float or a double, depending on the precision
+		if( precision == CLFFT_SINGLE ) return values_per_point * sizeof(float);
+		if( precision == CLFFT_DOUBLE ) return values_per_point * sizeof(double);
+
+		throw std::invalid_argument("size_of_one_point: invalid precision provided");
+	}
+
+	// the CPU can have a lot of memory hanging off of it. in these cases and when CPU is the cl device,
+	// huge amounts of memory might be dedicated to each buffer. in practice, large buffer sizes _destroy_
+	// performance (and can effectively hang the machine). that is bad. to get around this, we limit
+	// each buffer to a GB
+	size_t max_memory_size_for_one_buffer()
+	{
+		// what the device reports (queried with argument 0) ...
+		const size_t device_limit = max_mem_available_on_cl_device(0);
+
+		// ... capped so that a single buffer never exceeds one_gb
+		return std::min( device_limit, one_gb );
+	}
+
+	size_t max_problem_size_in_datapoints( clfftPrecision precision, clfftLayout layout )
+	{
+		const size_t points_per_buffer = max_memory_size_for_one_buffer() / size_of_one_point(precision,layout);
+		return points_per_buffer / 32; // shrink by a further 32x; otherwise problems take a billion hours each
+	}
+
+
+	boost::mt19937 random_parameter_generator; // shared engine; plant_seed reseeds it
+	boost::uniform_int<> distribution(1, INT_MAX); // lower bound is 1, so a draw is never 0
+	boost::variate_generator<boost::mt19937&, boost::uniform_int<> >
+		random_value(random_parameter_generator, distribution); // refers to the engine above (note the mt19937&)
+
+	size_t random_int() {
+		return static_cast<size_t>( random_value() ); // raw draw from the shared generator
+	}
+
+	size_t random_int(size_t max) {
+		return random_int() % (max + 1); // raw draw folded into [0, max]
+	}
+
+	size_t random_int(size_t min, size_t max) {
+		return min + random_int() % (max + 1 - min); // raw draw folded into [min, max]
+	}
+
+	// Builds a random problem size whose lengths are products of 2s, 3s and 5s.
+	//   dimensions        - number of lengths to return
+	//   precision, layout - only used to bound the total number of points
+	// Returns a vector of "dimensions" lengths. The product of the lengths never
+	// exceeds the randomly chosen point budget.
+	// Throws std::runtime_error if the number of lengths and dimensions disagree.
+	std::vector<size_t> random_supported_problem_size( size_t dimensions, clfftPrecision precision, clfftLayout layout )
+	{
+		// the radices a length may be composed of
+		static const size_t radices[] = { 2, 3, 5 };
+		static const size_t radix_count = sizeof(radices) / sizeof(radices[0]);
+
+		// total size of this problem should be some fraction of the total space available on the device
+		const size_t size_budget = random_int(1, max_problem_size_in_datapoints(precision,layout));
+
+		// keep multiplying in randomly chosen radices; stop as soon as the
+		// next one would push the running product past the budget
+		std::vector<size_t> chosen_radices;
+		size_t running_product = 1;
+		while( running_product < size_budget )
+		{
+			const size_t radix = radices[random_int(0, radix_count-1)];
+			if( running_product * radix > size_budget )
+				break;
+
+			running_product *= radix;
+			chosen_radices.push_back(radix);
+		}
+
+		// every dimension starts out with length 1 ...
+		std::vector<size_t> lengths( dimensions, 1 );
+
+		// ... and then each chosen radix, picked in random order, is multiplied
+		// into a randomly selected dimension until none are left
+		while( !chosen_radices.empty() )
+		{
+			const size_t pick = random_int( 0, chosen_radices.size()-1 );
+			const size_t axis = random_int( 0, dimensions-1 );
+			lengths[axis] *= chosen_radices[pick];
+			chosen_radices.erase( chosen_radices.begin() + pick );
+		}
+
+		// sanity check on the result before handing it back
+		if( lengths.size() != dimensions )
+			throw std::runtime_error( "random_supported_problem_size: number of lengths does not corroborate number of dimensions" );
+
+		return lengths;
+	}
+
+	// One randomly generated transform description. The constructor draws its random
+	// choices from random_int(); pattern is always erratic.
+	struct Parameters {
+		size_t batch_size;
+		clfftPrecision precision;
+		clfftDirection direction;
+		clfftDim dimensions;
+		std::vector<size_t> lengths;
+		std::vector<size_t> input_strides;
+		std::vector<size_t> output_strides;
+		size_t input_distance;
+		size_t output_distance;
+		clfftLayout input_layout;
+		clfftLayout output_layout;
+		clfftResultLocation placeness;
+		// start scales at double. we can just cast to float at the cost of
+		// a little precision if single precision is randomly chosen. no biggie
+		double forward_scale;
+		double backward_scale;
+		data_pattern pattern;
+		size_t data_seed;
+
+		//we want to define a maximum stride so that memory does not get out of control
+		static const size_t max_stride = 5;
+		static const size_t max_distance = 128;
+
+		size_t total_size_in_points()
+		{
+			if( lengths.empty() )
+				throw std::runtime_error( "you shouldn't be here!" );
+
+			size_t total_size = 1;
+			for( size_t i = 0; i < lengths.size(); i++ )
+				total_size *= lengths[i];
+
+			return total_size;
+		}
+
+		bool is_in_place()
+		{
+			return placeness == CLFFT_INPLACE;
+		}
+
+		bool is_out_of_place()
+		{
+			return !is_in_place();
+		}
+
+		bool is_r2c()
+		{
+			return input_layout == CLFFT_REAL;
+		}
+
+		bool is_c2r()
+		{
+			return output_layout == CLFFT_REAL;
+		}
+
+		bool is_c2c()
+		{
+			return ( input_layout == CLFFT_COMPLEX_INTERLEAVED || input_layout == CLFFT_COMPLEX_PLANAR )
+				&& ( output_layout == CLFFT_COMPLEX_INTERLEAVED || output_layout == CLFFT_COMPLEX_PLANAR );
+		}
+
+		bool fifty_percent_chance()
+		{
+			return random_int(0,1) != 0;
+		}
+
+		void generate_x_strides()
+		{
+			input_strides.push_back( random_int(1,max_stride) );
+			output_strides.push_back( random_int(1,max_stride) );
+		}
+
+		// initializers follow member declaration order (the order C++ actually runs them in);
+		// members otherwise assigned only inside the try block get defined defaults first
+		Parameters()
+			: batch_size( 1 )
+			, precision( static_cast<clfftPrecision>(random_int(CLFFT_SINGLE,CLFFT_DOUBLE)) )
+			, direction( CLFFT_FORWARD )
+			, dimensions( static_cast<clfftDim>(random_int(CLFFT_1D,ENDDIMENSION-1)) )
+			, input_distance( 0 )
+			, output_distance( 0 )
+			, input_layout( static_cast<clfftLayout>(random_int(CLFFT_COMPLEX_INTERLEAVED, CLFFT_REAL)) )
+			, output_layout( CLFFT_COMPLEX_INTERLEAVED )
+			, placeness( static_cast<clfftResultLocation>(random_int(CLFFT_INPLACE, CLFFT_OUTOFPLACE)) )
+			, forward_scale( static_cast<float>(random_int())/static_cast<float>(random_int()) )
+			, backward_scale( static_cast<float>(random_int())/static_cast<float>(random_int()) )
+			, pattern( erratic )
+			, data_seed( random_int() )
+		{
+			try
+			{
+				// input and output layouts have strict requirements, so we'll base the output layout
+				// off of our randomly selected input layout
+				if( input_layout == CLFFT_REAL )
+				{
+					if( placeness == CLFFT_INPLACE )
+						output_layout = CLFFT_HERMITIAN_INTERLEAVED;
+					else if( placeness == CLFFT_OUTOFPLACE )
+						output_layout = static_cast<clfftLayout>(random_int(CLFFT_HERMITIAN_INTERLEAVED, CLFFT_HERMITIAN_PLANAR));
+					else
+						throw std::runtime_error( "parameter generator invalid placeness" );
+				}
+				else if( input_layout == CLFFT_HERMITIAN_INTERLEAVED )
+				{
+					output_layout = CLFFT_REAL;
+				}
+				else if( input_layout == CLFFT_HERMITIAN_PLANAR )
+				{
+					// in-place transforms not supported with hermitian planar
+					placeness = CLFFT_OUTOFPLACE;
+
+					output_layout = CLFFT_REAL;
+				}
+				else if( input_layout == CLFFT_COMPLEX_INTERLEAVED || input_layout == CLFFT_COMPLEX_PLANAR )
+				{
+					// complex is a little simpler. we can do them together here
+					if( placeness == CLFFT_INPLACE )
+						output_layout = input_layout;
+					else
+						output_layout = static_cast<clfftLayout>(random_int(CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_PLANAR));
+				}
+				else
+				{
+					throw std::runtime_error( "parameter generator invalid input layout" );
+				}
+
+				direction = random_int(0,1) ? CLFFT_FORWARD : CLFFT_BACKWARD;
+
+				lengths = random_supported_problem_size(dimensions, precision, input_layout);
+
+				// strides and distances
+
+				if( fifty_percent_chance() ) // about half the time, we just want unit strides
+				{
+					// input_strides and output_strides remain empty
+
+					input_distance = 0;
+					output_distance = 0;
+				}
+				else if( is_in_place() && is_r2c() )
+				{
+					generate_x_strides();
+
+					// generate y strides
+					if( dimensions >= 2 )
+					{
+						size_t random_y_stride = random_int(0,max_stride);
+						input_strides.push_back( input_strides[0] * ( 1 + lengths[0] / 2 ) * 2 + 2 * random_y_stride );
+						output_strides.push_back( output_strides[0] * ( 1 + lengths[0] / 2 ) + random_y_stride );
+
+						// both strides need to be able to fit both the input and the output
+						if( input_strides[1] > 2 * output_strides[1] )
+						{
+							output_strides[1] = input_strides[1] / 2;
+						}
+						else if( input_strides[1] < 2 * output_strides[1] )
+						{
+							input_strides[1] = 2 * output_strides[1];
+						}
+					}
+
+					// generate z strides
+					if( dimensions >= 3 )
+					{
+						output_strides.push_back( output_strides[1] * lengths[1] );
+						input_strides.push_back( 2 * output_strides[2] );
+					}
+
+					// generate distance
+
+					// 1D is a special case with distances, because we need to make sure
+					// we have the extra padding that we would have otherwise gotten from
+					// the y dimension
+					if( dimensions == 1 )
+					{
+						input_distance = input_strides[0] * ( 1 + lengths[0] / 2 ) * 2;
+						output_distance = output_strides[0] * ( 1 + lengths[0] / 2 );
+
+						// both strides need to be able to fit both the input and the output
+						if( input_distance > 2 * output_distance )
+						{
+							output_distance = input_distance / 2;
+						}
+						else if( input_distance < 2 * output_distance )
+						{
+							input_distance = 2 * output_distance;
+						}
+
+						size_t random_distance = random_int(0,max_distance);
+						input_distance += 2 * random_distance;
+						output_distance += random_distance;
+					}
+					else
+					{
+						output_distance = output_strides[dimensions-1] * lengths[dimensions-1] + random_int(0,max_distance);
+						input_distance = 2 * output_distance;
+					}
+
+					// check for ok
+					if( dimensions >= 2 )
+						if( input_strides[1] != 2 * output_strides[1] )
+							throw std::runtime_error( "invalid stride y generated for r2c" );
+
+					if( dimensions >= 3 )
+						if( input_strides[2] != 2 * output_strides[2] )
+							throw std::runtime_error( "invalid stride z generated for r2c" );
+
+					if( input_distance != 2 * output_distance )
+						throw std::runtime_error( "invalid distance generated for r2c" );
+				}
+				else if( is_in_place() && is_c2r() )
+				{
+					generate_x_strides();
+
+					// generate y strides
+					if( dimensions >= 2 )
+					{
+						size_t random_y_stride = random_int(0,max_stride);
+						output_strides.push_back( output_strides[0] * ( 1 + lengths[0] / 2 ) * 2 + 2 * random_y_stride );
+						input_strides.push_back( input_strides[0] * ( 1 + lengths[0] / 2 ) + random_y_stride );
+
+						// both strides need to be able to fit both the output and the input
+						if( output_strides[1] > 2 * input_strides[1] )
+						{
+							input_strides[1] = output_strides[1] / 2;
+						}
+						else if( output_strides[1] < 2 * input_strides[1] )
+						{
+							output_strides[1] = 2 * input_strides[1];
+						}
+					}
+
+					// generate z strides
+					if( dimensions >= 3 )
+					{
+						input_strides.push_back( input_strides[1] * lengths[1] );
+						output_strides.push_back( 2 * input_strides[2] );
+					}
+
+					// generate distance
+
+					// 1D is a special case with distances, because we need to make sure
+					// we have the extra padding that we would have otherwise gotten from
+					// the y dimension
+					if( dimensions == 1 )
+					{
+						output_distance = output_strides[0] * ( 1 + lengths[0] / 2 ) * 2;
+						input_distance = input_strides[0] * ( 1 + lengths[0] / 2 );
+
+						// both strides need to be able to fit both the output and the input
+						if( output_distance > 2 * input_distance )
+						{
+							input_distance = output_distance / 2;
+						}
+						else if( output_distance < 2 * input_distance )
+						{
+							output_distance = 2 * input_distance;
+						}
+
+						size_t random_distance = random_int(0,max_distance);
+						output_distance += 2 * random_distance;
+						input_distance += random_distance;
+					}
+					else
+					{
+						input_distance = input_strides[dimensions-1] * lengths[dimensions-1] + random_int(0,max_distance);
+						output_distance = 2 * input_distance;
+					}
+
+					// check for ok
+					if( dimensions >= 2 )
+						if( output_strides[1] != 2 * input_strides[1] )
+							throw std::runtime_error( "invalid stride y generated for c2r" );
+
+					if( dimensions >= 3 )
+						if( output_strides[2] != 2 * input_strides[2] )
+							throw std::runtime_error( "invalid stride z generated for c2r" );
+
+					if( output_distance != 2 * input_distance )
+						throw std::runtime_error( "invalid distance generated for c2r" );
+				}
+				else // placeness::in_place c2c or placeness::out_of_place
+				{
+					// input first
+					if( fifty_percent_chance() )
+					{
+						// tightly packed input
+						// leave stride vector empty
+
+						input_distance = 0;
+					}
+					else // input has padding
+					{
+						for( int i = 0; i < dimensions; i++)
+						{
+							if( i == 0 )
+								input_strides.push_back( random_int(1,max_stride) );
+							else
+								input_strides.push_back( lengths[i-1] * input_strides[i-1] + random_int(0,max_stride) );
+						}
+
+						input_distance = input_strides[dimensions-1] * lengths[dimensions-1] + random_int(max_distance);
+					}
+
+					// output next
+					if( is_in_place() && is_c2c() )
+					{
+						output_strides = input_strides;
+						output_distance = input_distance;
+					}
+					else if( fifty_percent_chance() )
+					{
+						// tightly packed output
+						// leave stride vector empty
+
+						output_distance = 0;
+					}
+					else // output has padding
+					{
+						for( int i = 0; i < dimensions; i++)
+						{
+							if( i == 0 )
+								output_strides.push_back( random_int(1,max_stride) );
+							else
+								output_strides.push_back( lengths[i-1] * output_strides[i-1] + random_int(0,max_stride) );
+						}
+
+						output_distance = output_strides[dimensions-1] * lengths[dimensions-1] + random_int(max_distance);
+					}
+				}
+
+				if( fifty_percent_chance() )
+				{
+					// we'll want batches sometimes . . .
+
+					// limit the batch size, taking in account available space and size of each pass
+					size_t current_problem_size;
+					if( input_strides.empty() )
+						current_problem_size = total_size_in_points();
+					else
+						current_problem_size = input_strides[dimensions-1] * lengths[dimensions-1];
+
+					size_t max_problem_size = max_problem_size_in_datapoints( precision, input_layout );
+					size_t max_batch_size_for_this_problem = max_problem_size / current_problem_size;
+					if( max_batch_size_for_this_problem <= 1 )
+						batch_size = 1;
+					else
+						batch_size = random_int( 1, max_batch_size_for_this_problem );
+				}
+				else
+				{
+					// . . . and sometimes we won't
+
+					batch_size = 1;
+				}
+			}
+			catch( const std::exception& err )
+			{
+				handle_exception(err);
+			}
+		}
+	}; //struct Parameters
+
+	// Seeds the shared parameter generator and publishes the seed so that a run can be reproduced.
+	struct plant_seed {
+		time_t the_seed;
+		plant_seed(time_t seed_in) : the_seed( seed_in )
+		{
+			random_parameter_generator.seed( static_cast<boost::uint32_t>( the_seed ) );
+			// property keys end up as XML attribute names in the gtest report, so no spaces allowed
+			::testing::Test::RecordProperty("parameter_seed", static_cast<unsigned int>(the_seed));
+			std::cout << "Random test's seed is " << the_seed << std::endl;
+		}
+	};
+
+	class TestParameterGenerator {
+		public:
+			// plants the seed (member initializer), then builds the requested number of sets
+			TestParameterGenerator(int number_of_data_sets)
+				: initial_seed(random_test_parameter_seed)
+			{
+				generate(number_of_data_sets);
+			}
+
+			std::vector<Parameters> & parameter_sets() { return data_sets; }
+
+		private:
+			// each Parameters() draws from the generator that initial_seed has already seeded
+			void generate(int number_of_data_sets)
+			{
+				for( int remaining = number_of_data_sets; remaining > 0; --remaining )
+					data_sets.push_back( Parameters() );
+			}
+
+			std::vector<Parameters> data_sets;
+			ParameterizedTest::plant_seed initial_seed;
+	}; //class TestParameterGenerator
+
+} //namespace ParameterizedTest
+
+class accuracy_test_random : public ::testing::TestWithParam<ParameterizedTest::Parameters> {
+	protected:
+		virtual void SetUp() {}    // nothing to prepare: the test body reads its input from GetParam()
+		virtual void TearDown() {} // nothing to release
+		accuracy_test_random() {}
+		virtual ~accuracy_test_random() {}
+};
+
+TEST_P( accuracy_test_random, random_transform ) {
+	try {
+		ParameterizedTest::Parameters params = GetParam();
+		RecordProperty("batch_size", (int)params.batch_size);
+		RecordProperty("precision", params.precision);
+		RecordProperty("direction", params.direction);
+		RecordProperty("dimensions", params.dimensions);
+		RecordProperty("length_x", (int)params.lengths[0]);
+		if( params.dimensions >= CLFFT_2D) RecordProperty("length_y", (int)params.lengths[1]);
+		if( params.dimensions >= CLFFT_3D) RecordProperty("length_z", (int)params.lengths[2]);
+
+		if( params.input_strides.empty() )
+		{
+			RecordProperty("input_strides", 0);
+		}
+		else
+		{
+			RecordProperty("input_stride_x", (int)params.input_strides[0]);
+			if( params.dimensions >= CLFFT_2D) RecordProperty("input_stride_y", (int)params.input_strides[1]);
+			if( params.dimensions >= CLFFT_3D) RecordProperty("input_stride_z", (int)params.input_strides[2]);
+		}
+
+		if( params.output_strides.empty() )
+		{
+			RecordProperty("output_strides", 0);
+		}
+		else
+		{
+			RecordProperty("output_stride_x", (int)params.output_strides[0]);
+			if( params.dimensions >= CLFFT_2D) RecordProperty("output_stride_y", (int)params.output_strides[1]);
+			if( params.dimensions >= CLFFT_3D) RecordProperty("output_stride_z", (int)params.output_strides[2]);
+		}
+
+		RecordProperty("input_distance", (int)params.input_distance);
+		RecordProperty("output_distance", (int)params.output_distance);
+		RecordProperty("input_layout", params.input_layout);
+		RecordProperty("output_layout", params.output_layout);
+		RecordProperty("placeness", params.placeness);
+		RecordProperty("forward_scale", (int)params.forward_scale);
+		RecordProperty("backward_scale", (int)params.backward_scale);
+		RecordProperty("data_seed", (int)params.data_seed);
+
+		// SO BAD
+		super_duper_global_seed = params.data_seed;
+
+		if( params.precision == CLFFT_SINGLE )
+		{
+			if( params.input_layout == CLFFT_REAL && ( params.output_layout == CLFFT_HERMITIAN_INTERLEAVED || params.output_layout == CLFFT_HERMITIAN_PLANAR ) )
+			{
+				real_to_complex<float, cl_float, fftwf_complex>( params.pattern,
+																 params.lengths,
+																 params.batch_size,
+																 params.input_strides,
+																 params.output_strides,
+																 params.input_distance,
+																 params.output_distance,
+																 ParameterizedTest::cl_layout_to_buffer_layout( params.output_layout ),
+																 params.placeness == CLFFT_INPLACE ? placeness::in_place : placeness::out_of_place );
+			}
+			else if( ( params.input_layout == CLFFT_HERMITIAN_INTERLEAVED || params.input_layout == CLFFT_HERMITIAN_PLANAR ) && params.output_layout == CLFFT_REAL )
+			{
+				complex_to_real<float, cl_float, fftwf_complex>( params.pattern,
+																 params.lengths,
+																 params.batch_size,
+																 params.input_strides,
+																 params.output_strides,
+																 params.input_distance,
+																 params.output_distance,
+																 ParameterizedTest::cl_layout_to_buffer_layout( params.input_layout ),
+																 params.placeness == CLFFT_INPLACE ? placeness::in_place : placeness::out_of_place );
+			}
+			else if( ( params.input_layout == CLFFT_COMPLEX_INTERLEAVED || params.input_layout == CLFFT_COMPLEX_PLANAR ) &&
+					 ( params.output_layout == CLFFT_COMPLEX_INTERLEAVED || params.output_layout == CLFFT_COMPLEX_PLANAR ) )
+			{
+				complex_to_complex<float, cl_float, fftwf_complex>( params.pattern,
+																	params.direction == CLFFT_FORWARD ? direction::forward : direction::backward,
+																	params.lengths,
+																	params.batch_size,
+																	params.input_strides,
+																	params.output_strides,
+																	params.input_distance,
+																	params.output_distance,
+																	ParameterizedTest::cl_layout_to_buffer_layout( params.input_layout ),
+																	ParameterizedTest::cl_layout_to_buffer_layout( params.output_layout ),
+																	params.placeness == CLFFT_INPLACE ? placeness::in_place : placeness::out_of_place );
+			}
+			else
+			{
+				throw std::runtime_error( "bad layout combination" );
+			}
+		}
+		else if( params.precision == CLFFT_DOUBLE )
+		{
+			if( params.input_layout == CLFFT_REAL && ( params.output_layout == CLFFT_HERMITIAN_INTERLEAVED || params.output_layout == CLFFT_HERMITIAN_PLANAR ) )
+			{
+				real_to_complex<double, cl_double, fftw_complex>( params.pattern,
+																 params.lengths,
+																 params.batch_size,
+																 params.input_strides,
+																 params.output_strides,
+																 params.input_distance,
+																 params.output_distance,
+																 ParameterizedTest::cl_layout_to_buffer_layout( params.output_layout ),
+																 params.placeness == CLFFT_INPLACE ? placeness::in_place : placeness::out_of_place );
+			}
+			else if( ( params.input_layout == CLFFT_HERMITIAN_INTERLEAVED || params.input_layout == CLFFT_HERMITIAN_PLANAR ) && params.output_layout == CLFFT_REAL )
+			{
+				complex_to_real<double, cl_double, fftw_complex>( params.pattern,
+																 params.lengths,
+																 params.batch_size,
+																 params.input_strides,
+																 params.output_strides,
+																 params.input_distance,
+																 params.output_distance,
+																 ParameterizedTest::cl_layout_to_buffer_layout( params.input_layout ),
+																 params.placeness == CLFFT_INPLACE ? placeness::in_place : placeness::out_of_place );
+			}
+			else if( ( params.input_layout == CLFFT_COMPLEX_INTERLEAVED || params.input_layout == CLFFT_COMPLEX_PLANAR ) &&
+					 ( params.output_layout == CLFFT_COMPLEX_INTERLEAVED || params.output_layout == CLFFT_COMPLEX_PLANAR ) )
+			{
+				complex_to_complex<double, cl_double, fftw_complex>( params.pattern,
+																	params.direction == CLFFT_FORWARD ? direction::forward : direction::backward,
+																	params.lengths,
+																	params.batch_size,
+																	params.input_strides,
+																	params.output_strides,
+																	params.input_distance,
+																	params.output_distance,
+																	ParameterizedTest::cl_layout_to_buffer_layout( params.input_layout ),
+																	ParameterizedTest::cl_layout_to_buffer_layout( params.output_layout ),
+																	params.placeness == CLFFT_INPLACE ? placeness::in_place : placeness::out_of_place );
+			}
+			else
+			{
+				throw std::runtime_error( "bad layout combination" );
+			}
+		}
+		else
+		{
+			throw std::runtime_error("Random test: this code path should never be executed");
+		}
+	}
+	catch( const std::exception& err ) {
+		handle_exception(err);
+	}
+}
+
+INSTANTIATE_TEST_CASE_P(
+	clfft_RandomTest, // prefix given to every instantiated test name
+	accuracy_test_random, // the parameterized test case being instantiated
+	::testing::ValuesIn( ParameterizedTest::TestParameterGenerator // one instance per generated parameter set
+		((int)number_of_random_tests).parameter_sets())
+);
diff --git a/src/tests/buffer.cpp b/src/tests/buffer.cpp
new file mode 100644
index 00000000..4ed68482
--- /dev/null
+++ b/src/tests/buffer.cpp
@@ -0,0 +1,469 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+
+#include "buffer.h"
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Fixture for the buffer self-tests: switches the suppress_output flag on
+// in SetUp() and back off in TearDown().
+class test_harness_tests_buffer : public ::testing::Test
+{
+protected:
+	test_harness_tests_buffer()
+	{}
+
+	virtual ~test_harness_tests_buffer()
+	{}
+
+	virtual void SetUp() { suppress_output = true; }
+	virtual void TearDown() { suppress_output = false; }
+};
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, even_length_hermitian_buffers_are_halfish_size) {
+	// a requested x length of 8 is expected to be reported as 8/2 + 1 = 5; y and z stay as requested
+	const size_t rank = 3;
+	const size_t requested[3] = {8, 2, 4};
+	const size_t* const packed_strides = NULL;
+	const size_t batches = 3, packed_distance = 0;
+
+	buffer<float> interleaved( rank, requested, packed_strides, batches, packed_distance, layout::hermitian_interleaved, CLFFT_OUTOFPLACE );
+	EXPECT_EQ( 5, interleaved.length(dimx) );
+	EXPECT_EQ( 2, interleaved.length(dimy) );
+	EXPECT_EQ( 4, interleaved.length(dimz) );
+
+	buffer<float> planar( rank, requested, packed_strides, batches, packed_distance, layout::hermitian_planar, CLFFT_OUTOFPLACE );
+	EXPECT_EQ( 5, planar.length(dimx) );
+	EXPECT_EQ( 2, planar.length(dimy) );
+	EXPECT_EQ( 4, planar.length(dimz) );
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, odd_length_hermitian_buffers_are_halfish_size) {
+	// a requested x length of 9 is expected to be reported as 9/2 + 1 = 5; y and z stay as requested
+	const size_t rank = 3;
+	const size_t requested[3] = {9, 2, 4};
+	const size_t* const packed_strides = NULL;
+	const size_t batches = 3, packed_distance = 0;
+
+	buffer<float> interleaved( rank, requested, packed_strides, batches, packed_distance, layout::hermitian_interleaved, CLFFT_OUTOFPLACE );
+	EXPECT_EQ( 5, interleaved.length(dimx) );
+	EXPECT_EQ( 2, interleaved.length(dimy) );
+	EXPECT_EQ( 4, interleaved.length(dimz) );
+
+	buffer<float> planar( rank, requested, packed_strides, batches, packed_distance, layout::hermitian_planar, CLFFT_OUTOFPLACE );
+	EXPECT_EQ( 5, planar.length(dimx) );
+	EXPECT_EQ( 2, planar.length(dimy) );
+	EXPECT_EQ( 4, planar.length(dimz) );
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, even_sized_in_place_real_buffers_should_have_padding) {
+	// TODO for now, every real buffer is expected to carry the padding;
+	// eventually that should hold for in-place buffers only
+	const size_t one_d = 1, two_d = 2, three_d = 3;
+	const size_t* const packed_strides = NULL;
+	const size_t batches = 3;
+	const size_t packed_distance = 0;
+	const size_t len1d[1] = {8};
+	const size_t len2d[2] = {4, 2};
+	const size_t len3d[3] = {16, 8, 2};
+
+	// 1D: the reported length of x must stay at the requested 8 . . .
+	buffer<float> float_1d( one_d, len1d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	EXPECT_EQ( 8, float_1d.length(dimx) );
+	// . . . only the memory footprint grows, to 10 elements per row
+	EXPECT_EQ( 10 * batches * sizeof(float), float_1d.size_in_bytes() );
+
+	buffer<double> double_1d( one_d, len1d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	EXPECT_EQ( 8, double_1d.length(dimx) );
+	EXPECT_EQ( 10 * batches * sizeof(double), double_1d.size_in_bytes() );
+
+	// 2D: rows of 4 are expected to occupy 6 elements each
+	buffer<float> float_2d( two_d, len2d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	EXPECT_EQ( 4, float_2d.length(dimx) );
+	EXPECT_EQ( 6 * len2d[dimy] * batches * sizeof(float), float_2d.size_in_bytes() );
+
+	buffer<double> double_2d( two_d, len2d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	EXPECT_EQ( 4, double_2d.length(dimx) );
+	EXPECT_EQ( 6 * len2d[dimy] * batches * sizeof(double), double_2d.size_in_bytes() );
+
+	// 3D: rows of 16 are expected to occupy 18 elements each
+	buffer<float> float_3d( three_d, len3d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	EXPECT_EQ( 16, float_3d.length(dimx) );
+	EXPECT_EQ( 18 * len3d[dimy] * len3d[dimz] * batches * sizeof(float), float_3d.size_in_bytes() );
+
+	buffer<double> double_3d( three_d, len3d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	EXPECT_EQ( 16, double_3d.length(dimx) );
+	EXPECT_EQ( 18 * len3d[dimy] * len3d[dimz] * batches * sizeof(double), double_3d.size_in_bytes() );
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, odd_sized_in_place_real_buffers_should_have_padding) {
+	// TODO for now, every real buffer is expected to carry the padding;
+	// eventually that should hold for in-place buffers only
+	const size_t one_d = 1, two_d = 2, three_d = 3;
+	const size_t* const packed_strides = NULL;
+	const size_t batches = 3;
+	const size_t packed_distance = 0;
+	const size_t len1d[1] = {15};
+	const size_t len2d[2] = {7, 2};
+	const size_t len3d[3] = {7, 4, 2};
+
+	// 1D: the reported length of x must stay at the requested 15 . . .
+	buffer<float> float_1d( one_d, len1d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	float_1d.set_all_to_linear_increase();
+	EXPECT_EQ( 15, float_1d.length(dimx) );
+	// . . . only the memory footprint grows, to 16 elements per row
+	EXPECT_EQ( 16 * batches * sizeof(float), float_1d.size_in_bytes() );
+
+	buffer<double> double_1d( one_d, len1d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	double_1d.set_all_to_linear_increase();
+	EXPECT_EQ( 15, double_1d.length(dimx) );
+	EXPECT_EQ( 16 * batches * sizeof(double), double_1d.size_in_bytes() );
+
+	// 2D: rows of 7 are expected to occupy 8 elements each
+	buffer<float> float_2d( two_d, len2d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	float_2d.set_all_to_linear_increase();
+	EXPECT_EQ( 7, float_2d.length(dimx) );
+	EXPECT_EQ( 8 * len2d[dimy] * batches * sizeof(float), float_2d.size_in_bytes() );
+
+	buffer<double> double_2d( two_d, len2d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	double_2d.set_all_to_linear_increase();
+	EXPECT_EQ( 7, double_2d.length(dimx) );
+	EXPECT_EQ( 8 * len2d[dimy] * batches * sizeof(double), double_2d.size_in_bytes() );
+
+	// 3D: rows of 7 are expected to occupy 8 elements each
+	buffer<float> float_3d( three_d, len3d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	float_3d.set_all_to_linear_increase();
+	EXPECT_EQ( 7, float_3d.length(dimx) );
+	EXPECT_EQ( 8 * len3d[dimy] * len3d[dimz] * batches * sizeof(float), float_3d.size_in_bytes() );
+
+	buffer<double> double_3d( three_d, len3d, packed_strides, batches, packed_distance, layout::real, CLFFT_INPLACE );
+	double_3d.set_all_to_linear_increase();
+	EXPECT_EQ( 7, double_3d.length(dimx) );
+	EXPECT_EQ( 8 * len3d[dimy] * len3d[dimz] * batches * sizeof(double), double_3d.size_in_bytes() );
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, real_imag_and_complex_functions_return_correct_values) {
+	try
+	{
+		const size_t* const packed_strides = NULL;
+		const size_t packed_distance = 0;
+
+		// 1D interleaved complex: point n is expected to read back as ( n+1, n+1.5 )
+		const size_t rank_1d = 1, single_batch = 1;
+		const size_t length_1d = 8;
+		buffer<float> interleaved( rank_1d, &length_1d, packed_strides, single_batch, packed_distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+		interleaved.set_all_to_linear_increase();
+
+		EXPECT_FLOAT_EQ( 1.0f, interleaved.real( 0 ) );
+		EXPECT_FLOAT_EQ( 1.5f, interleaved.imag( 0 ) );
+		EXPECT_FLOAT_EQ( 1.0f, interleaved.complex( 0 ).real() );
+		EXPECT_FLOAT_EQ( 1.5f, interleaved.complex( 0 ).imag() );
+		EXPECT_FLOAT_EQ( 4.0f, interleaved.real( 3 ) );
+		EXPECT_FLOAT_EQ( 4.5f, interleaved.imag( 3 ) );
+		EXPECT_FLOAT_EQ( 4.0f, interleaved.complex( 3 ).real() );
+		EXPECT_FLOAT_EQ( 4.5f, interleaved.complex( 3 ).imag() );
+		EXPECT_FLOAT_EQ( 8.0f, interleaved.real( 7 ) );
+		EXPECT_FLOAT_EQ( 8.5f, interleaved.imag( 7 ) );
+		EXPECT_FLOAT_EQ( 8.0f, interleaved.complex( 7 ).real() );
+		EXPECT_FLOAT_EQ( 8.5f, interleaved.complex( 7 ).imag() );
+
+		// 3D real, two batches, tightly packed
+		const size_t rank_3d = 3, two_batches = 2;
+		const size_t lengths_3d[3] = { 2, 4, 8 };
+		buffer<float> reals( rank_3d, lengths_3d, packed_strides, two_batches, packed_distance, layout::real, CLFFT_OUTOFPLACE );
+		reals.set_all_to_linear_increase();
+
+		EXPECT_FLOAT_EQ( 1.0f, reals.real( 0, 0, 0, 0 ) );
+		EXPECT_FLOAT_EQ( 4.0f, reals.real( 1, 1, 0, 0 ) );
+		EXPECT_FLOAT_EQ( 68.0f, reals.real( 1, 1, 0, 1 ) );
+		EXPECT_FLOAT_EQ( 56.0f, reals.real( 1, 3, 6, 0 ) );
+		EXPECT_FLOAT_EQ( 120.0f, reals.real( 1, 3, 6, 1 ) );
+
+		// 2D planar complex with caller-supplied strides and distance
+		const size_t rank_2d = 2;
+		const size_t lengths_2d[2] = { 4, 2 };
+		const size_t strides_2d[2] = {2, 11};
+		const size_t batch_distance = 30;
+
+		buffer<double> planar( rank_2d, lengths_2d, strides_2d, two_batches, batch_distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+		planar.set_all_to_linear_increase();
+
+		EXPECT_DOUBLE_EQ( 1.0f, planar.real( 0, 0, 0, 0 ) );
+		EXPECT_DOUBLE_EQ( 1.5f, planar.imag( 0, 0, 0, 0 ) );
+		EXPECT_DOUBLE_EQ( 1.0f, planar.complex( 0, 0, 0, 0 ).real() );
+		EXPECT_DOUBLE_EQ( 1.5f, planar.complex( 0, 0, 0, 0 ).imag() );
+		EXPECT_DOUBLE_EQ( 4.0f, planar.real( 3, 0, 0, 0 ) );
+		EXPECT_DOUBLE_EQ( 4.5f, planar.imag( 3, 0, 0, 0 ) );
+		EXPECT_DOUBLE_EQ( 4.0f, planar.complex( 3, 0, 0, 0 ).real() );
+		EXPECT_DOUBLE_EQ( 4.5f, planar.complex( 3, 0, 0, 0 ).imag() );
+		EXPECT_DOUBLE_EQ( 15.0f, planar.real( 2, 1, 0, 1 ) );
+		EXPECT_DOUBLE_EQ( 15.5f, planar.imag( 2, 1, 0, 1 ) );
+		EXPECT_DOUBLE_EQ( 15.0f, planar.complex( 2, 1, 0, 1 ).real() );
+		EXPECT_DOUBLE_EQ( 15.5f, planar.complex( 2, 1, 0, 1 ).imag() );
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, equivalence_operator_returns_correct_result_with_pointwise_compare) {
+	// saved outside the try block so the restore at the bottom is also reached once handle_exception() returns
+	bool comparison_type_restore = comparison_type;
+	try
+	{
+		comparison_type = pointwise_compare;
+
+		size_t dimensions = 3;
+		size_t lengths[3] = { 16, 32, 64 };
+		size_t* stride_null = NULL;
+		size_t batch = 2;
+		size_t distance = 0;
+
+		// complex test: interleaved and planar storage of the same data must compare equal
+		buffer<float> thing_1( dimensions, lengths, stride_null, batch, distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+		buffer<float> thing_2( dimensions, lengths, stride_null, batch, distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		EXPECT_TRUE( thing_1 == thing_2 );
+
+		thing_2.set_one_data_point( 42.0f, 0.0f, 0, 0, 0, 0 ); // disturb the very first point
+		EXPECT_FALSE( thing_1 == thing_2 );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		thing_2.set_one_data_point( 16.0f, 0.0f, 15, 31, 63, 1 ); // disturb the very last point
+		EXPECT_FALSE( thing_1 == thing_2 );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		thing_2.set_one_data_point( 96.0f, 7.8f, 7, 16, 12, 1 ); // disturb a point in the interior
+		EXPECT_FALSE( thing_1 == thing_2 );
+
+		thing_1.set_all_to_sawtooth( 42.0f );
+		thing_2.set_all_to_sawtooth( 42.0f );
+		EXPECT_TRUE( thing_1 == thing_2 );
+
+		//---------------------------------------------------//
+
+		// real test
+		buffer<double> thing_3( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+		buffer<double> thing_4( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+
+		thing_3.set_all_to_sawtooth(1.0f);
+		thing_4.set_all_to_sawtooth(1.0f);
+		EXPECT_TRUE( thing_3 == thing_4 );
+
+		thing_4.set_one_data_point( 42.0f, 0, 0, 0, 0 );
+		EXPECT_FALSE( thing_3 == thing_4 );
+
+		thing_3.set_all_to_sawtooth( 42.0f );
+		thing_4.set_all_to_sawtooth( 42.0f );
+		EXPECT_TRUE( thing_3 == thing_4 );
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+	comparison_type = comparison_type_restore;
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, inequivalence_operator_returns_correct_result_with_pointwise_compare) {
+	// saved outside the try block so the restore at the bottom is also reached once handle_exception() returns
+	bool comparison_type_restore = comparison_type;
+	try
+	{
+		comparison_type = pointwise_compare;
+
+		size_t dimensions = 3;
+		size_t lengths[3] = { 16, 32, 64 };
+		size_t* stride_null = NULL;
+		size_t batch = 2;
+		size_t distance = 0;
+
+		buffer<float> thing_1( dimensions, lengths, stride_null, batch, distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+
+		buffer<float> thing_2( dimensions, lengths, stride_null, batch, distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		EXPECT_FALSE( thing_1 != thing_2 );
+
+		thing_2.set_one_data_point( 42.0f, 0, 0, 0, 0, 0 ); // disturb the very first point
+		EXPECT_TRUE( thing_1 != thing_2 );
+
+		thing_1.set_all_to_sawtooth( 42.0f );
+		thing_2.set_all_to_sawtooth( 42.0f );
+		EXPECT_FALSE( thing_1 != thing_2 );
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+	comparison_type = comparison_type_restore;
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, equivalence_operator_returns_correct_result_with_rms) {
+	// saved outside the try block so the restore at the bottom is also reached once handle_exception() returns
+	bool comparison_type_restore = comparison_type;
+	try
+	{
+		comparison_type = root_mean_square;
+
+		size_t dimensions = 3;
+		size_t lengths[3] = { 16, 32, 64 };
+		size_t* stride_null = NULL;
+		size_t batch = 2;
+		size_t distance = 0;
+
+		// complex test: interleaved and planar storage of the same data must compare equal
+		buffer<float> thing_1( dimensions, lengths, stride_null, batch, distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+		buffer<float> thing_2( dimensions, lengths, stride_null, batch, distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		EXPECT_TRUE( thing_1 == thing_2 );
+
+		thing_2.set_one_data_point( 42.0f, 0.0f, 0, 0, 0, 0 ); // disturb the very first point
+		EXPECT_FALSE( thing_1 == thing_2 );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		thing_2.set_one_data_point( 16.0f, 0.0f, 15, 31, 63, 1 ); // disturb the very last point
+		EXPECT_FALSE( thing_1 == thing_2 );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		thing_2.set_one_data_point( 96.0f, 7.8f, 7, 16, 12, 1 ); // disturb a point in the interior
+		EXPECT_FALSE( thing_1 == thing_2 );
+
+		thing_1.set_all_to_sawtooth( 42.0f );
+		thing_2.set_all_to_sawtooth( 42.0f );
+		EXPECT_TRUE( thing_1 == thing_2 );
+
+		//---------------------------------------------------//
+
+		// real test
+		buffer<double> thing_3( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+		buffer<double> thing_4( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+
+		thing_3.set_all_to_sawtooth(1.0f);
+		thing_4.set_all_to_sawtooth(1.0f);
+		EXPECT_TRUE( thing_3 == thing_4 );
+
+		thing_4.set_one_data_point( 42.0f, 0, 0, 0, 0 );
+		EXPECT_FALSE( thing_3 == thing_4 );
+
+		thing_3.set_all_to_sawtooth( 42.0f );
+		thing_4.set_all_to_sawtooth( 42.0f );
+		EXPECT_TRUE( thing_3 == thing_4 );
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+	comparison_type = comparison_type_restore;
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, inequivalence_operator_returns_correct_result_with_rms) {
+	// saved outside the try block so the restore at the bottom is also reached once handle_exception() returns
+	bool comparison_type_restore = comparison_type;
+	try
+	{
+		comparison_type = root_mean_square;
+
+		size_t dimensions = 3;
+		size_t lengths[3] = { 16, 32, 64 };
+		size_t* stride_null = NULL;
+		size_t batch = 2;
+		size_t distance = 0;
+
+		buffer<float> thing_1( dimensions, lengths, stride_null, batch, distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+
+		buffer<float> thing_2( dimensions, lengths, stride_null, batch, distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+
+		thing_1.set_all_to_sawtooth(1.0f);
+		thing_2.set_all_to_sawtooth(1.0f);
+		EXPECT_FALSE( thing_1 != thing_2 );
+
+		thing_2.set_one_data_point( 42.0f, 0, 0, 0, 0, 0 ); // disturb the very first point
+		EXPECT_TRUE( thing_1 != thing_2 );
+
+		thing_1.set_all_to_sawtooth( 42.0f );
+		thing_2.set_all_to_sawtooth( 42.0f );
+		EXPECT_FALSE( thing_1 != thing_2 );
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+	comparison_type = comparison_type_restore;
+}
+
+/*****************************************************/
+/*****************************************************/
+TEST_F(test_harness_tests_buffer, equivalence_should_fail_given_non_matching_complexities) {
+	try
+	{
+		size_t dimensions = 1;
+		size_t lengths[3] = { 5, 1, 1 };
+		size_t* stride_null = NULL;
+		size_t batch = 1;
+		size_t distance = 0;
+
+		buffer<float> thing_1( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+		buffer<float> thing_2( dimensions, lengths, stride_null, batch, distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+		EXPECT_FALSE( thing_1 == thing_2 ); // real on the left, planar complex on the right
+
+		buffer<float> thing_3( dimensions, lengths, stride_null, batch, distance, layout::complex_planar, CLFFT_OUTOFPLACE );
+		buffer<float> thing_4( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+		EXPECT_FALSE( thing_3 == thing_4 ); // same pair, operands swapped
+
+		buffer<float> thing_5( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+		buffer<float> thing_6( dimensions, lengths, stride_null, batch, distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+		EXPECT_FALSE( thing_5 == thing_6 ); // real on the left, interleaved complex on the right
+
+		buffer<float> thing_7( dimensions, lengths, stride_null, batch, distance, layout::complex_interleaved, CLFFT_OUTOFPLACE );
+		buffer<float> thing_8( dimensions, lengths, stride_null, batch, distance, layout::real, CLFFT_OUTOFPLACE );
+		EXPECT_FALSE( thing_7 == thing_8 ); // same pair, operands swapped
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+}
\ No newline at end of file
diff --git a/src/tests/buffer.h b/src/tests/buffer.h
new file mode 100644
index 00000000..3c8da5da
--- /dev/null
+++ b/src/tests/buffer.h
@@ -0,0 +1,1158 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_BUFFER_H )
+#define CLFFT_BUFFER_H
+
+#include <cmath>
+#include <complex>
+#include <stdexcept>
+#include <memory>
+#include <vector>
+#include <utility>
+#include <sstream>
+#include "../include/clFFT.h"
+#include "test_constants.h"
+#include <boost/random.hpp>
+#include <stdint.h>
+#include "buffer_memory.h"
+
+/*****************************************************/
+/*****************************************************/
+template< typename T >
+bool floats_are_about_equal( T a, T b) {
+	// values that are both zero-ish (magnitude below 1e-5) always count as equal . . .
+	if( std::fabs(a) < 0.00001f && std::fabs(b) < 0.00001f) return true;
+	// . . . otherwise they must be identical or differ by at most tolerance relative to a ("<=" keeps any NaN unequal)
+	return a == b || std::fabs(a-b) <= std::fabs(a*tolerance);
+}
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Plain record of one element position: x/y/z coordinates plus a batch number.
+struct index_t
+{
+	size_t x;
+	size_t y;
+	size_t z;
+	size_t batch;
+
+	index_t( size_t inx, size_t iny, size_t inz, size_t inbatch ) : x(inx), y(iny), z(inz), batch(inbatch) {}
+};
+
+// Data layouts a test buffer can be created with.
+namespace layout {
+	// class buffer consults this value to decide how many buffer_memory
+	// instances to make and how large each one is
+	enum buffer_layout_t {
+		real                  = 0,
+		complex_interleaved   = 1,
+		complex_planar        = 2,
+		hermitian_interleaved = 3,
+		hermitian_planar      = 4
+	};
+}
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+template <class T>
+class buffer {
+private:
+	// we need to save the requested length x, because
+	// if we change the buffer from complex to real,
+	// (as in a round-trip test) we need to be able to
+	// get back to the original length of x. in the case
+	// of an odd transform length, that's not possible
+	// due to round-off error unless we explicitly save it
+	size_t _requested_length_x;
+	size_t _number_of_dimensions;
+	size_t _batch_size;
+	size_t _distance;
+	layout::buffer_layout_t _layout;
+	clfftResultLocation _placeness;
+
+	std::vector< size_t > _lengths;
+	std::vector< size_t > _strides;
+	bool _tightly_packed_strides;
+	bool _tightly_packed_distance;
+
+	static const size_t tightly_packed = 0;
+
+	// if real or planar:
+	// _the_buffers[re] will hold the real portion
+	// _the_buffers[im] will hold the imaginary portion (planar only)
+	// if interleaved:
+	// _the_buffers[interleaved] will hold the whole banana
+	std::vector< buffer_memory< T > > _the_buffers;
+
+	enum
+	{
+		interleaved = 0,
+		re = 0, // real
+		im = 1 // imaginary
+	};
+
+public:
+	/*****************************************************/
+	// Records the requested geometry, then derives lengths, strides and
+	// distance in that order (strides read the lengths, the distance reads
+	// both), creates the backing buffer_memory object(s) and calls clear().
+	// A null strides_in or a zero distance_in requests tight packing.
+	// lengths_in must provide at least dimensions_in entries.
+	buffer( const size_t dimensions_in,
+			const size_t* lengths_in,
+			const size_t* strides_in,
+			const size_t batch_size_in,
+			const size_t distance_in,
+			const layout::buffer_layout_t layout_in,
+			const clfftResultLocation placeness_in
+		  )
+		: _number_of_dimensions( dimensions_in ), _batch_size( batch_size_in )
+		, _distance( distance_in ), _layout( layout_in ), _placeness( placeness_in )
+		, _lengths(), _strides(), _the_buffers()
+	{
+		initialize_lengths( lengths_in );
+		initialize_strides( strides_in );
+		initialize_distance( distance_in );
+		create_buffer_memory();
+		clear();
+	}
+
+	/*****************************************************/
+	~buffer() // intentionally empty: nothing is released explicitly here
+	{}
+
+	/*****************************************************/
+	// this assignment operator only copies _data_.
+	// it does not change the rest of the buffer information
+	// and in fact, it requires that the buffer sizes be the same going in
+	buffer<T> & operator=( buffer<T> & that )
+	{
+		// both sides must hold the same kind of data: real, hermitian or complex
+		const bool same_kind = this->is_real() == that.is_real()
+			&& this->is_hermitian() == that.is_hermitian()
+			&& this->is_complex() == that.is_complex();
+		if( !same_kind )
+		{
+			throw std::runtime_error( "Buffers must be the same layout type for assignment operator" );
+		}
+
+		// . . . and agree on dimension count, batch size and lengths
+		const bool same_shape = this->_number_of_dimensions == that._number_of_dimensions
+			&& this->_batch_size == that._batch_size
+			&& this->_lengths == that._lengths;
+		if( !same_shape )
+		{
+			throw std::runtime_error( "Buffers must be the same size for assignment operator" );
+		}
+
+		// real buffers are copied one value per point, everything else as a re/im pair
+		const bool real_only = this->is_real();
+
+		// the copy goes point by point through the accessors; strides and
+		// distance are not compared above and are left untouched here
+		for( size_t b = 0; b < batch_size(); b++ ) {
+			for( size_t k = 0; k < length(dimz); k++ ) {
+				for( size_t j = 0; j < length(dimy); j++ ) {
+					for( size_t i = 0; i < length(dimx); i++ ) {
+						if( real_only )
+						{
+							this->set_one_data_point( that.real(i,j,k,b), i, j, k, b );
+						}
+						else
+						{
+							this->set_one_data_point( that.real(i,j,k,b), that.imag(i,j,k,b), i, j, k, b );
+						}
+					}
+				}
+			}
+		}
+		return *this;
+	}
+
+private:
+	/*****************************************************/
+	// Resets _lengths so that it holds exactly max_dimension entries, all 1;
+	// initialize_lengths() then overwrites the leading entries.
+	void preinitialize_lengths_to_1_1_1()
+	{
+		// assign() discards the old contents, so no separate clear() is needed
+		const size_t unit_length = 1;
+		_lengths.assign( static_cast< size_t >( max_dimension ), unit_length );
+	}
+
+	/*****************************************************/
+	void initialize_lengths(const size_t* lengths_in)
+	{
+		// _lengths has max_dimension entries and initialize_distance() indexes with dimensions-1: reject anything else
+		if( _number_of_dimensions < 1 || _number_of_dimensions > static_cast<size_t>( max_dimension ) )
+			throw std::runtime_error( "buffer: number of dimensions is out of range" );
+		preinitialize_lengths_to_1_1_1(); // unused trailing dimensions keep a length of 1
+		for( size_t i = 0; i < _number_of_dimensions; ++i )
+			_lengths[i] = lengths_in[i];
+
+		_requested_length_x = _lengths[dimx]; // remembered before the hermitian adjustment below
+		adjust_length_x_for_hermitian_buffers();
+	}
+
+	/*****************************************************/
+	// Shrinks the stored x length for hermitian layouts; every other layout
+	// keeps the length it was given.
+	void adjust_length_x_for_hermitian_buffers()
+	{
+		// real buffers and complex-to-complex buffers hold exactly the
+		// requested number of points, so there is nothing to change
+		if( !is_hermitian() )
+			return;
+
+		// a hermitian buffer with a length of "X" actually stores only
+		// X/2 + 1 points; the other half-ish are conjugates of those and
+		// do not need to be kept. the division is an integer division,
+		// so an odd X rounds down before the +1.
+		// lenY and lenZ are never modified
+		const size_t stored_points = _lengths[dimx] / 2 + 1;
+		_lengths[dimx] = stored_points;
+	}
+
+	/*****************************************************/
+	// Resets _strides so that it holds exactly max_dimension entries, all 1;
+	// initialize_strides() then overwrites the leading entries.
+	void preinitialize_strides_to_1_1_1()
+	{
+		// assign() discards the old contents, so no separate clear() is needed
+		const size_t unit_stride = 1;
+		_strides.assign( static_cast< size_t >( max_dimension ), unit_stride );
+	}
+
+	/*****************************************************/
+	void initialize_strides(const size_t* strides_in)
+	{
+		preinitialize_strides_to_1_1_1();
+		// a null strides_in asks for tight packing, so the strides are computed here.
+		// (tested with ! because tightly_packed is an integer, not a null pointer constant)
+		if( !strides_in ) {
+			_strides[dimx] = 1;
+			for( size_t i = 1; i < _number_of_dimensions; ++i )
+			{
+				_strides[i] = _strides[i-1]*_lengths[i-1];
+			}
+
+			_tightly_packed_strides = true;
+		}
+		// caller-specified strides need no calculation;
+		// the first _number_of_dimensions entries are copied into place
+		else
+		{
+			for( size_t i = 0; i < _number_of_dimensions; ++i )
+			{
+				_strides[i] = strides_in[i];
+			}
+
+			_tightly_packed_strides = false;
+		}
+	}
+
+	/*****************************************************/
+	// Chooses _distance and records in _tightly_packed_distance whether it
+	// was computed here or supplied by the caller.
+	void initialize_distance(const size_t distance_in)
+	{
+		_tightly_packed_distance = ( distance_in == tightly_packed );
+
+		if( !_tightly_packed_distance )
+		{
+			// an explicit, non-zero distance is taken verbatim
+			_distance = distance_in;
+			return;
+		}
+
+		// zero asks for tight packing: length times stride of the last dimension
+		const size_t outermost = _number_of_dimensions - 1;
+		_distance = _lengths[outermost] * _strides[outermost];
+	}
+
+	/*****************************************************/
+	// Appends the buffer_memory object(s) the layout calls for to _the_buffers.
+	void create_buffer_memory()
+	{
+		if( is_real() )
+		{
+			// just one real buffer, followed by the in-place allocation hook
+			_the_buffers.push_back( buffer_memory< T >( total_number_of_points_including_data_and_intervening() ) );
+			increase_memory_allocation_for_real_in_place_buffers();
+			return;
+		}
+
+		if( is_planar() )
+		{
+			// two equally sized buffers: the real one first, then the imaginary one
+			for( int plane = 0; plane < 2; ++plane )
+				_the_buffers.push_back( buffer_memory< T >( total_number_of_points_including_data_and_intervening() ) );
+			return;
+		}
+		// interleaved: a single double-wide buffer
+		if( is_interleaved() )
+			_the_buffers.push_back( buffer_memory< T >( 2 * total_number_of_points_including_data_and_intervening() ) );
+	}
+
+	/*****************************************************/
+	// Number of extra elements added per row of x: 2 when length(dimx) is even, 1 when odd.
+	size_t amount_of_extra_padding_per_x()
+	{
+		const bool x_is_even = ( length(dimx) % 2 == 0 );
+		return x_is_even ? 2 : 1;
+	}
+
+	/*****************************************************/
+	// Widens the y/z strides and the batch distance so that every row of x is
+	// followed by amount_of_extra_padding_per_x() padding elements.
+	// Throws std::runtime_error if the buffer is not real, not in-place, or has
+	// a dimension count other than 1, 2 or 3.
+	void adjust_strides_and_distance_for_in_place_real_buffer()
+	{
+		if( !is_real() )
+			throw std::runtime_error( "this buffer is unreal and shouldn't be adjusting strides" );
+
+		if( !is_in_place() )
+			throw std::runtime_error( "this buffer is out of place and shouldn't be adjusting strides" );
+
+		const size_t dims = number_of_dimensions();
+		size_t padding = stride(dimx) * amount_of_extra_padding_per_x();
+
+		// strides first
+		if( dims >= 2 )
+			_strides[dimy] += padding;
+
+		if( dims == 3 )
+		{
+			// from here on the padding is that of one whole x-y plane
+			padding *= length(dimy);
+			_strides[dimz] += padding;
+		}
+
+		// distance next
+		switch( dims )
+		{
+		case 1:
+			_distance += padding;
+			break;
+		case 2:
+			_distance += ( padding * length(dimy) );
+			break;
+		case 3:
+			_distance += ( padding * length(dimz) );
+			break;
+		default:
+			throw std::runtime_error( "invalid dimensions in adjust_strides_and_distance_for_in_place_real_buffer()" );
+		}
+	}
+
+	/*****************************************************/
+	// For a real, in-place, fully tightly packed buffer: reserves extra memory so
+	// the larger hermitian output of a real-to-hermitian transform fits, then
+	// moves the strides/distance so the padding sits at the end of each x row.
+	// The lengths are left alone; only the reserved memory and the spacing change.
+	// Does nothing for any other kind of buffer.
+	void increase_memory_allocation_for_real_in_place_buffers()
+	{
+		if( !is_real() ) return;
+		if( !is_in_place() ) return;
+		if( !( _tightly_packed_strides && _tightly_packed_distance ) ) return;
+
+		// padding per x row, for every row in every batch
+		const size_t extra_elements =
+			amount_of_extra_padding_per_x() * stride(dimx) * length(dimy) * length(dimz) * batch_size();
+
+		_the_buffers[re].increase_allocated_memory( extra_elements );
+
+		adjust_strides_and_distance_for_in_place_real_buffer();
+	}
+
+	/*****************************************************/
+	// Translates (x, y, z, batch) into an offset into the underlying buffer.
+	// For interleaved layouts the offset is doubled, so it lands on the real
+	// component of the requested point.
+	size_t index( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0)
+	{
+		const size_t point_offset =
+			stride(dimx) * x + stride(dimy) * y + stride(dimz) * z + distance() * batch;
+
+		return is_interleaved() ? point_offset * 2 : point_offset;
+	}
+
+	/*****************************************************/
+	// Returns index() of the data point that follows (x, y, z, batch) when
+	// walking x fastest, then y, then z, then batch. For the very last point the
+	// result is the location where the following batch would start.
+	size_t next_index( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0)
+	{
+		if( x+1 < length(dimx) )
+			return index( x+1, y, z, batch );
+
+		if( y+1 < length(dimy) )
+			return index( 0, y+1, z, batch );
+
+		if( z+1 < length(dimz) )
+			return index( 0, 0, z+1, batch );
+
+		// either another batch follows or this was the last point of the buffer;
+		// both cases resolve to the start of batch+1
+		return index( 0, 0, 0, batch+1 );
+	}
+
+	/*****************************************************/
+	// True when the point at (x, y, z, batch) is approximately equal in this
+	// buffer and in other_buffer; the imaginary part is compared as well for
+	// complex and hermitian layouts. Throws on an unrecognised layout.
+	bool points_are_about_equal( buffer<T> & other_buffer, size_t x, size_t y, size_t z, size_t batch )
+	{
+		if( is_real() )
+			return floats_are_about_equal<T>( real(x, y, z, batch), other_buffer.real(x, y, z, batch) );
+
+		if( !is_complex() && !is_hermitian() )
+			throw std::runtime_error( "invalid layout in points_are_about_equal()" );
+
+		if( !floats_are_about_equal<T>( real(x, y, z, batch), other_buffer.real(x, y, z, batch) ) )
+			return false;
+
+		return floats_are_about_equal<T>( imag(x, y, z, batch), other_buffer.imag(x, y, z, batch) );
+	}
+
+	/*****************************************************/
+	// Compares this buffer (printed as the "test value") with other_buffer
+	// (printed as the "expected value"). Returns the number of mismatches found,
+	// so 0 means the buffers agree.
+	//
+	// compare_method == pointwise_compare:
+	//		every point is checked with points_are_about_equal(). The return value
+	//		is the number of differing points across all batches and, unless
+	//		suppress_output is set, up to default_number_of_mismatches_to_output
+	//		of them are printed.
+	// any other compare_method:
+	//		one aggregate error is computed: the square root of the summed squared
+	//		differences, scaled by the inverse of the largest squared magnitude
+	//		found in other_buffer. Returns 1 if the error is not within tolerance
+	//		(a NaN error counts as a failure), otherwise 0.
+	size_t buffer_mismatches( buffer<T> & other_buffer, bool compare_method)
+	{
+		if( compare_method == pointwise_compare )
+		{
+			std::vector< index_t > mismatched_point_indices;
+
+			for( size_t batch = 0; batch < batch_size(); batch++ )
+				for( size_t z = 0; z < length(dimz); z++ )
+					for( size_t y = 0; y < length(dimy); y++ )
+						for( size_t x = 0; x < length(dimx); x++ )
+							if( !points_are_about_equal( other_buffer, x, y, z, batch ) )
+							{
+								mismatched_point_indices.push_back( index_t(x, y, z, batch));
+							}
+
+			const size_t max_mismatches_output = default_number_of_mismatches_to_output;
+
+			if( mismatched_point_indices.size() != 0 && max_mismatches_output != 0 && suppress_output == false) {
+				const bool has_imag = is_complex() || is_hermitian();
+
+				// mismatches are counted over every batch, so the total they are
+				// reported against must cover every batch as well
+				std::cout << std::endl << std::dec << mismatched_point_indices.size() << " of " << number_of_data_points()
+					<<" data points did not match.  The first " << max_mismatches_output << " (max) mismatching points follow:" << std::endl;
+
+				std::cout << std::endl << "(array index)(index) ";
+				std::cout << "[test value (dec)] / [expected value (dec)]";
+				std::cout << std::endl;
+				for( size_t i = 0; i < max_mismatches_output && i < mismatched_point_indices.size(); i++ )
+				{
+					const index_t & mismatch = mismatched_point_indices[i];
+
+					std::cout
+						<< std::dec << "(" << mismatch.batch << ")"
+						<< std::dec << "(" << mismatch.x << "," << mismatch.y << "," << mismatch.z << ") ";
+					std::cout
+						<< real( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
+
+					if( has_imag )
+					{
+						std::cout << "+i*" << imag( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
+					}
+					std::cout
+						<< " / " << other_buffer.real( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
+
+					if( has_imag )
+					{
+						std::cout << "+i*" << other_buffer.imag( mismatch.x, mismatch.y, mismatch.z, mismatch.batch );
+					}
+					std::cout << std::endl;
+				}
+				std::cout << std::endl;
+			}
+			return mismatched_point_indices.size();
+		}
+
+		// RMS accuracy judgement
+		const bool other_has_imag = other_buffer.is_complex() || other_buffer.is_hermitian();
+
+		// Find the largest squared magnitude among the expected values
+		double maxMag = 0.0;
+		for( size_t batch = 0; batch < batch_size(); batch++ ) {
+			for( size_t z = 0; z < length(dimz); z++) {
+				for( size_t y = 0; y < length(dimy); y++) {
+					for( size_t x = 0; x < length(dimx); x++) {
+						const double ex_r = other_buffer.real(x, y, z, batch);
+						const double ex_i = other_has_imag ? other_buffer.imag(x, y, z, batch) : 0.0;
+
+						const double mag = ex_r*ex_r + ex_i*ex_i;
+						if( mag > maxMag )
+							maxMag = mag;
+					}
+				}
+			}
+		}
+
+		// only scale when the maximum exceeds magnitude_lower_limit; otherwise
+		// the error is left unscaled (presumably to avoid dividing by a
+		// vanishing magnitude)
+		const double maxMagInv = ( maxMag > magnitude_lower_limit ) ? 1.0/maxMag : 1.0;
+
+		// Accumulate the squared error relative to the maximum magnitude
+		double rms = 0;
+		for( size_t batch = 0; batch < batch_size(); batch++ ) {
+			for( size_t z = 0; z < length(dimz); z++) {
+				for( size_t y = 0; y < length(dimy); y++) {
+					for( size_t x = 0; x < length(dimx); x++) {
+						const double ex_r = other_buffer.real(x, y, z, batch);
+						const double ac_r = real(x, y, z, batch);
+
+						const double ex_i = other_has_imag ? other_buffer.imag(x, y, z, batch) : 0.0;
+						const double ac_i = other_has_imag ? imag(x, y, z, batch) : 0.0;
+
+						rms += ((ex_r - ac_r)*(ex_r - ac_r) + (ex_i - ac_i)*(ex_i - ac_i))*maxMagInv;
+					}
+				}
+			}
+		}
+		rms = sqrt(rms);
+
+		// Written as !(rms <= tolerance) instead of (rms > tolerance): every
+		// ordered comparison with NaN is false, so the latter would silently
+		// accept a result that contains NaN.
+		if( !( rms <= tolerance ) )
+		{
+			if( suppress_output == false )
+				std::cout << std::endl <<"RMS accuracy judgement failure -- RMS = "<< std::dec << rms << std::endl;
+			return 1;
+		}
+
+		return 0;
+	}
+
+public:
+	/*****************************************************/
+	// Two buffers are equal when they share the same kind of layout (real,
+	// hermitian or complex), batch size, dimensionality and lengths, and
+	// buffer_mismatches() finds no mismatch under the configured comparison_type.
+	bool operator==( buffer<T> & other_buffer )
+	{
+		// complexity of the data must be the same
+		const bool same_kind =
+			is_real() == other_buffer.is_real() &&
+			is_hermitian() == other_buffer.is_hermitian() &&
+			is_complex() == other_buffer.is_complex();
+		if( !same_kind ) return false;
+
+		// so must the batch size . . .
+		if( batch_size() != other_buffer.batch_size() ) return false;
+
+		// . . . and the dimensionality . . .
+		if( number_of_dimensions() != other_buffer.number_of_dimensions() ) return false;
+
+		// . . . and the size of each dimension
+		for( size_t dim = 0; dim < number_of_dimensions(); ++dim )
+		{
+			if( length(dim) != other_buffer.length(dim) ) return false;
+		}
+
+		return buffer_mismatches( other_buffer, comparison_type ) == 0;
+	}
+
+	/*****************************************************/
+	// Negation of operator==.
+	bool operator!=( buffer<T> & other_buffer )
+	{
+		const bool buffers_match = ( *this == other_buffer );
+		return !buffers_match;
+	}
+
+	/*****************************************************/
+	// strides and distance are those of the output (that is, the new hermitian buffer)
+	void change_real_to_hermitian( const size_t* strides_in, const size_t distance_in )
+	{
+		const bool convertible = is_real() && is_in_place();
+		if( !convertible )
+		{
+			throw std::runtime_error( "can only change a real buffer used in an in-place transform to a hermitian one" );
+		}
+
+		// hermitian interleaved is the only hermitian layout supported for in-place transforms
+		_layout = layout::hermitian_interleaved;
+		adjust_length_x_for_hermitian_buffers();
+
+		// re-derive the spacing for the new (hermitian) view of the data
+		initialize_strides(strides_in);
+		initialize_distance(distance_in);
+	}
+
+	/*****************************************************/
+	// strides and distance are those of the output (that is, the new real buffer)
+	void change_hermitian_to_real( const size_t* strides_in, const size_t distance_in )
+	{
+		// hermitian interleaved is the only hermitian layout supported for in-place transforms
+		const bool convertible = ( _layout == layout::hermitian_interleaved ) && is_in_place();
+		if( !convertible )
+		{
+			throw std::runtime_error( "can only change a hermitian interleaved buffer used in an in-place transform to a real one" );
+		}
+
+		_layout = layout::real;
+
+		// restore the x length that was originally requested
+		_lengths[dimx] = _requested_length_x;
+
+		// re-derive the spacing for the new (real) view of the data
+		initialize_strides(strides_in);
+		initialize_distance(distance_in);
+	}
+
+	/*****************************************************/
+	// True when the layout is layout::real.
+	bool is_real()
+	{
+		return layout::real == _layout;
+	}
+
+	/*****************************************************/
+	// True for both complex layouts (interleaved and planar).
+	bool is_complex()
+	{
+		if( _layout == layout::complex_interleaved ) return true;
+		return _layout == layout::complex_planar;
+	}
+
+	/*****************************************************/
+	// True for both hermitian layouts (interleaved and planar).
+	bool is_hermitian()
+	{
+		if( _layout == layout::hermitian_interleaved ) return true;
+		return _layout == layout::hermitian_planar;
+	}
+
+	/*****************************************************/
+	// True for both planar layouts (complex and hermitian).
+	bool is_planar()
+	{
+		if( _layout == layout::complex_planar ) return true;
+		return _layout == layout::hermitian_planar;
+	}
+
+	/*****************************************************/
+	// True for both interleaved layouts (complex and hermitian).
+	bool is_interleaved()
+	{
+		if( _layout == layout::complex_interleaved ) return true;
+		return _layout == layout::hermitian_interleaved;
+	}
+
+	/*****************************************************/
+	// True for CLFFT_INPLACE, false for CLFFT_OUTOFPLACE; throws
+	// std::runtime_error for any other placeness value.
+	bool is_in_place()
+	{
+		if( _placeness == CLFFT_OUTOFPLACE )
+			return false;
+
+		if( _placeness != CLFFT_INPLACE )
+			throw std::runtime_error( "invalid placeness value in is_in_place()" );
+
+		return true;
+	}
+
+	/*****************************************************/
+	// Pointer to the start of the interleaved data; throws for any other layout.
+	T* interleaved_ptr()
+	{
+		if( !is_interleaved() )
+			throw std::runtime_error( "interleaved_ptr() is only available on interleaved buffers" );
+
+		return _the_buffers[interleaved].ptr();
+	}
+
+	/*****************************************************/
+	// Pointer to the start of the real data. Only available on real and planar
+	// buffers; throws std::runtime_error for interleaved layouts.
+	T* real_ptr()
+	{
+		if( is_planar() || is_real() )
+			return _the_buffers[re].ptr();
+		else
+			// the message names this function (it previously said "real()")
+			throw std::runtime_error( "real_ptr() is only available on real and planar buffers" );
+	}
+
+	/*****************************************************/
+	// Pointer to the start of the imaginary plane; throws unless the layout is planar.
+	T* imag_ptr()
+	{
+		if( !is_planar() )
+			throw std::runtime_error( "imag_ptr() is only available on planar buffers" );
+
+		return _the_buffers[im].ptr();
+	}
+
+	/*****************************************************/
+	// Real component of the point at (x, y, z, batch).
+	T real( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0 )
+	{
+		// every layout keeps its real component in buffer [re]; per the original
+		// note this also picks up the real component of interleaved layouts
+		return _the_buffers[re][ index( x, y, z, batch ) ];
+	}
+
+	/*****************************************************/
+	// Imaginary component of the point at (x, y, z, batch).
+	// Throws std::runtime_error for real buffers and unrecognised layouts.
+	T imag( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0 )
+	{
+		if( is_real() )
+			throw std::runtime_error( "imag() is not available for this real buffer" );
+
+		const size_t real_slot = index( x, y, z, batch );
+
+		if( is_planar() )
+			return _the_buffers[im][real_slot];
+
+		if( is_interleaved() )
+			// index() lands on the real component; the imaginary one sits right after it
+			return _the_buffers[interleaved][real_slot + 1];
+
+		throw std::runtime_error( "invalid layout type in imag()" );
+	}
+
+	/*****************************************************/
+	// The point at (x, y, z, batch) as a std::complex<T>.
+	// Throws std::runtime_error for real buffers and unrecognised layouts.
+	std::complex<T> complex( const size_t x, const size_t y=0, const size_t z=0, const size_t batch=0 )
+	{
+		if( is_real() )
+			throw std::runtime_error( "complex() is not available for this real buffer" );
+
+		if( !is_complex() && !is_hermitian() )
+			throw std::runtime_error( "invalid layout type in complex()" );
+
+		return std::complex<T>( real( x, y, z, batch ), imag( x, y, z, batch ) );
+	}
+
+	/*****************************************************/
+	// Number of dimensions of the data held by this buffer.
+	size_t number_of_dimensions()
+	{
+		return this->_number_of_dimensions;
+	}
+
+	/*****************************************************/
+	// Product of the lengths of all dimensions, i.e. the point count of one batch.
+	size_t number_of_data_points_single_batch()
+	{
+		size_t product = 1;
+		size_t dim = 0;
+		while( dim < _number_of_dimensions )
+		{
+			product *= length(dim);
+			++dim;
+		}
+		return product;
+	}
+
+	/*****************************************************/
+	// Point count over all batches.
+	size_t number_of_data_points()
+	{
+		const size_t per_batch = number_of_data_points_single_batch();
+		return per_batch * batch_size();
+	}
+
+	/*****************************************************/
+	// note that this returns the size in number of points and
+	// does not take layout into consideration. this will yield
+	// the same number for real, interleaved, and planar layouts.
+	// whoever uses this information will need to know if they
+	// want 1x buffer of this size (real), 2x buffer of this
+	// size (planar), or 1x double-wide buffer (interleaved)
+	size_t total_number_of_points_including_data_and_intervening()
+	{
+		// one batch spans distance() points, so all batches span this many
+		const size_t points_per_batch = distance();
+		return points_per_batch * batch_size();
+	}
+
+	/*****************************************************/
+	// note that this will return the size of ONE BUFFER in bytes
+	// for real and interleaved, that doesn't change anything
+	// for planar, you will get the size of the real _or_ the imaginary
+	//			(which should always be the same)
+	size_t size_in_bytes()
+	{
+		// the first underlying buffer is the one reported
+		return _the_buffers.front().size_in_bytes();
+	}
+
+	/*****************************************************/
+	// Length of dimension dim (no range check is performed here).
+	size_t length(size_t dim)
+	{
+		return this->_lengths[dim];
+	}
+
+	/*****************************************************/
+	// Stride of dimension dim (no range check is performed here).
+	size_t stride(size_t dim)
+	{
+		return this->_strides[dim];
+	}
+
+	/*****************************************************/
+	// Pointer to the first element of the lengths array.
+	size_t* lengths()
+	{
+		size_t* first_length = &_lengths[0];
+		return first_length;
+	}
+
+	/*****************************************************/
+	// Pointer to the first element of the strides array.
+	size_t* strides()
+	{
+		size_t* first_stride = &_strides[0];
+		return first_stride;
+	}
+
+	/*****************************************************/
+	// Number of batches held by this buffer.
+	size_t batch_size()
+	{
+		return this->_batch_size;
+	}
+
+	/*****************************************************/
+	// Offset, in points, between the starts of consecutive batches.
+	size_t distance()
+	{
+		return this->_distance;
+	}
+
+	/*****************************************************/
+	// Sets every data point of every batch to zero.
+	void clear()
+	{
+		if( is_real() )
+		{
+			set_all_to_value( 0.0f );
+			return;
+		}
+
+		set_all_to_value( 0.0f, 0.0f );
+	}
+
+	/*****************************************************/
+	// Writes a real value at (x, y, z, batch). Only valid for real buffers;
+	// throws std::runtime_error otherwise.
+	void set_one_data_point( T real, const size_t x, const size_t y, const size_t z, const size_t batch )
+	{
+		if( !is_real() )
+			throw std::runtime_error( "attempting to use real data point setter for complex or hermitian buffer" );
+
+		T* const data = _the_buffers[re].ptr();
+		data[ index(x, y, z, batch) ] = real;
+	}
+
+	/*****************************************************/
+	// Writes a complex value at (x, y, z, batch). Throws std::runtime_error for
+	// real buffers; handles interleaved and planar storage.
+	void set_one_data_point( T real, T imag, const size_t x, const size_t y, const size_t z, const size_t batch )
+	{
+		if( is_real() )
+			throw std::runtime_error( "attempting to use complex data point setter for real buffer" );
+
+		if( is_interleaved() )
+		{
+			T* const pairs = _the_buffers[interleaved].ptr();
+			const size_t slot = index(x, y, z, batch);
+
+			// real first, imaginary in the slot immediately after it
+			pairs[slot] = real;
+			pairs[slot + 1] = imag;
+			return;
+		}
+
+		// planar: same offset into two separate buffers
+		T* const real_plane = _the_buffers[re].ptr();
+		T* const imag_plane = _the_buffers[im].ptr();
+		const size_t slot = index(x, y, z, batch);
+
+		real_plane[slot] = real;
+		imag_plane[slot] = imag;
+	}
+
+	/*****************************************************/
+	// Writes the same real value to every data point of every batch.
+	void set_all_to_value( T real )
+	{
+		for( size_t b = 0; b < batch_size(); ++b )
+			for( size_t k = 0; k < length(dimz); ++k )
+				for( size_t j = 0; j < length(dimy); ++j )
+					for( size_t i = 0; i < length(dimx); ++i )
+						set_one_data_point( real, i, j, k, b );
+	}
+
+	/*****************************************************/
+	// Writes the same complex value to every data point of every batch.
+	void set_all_to_value( T real, T imag )
+	{
+		for( size_t b = 0; b < batch_size(); ++b )
+			for( size_t k = 0; k < length(dimz); ++k )
+				for( size_t j = 0; j < length(dimy); ++j )
+					for( size_t i = 0; i < length(dimx); ++i )
+						set_one_data_point( real, imag, i, j, k, b );
+	}
+
+	/*****************************************************/
+	// Fills the buffer with 1, 2, 3, ... in x-fastest order, continuing across
+	// batches. For non-real layouts the imaginary part is the real part plus 0.5.
+	void set_all_to_linear_increase()
+	{
+		const bool real_only = is_real();
+		size_t counter = 1;
+
+		for( size_t b = 0; b < batch_size(); ++b )
+			for( size_t k = 0; k < length(dimz); ++k )
+				for( size_t j = 0; j < length(dimy); ++j )
+					for( size_t i = 0; i < length(dimx); ++i, ++counter )
+					{
+						if( real_only )
+							set_one_data_point( static_cast<T>(counter), i, j, k, b );
+						else
+							set_one_data_point( static_cast<T>(counter), static_cast<T>(counter) + 0.5f, i, j, k, b );
+					}
+	}
+
+	/*****************************************************/
+	// Fills every row along x, in every batch, with one period of a sawtooth.
+	// Real buffers receive the sawtooth itself; other layouts receive the
+	// sawtooth as the real part and -2 times it as the imaginary part.
+	void set_all_to_sawtooth( T amplitude )
+	{
+		// for all batches
+
+		for( size_t batch = 0; batch < batch_size(); batch++ )
+		{
+			for( size_t z = 0; z < length(dimz); z++ )
+			{
+				for( size_t y = 0; y < length(dimy); y++ )
+				{
+					// waveform will be 1 period of sawtooth
+					size_t number_of_points_in_one_period = length(dimx);
+					size_t number_of_points_on_one_line = number_of_points_in_one_period / 2;
+
+					// the sawtooth will start at 0 and increase to amplitude at T/2
+					// at T/2, value will change to -amplitude and increase back up to 0 at T
+					// if there are an odd number of points in the whole period,
+					// we'll make a stop at 0 in the middle of the jump
+					T value = 0.0f;
+					// NOTE(review): the divisor is unsigned. For length(dimx) of 2 or 3 it is 0,
+					// and for 0 or 1 it wraps around -- confirm callers never use such lengths.
+					T per_point_delta = amplitude / (number_of_points_on_one_line - 1);
+
+					for( size_t x = 0; x < number_of_points_in_one_period; x++) {
+						if( is_real() )
+						{
+							set_one_data_point( value, x, y, z, batch);
+						}
+						else
+						{
+							// for the real value, we want the sawtooth as described above
+							// for the imaginary value, we want the 2 times the inverse
+							//		(so that real and imaginary don't match, possibly obscuring errors)
+							set_one_data_point( value, -2.0f * value, x, y, z, batch);
+						}
+
+						// if we're at T/2, we want to saw on down to the negative amplitude . . .
+						// (the peak is detected with an approximate float comparison)
+						if( floats_are_about_equal( value, amplitude ) )
+						{
+							if( number_of_points_in_one_period % 2 != 0 ) // odd, we need to add the 0
+							{
+								// x is advanced here in addition to the loop's own increment,
+								// so the extra 0 occupies a point of its own
+								// NOTE(review): x is not re-checked against the row length before
+								// this write -- confirm the peak cannot be detected on the last point
+								x++;
+								if( is_real() )
+								{
+									set_one_data_point( 0.0f, x, y, z, batch);
+								}
+								else
+								{
+									set_one_data_point( 0.0f, 0.0f, x, y, z, batch);
+								}
+							}
+							value = -1 * amplitude;
+						}
+						// . . . otherwise, keep going up
+						else value += per_point_delta;
+					}
+				}
+			}
+		}
+	}
+
+	/*****************************************************/
+	// Fills every data point with a pseudo-random integer whose magnitude is at
+	// most max_value and whose sign is chosen by a second draw. The sequence is
+	// reproducible for a given seed. Non-real layouts get the same value in the
+	// real and imaginary parts.
+	void set_all_to_random_data( size_t max_value, size_t seed ) {
+		boost::mt19937 engine;
+		boost::uniform_int<> value_range(1, INT_MAX);
+		boost::variate_generator<boost::mt19937&, boost::uniform_int<> >
+			draw(engine, value_range);
+		engine.seed( static_cast<boost::uint32_t>( seed ) );
+
+		const bool real_only = is_real();
+
+		for( size_t b = 0; b < batch_size(); ++b )
+			for( size_t k = 0; k < length(dimz); ++k )
+				for( size_t j = 0; j < length(dimy); ++j )
+					for( size_t i = 0; i < length(dimx); ++i )
+					{
+						// first draw picks the magnitude, second draw picks the sign
+						int val = draw() % (max_value + 1);
+						if( draw() % 2 ) val = -val;
+
+						const T sample = static_cast<T>(val);
+
+						if( real_only )
+							set_one_data_point( sample, i, j, k, b );
+						else
+							set_one_data_point( sample, sample, i, j, k, b );
+					}
+	}
+
+	/*****************************************************/
+	// Zeroes the buffer, then sets the first point of every batch to the number
+	// of data points in one batch (with a zero imaginary part for non-real layouts).
+	void set_all_to_impulse()
+	{
+		clear();
+
+		const bool real_only = is_real();
+		const T spike = static_cast<T>(number_of_data_points_single_batch());
+
+		for( size_t b = 0; b < batch_size(); ++b )
+		{
+			if( real_only )
+				set_one_data_point( spike, 0, 0, 0, b );
+			else
+				set_one_data_point( spike, 0.0f, 0, 0, 0, b );
+		}
+	}
+
+
+
+	/*****************************************************/
+	// Multiplies every data point of every batch by scale (both components for
+	// non-real layouts).
+	void scale_data( T scale) {
+		const bool real_only = is_real();
+
+		for( size_t b = 0; b < batch_size(); ++b )
+			for( size_t k = 0; k < length(dimz); ++k )
+				for( size_t j = 0; j < length(dimy); ++j )
+					for( size_t i = 0; i < length(dimx); ++i )
+					{
+						if( real_only )
+						{
+							const T scaled_value = real(i, j, k, b) * scale;
+							set_one_data_point( scaled_value, i, j, k, b );
+							continue;
+						}
+
+						const T scaled_real = real(i, j, k, b) * scale;
+						const T scaled_imag = imag(i, j, k, b) * scale;
+						set_one_data_point( scaled_real, scaled_imag, i, j, k, b );
+					}
+	}
+
+	/*****************************************************/
+	// Verifies that nothing wrote outside the data points:
+	//	1. every underlying buffer checks its own boundary cookies (this throws on damage)
+	//	2. unless the buffer is fully tightly packed, every slot lying between one
+	//	   data point and the next must still hold the fill pattern; the number of
+	//	   slots that do not is reported through EXPECT_EQ
+	void make_sure_padding_was_not_overwritten()
+	{
+		// check before and after memory first
+		for( size_t which = 0; which < _the_buffers.size(); ++which )
+			_the_buffers[which].check_memory_boundaries();
+
+		// a fully packed buffer has no gaps worth checking
+		if( _tightly_packed_strides && _tightly_packed_distance ) return;
+
+		size_t intervening_point_touched = 0;
+
+		for( size_t batch = 0; batch < batch_size(); ++batch )
+		for( size_t z = 0; z < length(dimz); ++z )
+		for( size_t y = 0; y < length(dimy); ++y )
+		for( size_t x = 0; x < length(dimx); ++x )
+		{
+			const size_t this_point = index(x, y, z, batch);
+			const size_t next_point = next_index(x, y, z, batch);
+
+			const bool planar = is_planar();
+			const bool real_only = !planar && is_real();
+
+			if( !planar && !real_only && !is_interleaved() )
+				throw std::runtime_error( "invalid layout in make_sure_memory_between_data_points_was_not_touched()" );
+
+			// no gap to scan if the point lies outside the buffer or is
+			// immediately followed by the next one
+			if( this_point >= _the_buffers[re].size() || this_point + 1 == next_point )
+				continue;
+
+			// real and planar points take one slot; an interleaved point takes two
+			// (real + imaginary), so its gap starts one slot later
+			const size_t gap_begin = ( planar || real_only ) ? this_point + 1 : this_point + 2;
+
+			for( size_t i = gap_begin; i < next_point; ++i )
+			{
+				bool touched;
+
+				if( planar )
+				{
+					T gap_real = _the_buffers[re][i];
+					T gap_imag = _the_buffers[im][i];
+
+					touched = nan_as_hex(gap_real) != float_as_hex(gap_real)
+						|| nan_as_hex(gap_imag) != float_as_hex(gap_imag);
+				}
+				else if( real_only )
+				{
+					T gap_value = _the_buffers[re][i];
+					touched = nan_as_hex(gap_value) != float_as_hex(gap_value);
+				}
+				else
+				{
+					T gap_value = _the_buffers[interleaved][i];
+					touched = nan_as_hex(gap_value) != float_as_hex(gap_value);
+				}
+
+				if( touched )
+					++intervening_point_touched;
+			}
+		}
+
+		EXPECT_EQ( 0, intervening_point_touched );
+	}
+};
+
+#endif
diff --git a/src/tests/buffer_memory.cpp b/src/tests/buffer_memory.cpp
new file mode 100644
index 00000000..db0d949b
--- /dev/null
+++ b/src/tests/buffer_memory.cpp
@@ -0,0 +1,44 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <stdint.h>
+#include <string.h>
+
+/*****************************************************/
+/*****************************************************/
+// Returns the bit pattern of a as a 32-bit unsigned integer.
+// The bytes are copied with memcpy: reading a float through a uint32_t*
+// violates the strict-aliasing rule and is undefined behaviour.
+uint32_t float_as_hex( float a ) {
+	uint32_t bits = 0;
+	memcpy( &bits, &a, sizeof( bits ) );
+	return bits;
+}
+
+/*****************************************************/
+/*****************************************************/
+// Returns the bit pattern of a as a 64-bit unsigned integer.
+// The bytes are copied with memcpy: reading a double through a uint64_t*
+// violates the strict-aliasing rule and is undefined behaviour.
+uint64_t float_as_hex( double a ) {
+	uint64_t bits = 0;
+	memcpy( &bits, &a, sizeof( bits ) );
+	return bits;
+}
+
+/*****************************************************/
+/*****************************************************/
+// Returns the all-ones 32-bit pattern. The argument's value is ignored; only
+// its type is used, to select this overload.
+uint32_t nan_as_hex( float a ) {
+	(void)a;
+	return ~static_cast<uint32_t>( 0 );
+}
+
+/*****************************************************/
+/*****************************************************/
+// Returns the all-ones 64-bit pattern. The argument's value is ignored; only
+// its type is used, to select this overload.
+uint64_t nan_as_hex( double a ) {
+	(void)a;
+	return ~static_cast<uint64_t>( 0 );
+}
\ No newline at end of file
diff --git a/src/tests/buffer_memory.h b/src/tests/buffer_memory.h
new file mode 100644
index 00000000..7914ed10
--- /dev/null
+++ b/src/tests/buffer_memory.h
@@ -0,0 +1,138 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_BUFFER_MEMORY_H )
+#define CLFFT_BUFFER_MEMORY_H
+
+#include <vector>
+#include <stdexcept>
+#include <stdint.h>
+
+uint32_t float_as_hex( float a );
+uint64_t float_as_hex( double a );
+uint32_t nan_as_hex( float a );
+uint64_t nan_as_hex( double a );
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+// Owns a block of T surrounded by guard "cookies":
+//
+//		[ front cookie | usable region | back cookie ]
+//
+// The whole block is filled with an all-ones bit pattern (described in the
+// original comments as NaN). After operating on the data, the cookies can be
+// inspected to confirm that the operations respected the memory boundaries.
+template <class T>
+class buffer_memory {
+private:
+	// number of elements in each of the two cookies
+	size_t cookie_size;
+
+	// number of elements originally requested;
+	// this stays the same even if the allocation is increased later
+	size_t requested_floats;
+
+	// total number of elements (units of T, not bytes) in the backing store,
+	// both cookies included
+	size_t memory_size_including_cookies;
+
+	// the backing store itself
+	std::vector<T> memory;
+
+public:
+	// requested_number_of_floats must already account for strides, batch size
+	// and data layout (real, complex, hermitian, interleaved, planar)
+	buffer_memory( size_t requested_number_of_floats )
+		: cookie_size( 4 )
+		, requested_floats( requested_number_of_floats )
+		, memory_size_including_cookies( requested_number_of_floats + 2 * cookie_size )
+		, memory( memory_size_including_cookies )
+	{
+		clear();
+	}
+
+	~buffer_memory() {
+	}
+
+	// member-wise copy assignment
+	buffer_memory<T> & operator=( const buffer_memory<T> & that )
+	{
+		cookie_size = that.cookie_size;
+		requested_floats = that.requested_floats;
+		memory_size_including_cookies = that.memory_size_including_cookies;
+		memory = that.memory;
+
+		return *this;
+	}
+
+	// throws std::runtime_error if any cookie element no longer holds the fill pattern
+	void check_memory_boundaries() {
+		const size_t last = memory.size() - 1;
+
+		for( size_t offset = 0; offset < cookie_size; ++offset )
+		{
+			// bit patterns are compared rather than float values to avoid float ambiguities
+			const bool front_intact = ( float_as_hex( memory[offset] ) == nan_as_hex( memory[0] ) );
+			const bool back_intact = ( float_as_hex( memory[last - offset] ) == nan_as_hex( memory[0] ) );
+
+			if( !front_intact || !back_intact )
+				throw std::runtime_error("some operation wrote beyond bounds of memory");
+		}
+	}
+
+	// resets every byte of the backing store, cookies included, to 0xFF
+	void clear()
+	{
+		const size_t total_bytes = memory_size_including_cookies * sizeof(T);
+		memset( &memory[0], ~0x0, total_bytes );
+	}
+
+	// usable size in units of T (float or double); also see size_in_bytes()
+	size_t size()
+	{
+		const size_t usable_bytes = size_in_bytes();
+		return usable_bytes / sizeof(T);
+	}
+
+	// usable size in bytes, i.e. the current allocation minus both cookies
+	size_t size_in_bytes()
+	{
+		const size_t usable_elements = memory_size_including_cookies - 2 * cookie_size;
+		return usable_elements * sizeof(T);
+	}
+
+	// grows the allocation by amount elements
+	// N.B. memory will be cleared after this
+	void increase_allocated_memory( size_t amount )
+	{
+		memory.resize( memory_size_including_cookies + amount );
+		memory_size_including_cookies += amount;
+
+		clear();
+	}
+
+	// pointer to the first usable element (just past the front cookie)
+	T* ptr()
+	{
+		return &memory[cookie_size];
+	}
+
+	// bounds-checked access to the usable region; index 0 is the first usable element
+	T& operator[]( size_t index ) {
+		if( index >= size() )
+			throw std::runtime_error( "operator[] write out of bounds" );
+
+		return memory[cookie_size + index];
+	}
+};
+
+#endif
diff --git a/src/tests/c-compliance.c b/src/tests/c-compliance.c
new file mode 100644
index 00000000..04c0ff5e
--- /dev/null
+++ b/src/tests/c-compliance.c
@@ -0,0 +1,18 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include "clFFT.h"
diff --git a/src/tests/cl_transform.h b/src/tests/cl_transform.h
new file mode 100644
index 00000000..2c2036ce
--- /dev/null
+++ b/src/tests/cl_transform.h
@@ -0,0 +1,1026 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_CLTRANSFORM_H )
+#define CLFFT_CLTRANSFORM_H
+
+#include <iostream>
+#include <vector>
+#include "clFFT.h"
+#include "../library/private.h"
+#include "../client/openCL.misc.h"
+#include "buffer.h"
+#include "test_constants.h"
+
+//	Custom deleter functions for our unique_ptr smart pointer class
+// unique_ptr deleter for cl_mem handles; a NULL handle is left alone
+struct clMem_deleter {
+	template <class T> void operator()(T* clMemObj) {
+		if( clMemObj == NULL )
+			return;
+		OPENCL_V_THROW( ::clReleaseMemObject( clMemObj ), "Error: In clReleaseMemObject\n" );
+	}
+};
+
+// unique_ptr deleter for a plan handle allocated with 'new': destroys the plan, tears clFFT down, frees the handle
+struct plan_handle_deleter
+{
+	template <class T> void operator()(T* handle)
+	{
+		if( *handle ) // a zero handle means no plan is currently attached to it
+			clfftDestroyPlan( handle );
+		clfftTeardown( ); // when multi-GPU tests are written, this will need to occur in the gtest cleanup
+		delete handle; // the owner creates the handle with 'new clfftPlanHandle'; without this the storage leaks
+	}
+};
+
+// unique_ptr deleter for cl_event handles; a NULL handle is left alone
+struct clEvent_deleter {
+	template <class T> void operator()(T* clEventObj) {
+		if( clEventObj == NULL )
+			return;
+		OPENCL_V_THROW( clReleaseEvent( clEventObj ), "Error: In clReleaseEvent\n" );
+	}
+};
+
+// unique_ptr deleter for cl_command_queue handles; a NULL handle is left alone
+struct clCommQueue_deleter {
+	template <class T> void operator()(T* clQueueObj) {
+		if( clQueueObj == NULL )
+			return;
+		OPENCL_V_THROW( clReleaseCommandQueue( clQueueObj ), "Error: In clReleaseCommandQueue\n" );
+	}
+};
+
+// unique_ptr deleter for cl_context handles; a NULL handle is left alone
+struct clContext_deleter {
+	template <class T> void operator()(T* clContextObj) {
+		if( clContextObj == NULL )
+			return;
+		OPENCL_V_THROW( clReleaseContext( clContextObj ), "Error: In clReleaseContext\n" );
+	}
+};
+
+// Primary template: only the float and double specializations are expected to be used,
+// so the one public constructor always throws.
+template <class T>
+class Precision_Setter {
+public:
+    Precision_Setter(clfftPlanHandle plan_handle) {
+        throw std::runtime_error("Precision_Setter: this code path should never be executed");
+    }
+
+private:
+    Precision_Setter(){}
+};
+
+// float specialization: constructing it asks clFFT for CLFFT_SINGLE precision on the plan
+template<>
+class Precision_Setter<float> {
+public:
+    Precision_Setter(clfftPlanHandle plan_handle) {
+        // a non-success status is recorded through EXPECT_EQ rather than thrown
+        EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanPrecision( plan_handle, CLFFT_SINGLE ));
+    }
+
+private:
+    Precision_Setter(){}
+};
+
+// double specialization: constructing it asks clFFT for CLFFT_DOUBLE precision on the plan
+template<>
+class Precision_Setter<double> {
+public:
+    Precision_Setter(clfftPlanHandle plan_handle) {
+        const clfftStatus precision_status = clfftSetPlanPrecision( plan_handle, CLFFT_DOUBLE );
+
+        // If the device does not support double precision the test should be skipped, not
+        // failed, so that case is raised as an exception with a recognisable message.
+        if( precision_status == CLFFT_DEVICE_NO_DOUBLE )
+            throw std::runtime_error("CLFFT_DEVICE_NO_DOUBLE");
+
+        EXPECT_EQ( CLFFT_SUCCESS, precision_status );
+    }
+
+private:
+    Precision_Setter(){}
+};
+
+ /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+ /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+template <class T, class cl_T>
+class clfft {
+private:
+	clfftLayout _input_layout, _output_layout; // overwritten from the plan each time get_layouts() runs
+	clfftResultLocation _placeness; // the value passed to the constructor
+
+	buffer<T> input; // host-side input data
+	buffer<T> output; // host-side output data
+
+	size_t number_of_data_points; // taken from input.number_of_data_points() at construction
+	T _forward_scale, _backward_scale;
+	cl_uint commandQueueFlags; // passed to clCreateCommandQueue; initialised to 0
+	bool init_failure; // set to false by the constructor; reported by initialize_failed()
+	bool dataset_too_large; // set to false by the constructor; reported by dataset_is_too_large_for_device()
+
+	cl_device_type deviceType; // initialised to 0; the methods of this class consult the non-member 'device_type' instead
+	std::unique_ptr< clfftPlanHandle, plan_handle_deleter > plan_handle;
+
+	clfftDirection _transformation_direction; // set by set_forward_transform()/set_backward_transform()
+	clfftDim dimension;
+
+	std::vector<size_t> lengths; // filled by the constructor with max_dimension entries
+
+	static const bool printInfo = false;
+
+	//	OpenCL resources that need to be carefully managed
+	std::unique_ptr< _cl_context, clContext_deleter > context;
+	std::unique_ptr< _cl_command_queue, clCommQueue_deleter > queue;
+	std::unique_ptr< _cl_event, clEvent_deleter > an_event;
+	std::vector< std::unique_ptr< _cl_mem, clMem_deleter > > cl_mem_input; // device buffers; recreated by set_layouts()
+	std::vector< std::unique_ptr< _cl_mem, clMem_deleter > > cl_mem_output; // device buffers; recreated by set_layouts()
+	std::vector< cl_device_id >	device_id;
+public:
+	/*****************************************************/
+	// Builds the host buffers, starts clFFT, then creates the OpenCL resources and the plan.
+	// Throws std::runtime_error for an in-place real<->planar request; exceptions from the helpers propagate.
+	clfft(  const clfftDim dimensions_in, const size_t* lengths_in,
+			const size_t* input_strides_in, const size_t* output_strides_in,
+			const size_t batch_size_in,
+			const size_t input_distance_in, const size_t output_distance_in,
+			const clfftLayout input_layout_in, const clfftLayout output_layout_in,
+			const clfftResultLocation placeness_in )
+		try
+		: _input_layout( input_layout_in )
+		, _output_layout( output_layout_in )
+		, _placeness( placeness_in )
+		, input( 	static_cast<size_t>(dimensions_in),
+					lengths_in,
+					input_strides_in,
+					batch_size_in,
+					input_distance_in,
+					cl_layout_to_buffer_layout( _input_layout ),
+					_placeness
+				)
+		, output(	static_cast<size_t>(dimensions_in),
+					lengths_in,
+					output_strides_in,
+					batch_size_in,
+					output_distance_in,
+					cl_layout_to_buffer_layout( _output_layout ),
+					_placeness
+				)
+		, number_of_data_points( input.number_of_data_points())
+		, _forward_scale( 1.0f )
+		, _backward_scale( 1.0f/T(number_of_data_points) )
+		, commandQueueFlags( 0 )
+		, init_failure( false )
+		, dataset_too_large( false )
+		, deviceType( 0 )
+		, plan_handle( new clfftPlanHandle )
+		, _transformation_direction( ENDDIRECTION )
+		, dimension( dimensions_in )
+	{
+		// Zero the handle before anything below can throw: if this constructor exits by exception,
+		// plan_handle_deleter runs and tests *handle, which must not be an indeterminate value.
+		*plan_handle = 0;
+
+		if( _placeness == CLFFT_INPLACE )
+		{
+			if( ( is_real( _input_layout ) && is_planar( _output_layout ) ) ||
+				( is_planar( _input_layout ) && is_real( _output_layout ) ) )
+			{
+				throw std::runtime_error( "in-place transforms may not be real<->planar" );
+			}
+		}
+
+		clfftSetupData setupData;
+		clfftInitSetupData( &setupData );
+		clfftSetup( &setupData );
+
+		// 'lengths' always gets max_dimension entries; dimensions beyond 'dimension' are given length 1
+		for( int i = 0; i < max_dimension; i++ )
+			lengths.push_back( i < dimension ? lengths_in[i] : 1 );
+
+		initialize_openCL();
+		initialize_plan();
+	}
+	catch( const std::exception& ) {
+		throw;
+	}
+
+	/*****************************************************/
+	// no explicit work: the unique_ptr members run their custom deleters when the object is destroyed
+	~clfft() {}
+
+	/*****************************************************/
+	// true only for CLFFT_REAL
+	bool is_real( const clfftLayout layout ) {
+		return CLFFT_REAL == layout;
+	}
+
+	/*****************************************************/
+	// true for either planar layout (complex or hermitian)
+	bool is_planar( const clfftLayout layout ) {
+		return CLFFT_COMPLEX_PLANAR == layout || CLFFT_HERMITIAN_PLANAR == layout;
+	}
+
+	/*****************************************************/
+	// true for either interleaved layout (complex or hermitian)
+	bool is_interleaved( const clfftLayout layout ) {
+		return CLFFT_COMPLEX_INTERLEAVED == layout || CLFFT_HERMITIAN_INTERLEAVED == layout;
+	}
+
+	/*****************************************************/
+	// true for either full-complex layout (interleaved or planar); hermitian layouts do not count
+	bool is_complex( const clfftLayout layout ) {
+		return CLFFT_COMPLEX_INTERLEAVED == layout || CLFFT_COMPLEX_PLANAR == layout;
+	}
+
+	/*****************************************************/
+	// true for either hermitian layout (interleaved or planar)
+	bool is_hermitian( const clfftLayout layout ) {
+		return CLFFT_HERMITIAN_INTERLEAVED == layout || CLFFT_HERMITIAN_PLANAR == layout;
+	}
+
+	/*****************************************************/
+	// Creates the OpenCL objects this wrapper owns: the device list (returned by initializeCL),
+	// the context, a command queue on device_id[0], and one initial input/output device buffer pair.
+	// Throws std::runtime_error("problem too large for device") when either host buffer is larger
+	// than cl_device_max_memory_to_allocate(0); OpenCL status codes are checked with OPENCL_V_THROW.
+	void initialize_openCL() {
+		// raw_context is passed to initializeCL (presumably filled in there) and then wrapped by the owning unique_ptr
+		cl_context raw_context = NULL;
+		device_id = initializeCL(
+			device_type,
+			device_gpu_list,
+			raw_context,
+			printInfo
+		);
+		context = std::unique_ptr< _cl_context, clContext_deleter >( raw_context );
+
+		if( input.size_in_bytes() > cl_device_max_memory_to_allocate(0) ||
+			output.size_in_bytes() > cl_device_max_memory_to_allocate(0))
+		{
+			throw std::runtime_error("problem too large for device");
+		}
+
+		// the queue is created on the first device in the list
+		cl_int queue_status = 0;
+		queue = std::unique_ptr< _cl_command_queue, clCommQueue_deleter >(
+				::clCreateCommandQueue( context.get( ), device_id[ 0 ], commandQueueFlags, &queue_status ) );
+		OPENCL_V_THROW( queue_status, "Creating Command Queue ( ::clCreateCommandQueue() )" );
+
+		// both device buffers created here are sized from the host *input* buffer
+		const size_t device_buffer_bytes = input.size_in_bytes( );
+
+		// NOTE(review): the loop bound is the enum constant CLFFT_COMPLEX_INTERLEAVED, not a per-layout
+		// buffer count -- set_layouts() is what sizes the buffer vectors per layout; confirm intent
+		for( cl_int pass = 0; pass < CLFFT_COMPLEX_INTERLEAVED; ++pass )
+		{
+			cl_int buffer_status = 0;
+
+			std::unique_ptr< _cl_mem, clMem_deleter > device_input(
+					::clCreateBuffer( context.get( ), CL_MEM_READ_WRITE, device_buffer_bytes, NULL, &buffer_status) );
+			OPENCL_V_THROW( buffer_status, "Creating Buffer ( ::clCreateBuffer() )" );
+			cl_mem_input.push_back( std::move( device_input ) );
+
+			std::unique_ptr< _cl_mem, clMem_deleter > device_output(
+					::clCreateBuffer( context.get( ), CL_MEM_READ_WRITE, device_buffer_bytes, NULL, &buffer_status) );
+			OPENCL_V_THROW( buffer_status, "Creating Buffer ( ::clCreateBuffer() )" );
+			cl_mem_output.push_back( std::move( device_output ) );
+		}
+	}
+
+	/*****************************************************/
+	void initialize_plan()
+	{ // creates a default plan in *plan_handle, then pushes this object's layouts, placeness, strides, batch size, distances and precision into it
+		EXPECT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( plan_handle.get(), context.get( ), dimension, &lengths[0] ) );
+		set_layouts( _input_layout, _output_layout ); // also recreates the device buffers
+		placeness( _placeness );
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( *plan_handle, dimension, input.strides()));
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( *plan_handle, dimension, output.strides()));
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( *plan_handle, input.batch_size()));
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( *plan_handle, input.distance(), output.distance()));
+		Precision_Setter<T> setter(*plan_handle); // the constructor performs the clfftSetPlanPrecision call
+	}
+
+	/*****************************************************/
+	// returns the plan's input strides, one value per dimension, separated by single spaces
+	std::string input_strides_plaintext() {
+		size_t plan_strides[3];
+		clfftGetPlanInStride( *plan_handle, dimension, plan_strides );
+
+		std::ostringstream text;
+		for( int d = 0; d < dimension; ++d )
+			text << plan_strides[d] << " ";
+
+		// every value was followed by a space, so the last character is dropped
+		std::string joined( text.str() );
+		joined.erase( joined.end() - 1 );
+
+		return joined;
+	}
+
+	/*****************************************************/
+	// returns the plan's output strides, one value per dimension, separated by single spaces
+	std::string output_strides_plaintext() {
+		size_t plan_strides[3];
+		clfftGetPlanOutStride( *plan_handle, dimension, plan_strides );
+
+		std::ostringstream text;
+		for( int d = 0; d < dimension; ++d )
+			text << plan_strides[d] << " ";
+
+		// every value was followed by a space, so the last character is dropped
+		std::string joined( text.str() );
+		joined.erase( joined.end() - 1 );
+
+		return joined;
+	}
+
+	/*****************************************************/
+	// returns the lengths stored in the plan, one value per dimension, separated by single spaces
+	std::string lengths_plaintext() {
+		size_t plan_lengths[3];
+		clfftGetPlanLength( *plan_handle, dimension, plan_lengths );
+
+		std::ostringstream text;
+		for( int d = 0; d < dimension; ++d )
+			text << plan_lengths[d] << " ";
+
+		// every value was followed by a space, so the last character is dropped
+		std::string joined( text.str() );
+		joined.erase( joined.end() - 1 );
+
+		return joined;
+	}
+
+	/*****************************************************/
+	// human-readable name of a clfftLayout, as printed by verbose_output();
+	// throws std::runtime_error for a value that is not one of the five handled layouts
+	std::string layout_plaintext( clfftLayout layout ) {
+		if( layout == CLFFT_REAL )
+			return "real";
+
+		if( layout == CLFFT_HERMITIAN_INTERLEAVED )
+			return "hermitian interleaved";
+		if( layout == CLFFT_HERMITIAN_PLANAR )
+			return "hermitian planar";
+
+		if( layout == CLFFT_COMPLEX_INTERLEAVED )
+			return "complex interleaved";
+		if( layout == CLFFT_COMPLEX_PLANAR )
+			return "complex planar";
+
+		throw std::runtime_error( "invalid layout in layout_plaintext()" );
+	}
+
+	/*****************************************************/
+	void refresh_plan() { // destroys the current plan and builds a new one from this object's settings
+		clfftPlanHandle* const stale_plan = plan_handle.get();
+		clfftDestroyPlan( stale_plan );
+		initialize_plan();
+	}
+
+	/*****************************************************/
+	// Maps a clfftLayout onto the corresponding layout::buffer_layout_t passed to the buffer<T> constructors.
+	// Throws std::runtime_error( "invalid cl_layout" ) for any other value.
+	layout::buffer_layout_t cl_layout_to_buffer_layout( clfftLayout cl_layout ) {
+		switch( cl_layout )
+		{
+		case CLFFT_REAL:                  return layout::real;
+		case CLFFT_HERMITIAN_PLANAR:      return layout::hermitian_planar;
+		case CLFFT_COMPLEX_PLANAR:        return layout::complex_planar;
+		case CLFFT_HERMITIAN_INTERLEAVED: return layout::hermitian_interleaved;
+		case CLFFT_COMPLEX_INTERLEAVED:   return layout::complex_interleaved;
+
+		default:
+			throw std::runtime_error( "invalid cl_layout" );
+		}
+	}
+
+	/*****************************************************/
+	// when the 'verbose' flag is set, prints the plan's parameters as reported by the clfftGetPlan* queries
+	void verbose_output() {
+		if( !verbose )
+			return;
+
+		std::cout << "transform parameters as seen by clfft:" << std::endl;
+		clfftDim plan_dim;
+		cl_uint dimension_count;
+		clfftGetPlanDim( *plan_handle, &plan_dim, &dimension_count );
+		std::cout << dimension_count << " dimension(s): " << lengths_plaintext() << std::endl;
+
+		size_t batch_count;
+		clfftGetPlanBatchSize( *plan_handle, &batch_count );
+		std::cout << "batch: " << batch_count << std::endl;
+
+		clfftPrecision plan_precision;
+		clfftGetPlanPrecision( *plan_handle, &plan_precision );
+		if( plan_precision == CLFFT_SINGLE )
+			std::cout << "single precision" << std::endl;
+		else if( plan_precision == CLFFT_DOUBLE )
+			std::cout << "double precision" << std::endl;
+		else
+			throw std::runtime_error( "can't figure out the precision in verbose_output()" );
+
+		std::cout << ( placeness() == CLFFT_INPLACE ? "in-place" : "out-of-place" ) << std::endl;
+
+		get_layouts(); // refresh _input_layout/_output_layout from the plan before printing them
+		std::cout << layout_plaintext(_input_layout) << " -> " << layout_plaintext(_output_layout) << std::endl;
+
+		std::cout << "input stride(s): " << input_strides_plaintext() << std::endl;
+		std::cout << "output stride(s): " << output_strides_plaintext() << std::endl;
+
+		size_t in_distance, out_distance;
+		clfftGetPlanDistance( *plan_handle, &in_distance, &out_distance );
+		std::cout << "input distance: " << in_distance << std::endl;
+		std::cout << "output distance: " << out_distance << std::endl;
+	}
+
+	/*****************************************************/
+	clfftResultLocation placeness() { // asks the plan for its result location; a failed query is recorded via EXPECT_EQ
+		clfftResultLocation location;
+		EXPECT_EQ( CLFFT_SUCCESS, clfftGetResultLocation( *plan_handle, &location ) );
+		return location;
+	}
+
+	/*****************************************************/
+	void set_forward_transform() { // transform() will pass CLFFT_FORWARD to clfftEnqueueTransform from now on
+		_transformation_direction = CLFFT_FORWARD;
+	}
+
+	/*****************************************************/
+	void set_backward_transform() { // transform() will pass CLFFT_BACKWARD to clfftEnqueueTransform from now on
+		_transformation_direction = CLFFT_BACKWARD;
+	}
+
+	/*****************************************************/
+	void set_transposed() { // sets the plan's transpose-result option to CLFFT_TRANSPOSED; failure is recorded via EXPECT_EQ
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanTransposeResult( *plan_handle, CLFFT_TRANSPOSED ) );
+	}
+
+	/*****************************************************/
+	// Recreates the device buffers for the given layouts and applies the layouts to the plan.
+	// A planar layout gets two buffers (real, imaginary); real and interleaved layouts get one.
+	// Throws std::runtime_error for a layout that is none of these; buffer-creation failures go
+	// through OPENCL_V_THROW.  _input_layout/_output_layout are re-read from the plan at the end.
+	void set_layouts( clfftLayout new_input_layout, clfftLayout new_output_layout )
+	{
+		cl_mem_input.clear( );
+		cl_mem_output.clear( );
+
+		// make the new input buffer(s)
+		const size_t input_buffer_size_in_bytes = input.size_in_bytes();
+		size_t number_of_input_buffers;
+
+		if( is_planar( new_input_layout ) )
+			number_of_input_buffers = 2;
+		else if( is_real( new_input_layout ) || is_interleaved( new_input_layout ) )
+			number_of_input_buffers = 1;
+		else
+			throw std::runtime_error( "we shouldn't make it here [set_layouts(), input]" );
+
+		for( size_t i = 0; i < number_of_input_buffers; ++i )
+		{
+			cl_int status = 0;
+			std::unique_ptr< _cl_mem, clMem_deleter > buff(
+				::clCreateBuffer( context.get( ), CL_MEM_READ_WRITE, input_buffer_size_in_bytes, NULL, &status) );
+			OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
+			cl_mem_input.push_back( std::move( buff ) );
+		}
+
+		// make the new output buffer(s)
+		const size_t output_buffer_size_in_bytes = output.size_in_bytes();
+		size_t number_of_output_buffers;
+
+		if( is_planar( new_output_layout ) )
+			number_of_output_buffers = 2;
+		else if( is_real( new_output_layout ) || is_interleaved( new_output_layout ) )
+			number_of_output_buffers = 1;
+		else // the message names the output argument so a failure points at the right layout
+			throw std::runtime_error( "we shouldn't make it here [set_layouts(), output]" );
+
+		for( size_t i = 0; i < number_of_output_buffers; ++i )
+		{
+			cl_int status = 0;
+			std::unique_ptr< _cl_mem, clMem_deleter > buff(
+				::clCreateBuffer( context.get( ), CL_MEM_READ_WRITE, output_buffer_size_in_bytes, NULL, &status) );
+			OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
+			cl_mem_output.push_back( std::move( buff ) );
+		}
+
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( *plan_handle, new_input_layout, new_output_layout ) );
+		get_layouts();
+	}
+
+	/*****************************************************/
+	// swap_layouts should only be used with in-place real-to-complex or complex-to-real transforms
+	void swap_layouts() {
+		// read the current pair from the plan, then write it back reversed
+		get_layouts();
+		const clfftLayout former_input = _input_layout;
+		const clfftLayout former_output = _output_layout;
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( *plan_handle, former_output, former_input ) );
+		get_layouts();
+
+		// rebuild the plan; initialize_plan() uses the layouts now cached in _input_layout/_output_layout
+		refresh_plan();
+	}
+
+	/*****************************************************/
+	clfftLayout input_layout() { // re-reads both layouts from the plan, then returns the input one
+		get_layouts();
+		return _input_layout;
+	}
+
+	/*****************************************************/
+	clfftLayout output_layout() { // re-reads both layouts from the plan, then returns the output one
+		get_layouts();
+		return _output_layout;
+	}
+
+	/*****************************************************/
+	void forward_scale( T in ) { // sets the plan's forward scale (handed to clFFT as a float), then caches the value read back from the plan
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( *plan_handle, CLFFT_FORWARD, static_cast<float>( in ) ) );
+		_forward_scale = forward_scale();
+	}
+
+	/*****************************************************/
+	void backward_scale( T in ) { // sets the plan's backward scale (handed to clFFT as a float), then caches the value read back from the plan
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( *plan_handle, CLFFT_BACKWARD, static_cast<float>( in ) ) );
+		_backward_scale = backward_scale();
+	}
+
+	/*****************************************************/
+	T forward_scale() { // returns the plan's forward scale factor, converted to T; a failed query is recorded via EXPECT_EQ
+		cl_float scale = 0; // clfftGetPlanScale writes a cl_float, so the receiving variable must be one regardless of cl_T
+		EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( *plan_handle, CLFFT_FORWARD, &scale ));
+		return static_cast<T>( scale );
+	}
+
+	/*****************************************************/
+	T backward_scale() { // returns the plan's backward scale factor, converted to T; a failed query is recorded via EXPECT_EQ
+		cl_float scale = 0; // clfftGetPlanScale writes a cl_float, so the receiving variable must be one regardless of cl_T
+		EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( *plan_handle, CLFFT_BACKWARD, &scale ));
+		return static_cast<T>( scale );
+	}
+
+	/*****************************************************/
+	void set_input_to_value( T real )
+	{ // forwards to the one-argument set_all_to_value() of the host input buffer
+		input.set_all_to_value( real );
+	}
+
+	/*****************************************************/
+	void set_input_to_value( T real, T imag )
+	{ // forwards to the two-argument set_all_to_value() of the host input buffer
+		input.set_all_to_value( real, imag );
+	}
+
+	/*****************************************************/
+	void set_input_to_sawtooth(T max) { // forwards 'max' to set_all_to_sawtooth() of the host input buffer
+		input.set_all_to_sawtooth(max);
+	}
+
+	/*****************************************************/
+	void set_input_to_impulse() { // forwards to set_all_to_impulse() of the host input buffer
+		input.set_all_to_impulse();
+	}
+
+	/*****************************************************/
+	// yes, the "super duper global seed" is horrible
+	// alas, i'll have TODO it better later
+	void set_input_to_random()
+	{ // forwards to set_all_to_random_data() with the fixed arguments 10 and super_duper_global_seed
+		input.set_all_to_random_data( 10, super_duper_global_seed );
+	}
+
+	/*****************************************************/
+	void set_input_to_buffer( buffer<T> other_buffer ) { // other_buffer arrives by value and is then assigned to the host input buffer
+		input = other_buffer;
+	}
+
+	/*****************************************************/
+	bool device_list_has_devices() { // true when device_id holds at least one entry
+		return !device_id.empty();
+	}
+
+	/*****************************************************/
+	// returns true if the memory required for input + output (if applicable) + intermediate (if applicable) buffers
+	// is too large compared with the OpenCL device's memory size
+	bool total_memory_footprint_is_too_large_for_device() {
+		throw_if_device_list_is_empty();
+
+		// clfftBakePlan takes raw command-queue handles, so unwrap the smart pointer
+		cl_command_queue tempQueue = queue.get( );
+		size_t buffer_size = 0;
+
+		EXPECT_EQ( CLFFT_SUCCESS, clfftBakePlan(*plan_handle, 1, &tempQueue, NULL, NULL ));
+		EXPECT_EQ( CLFFT_SUCCESS, clfftGetTmpBufSize(*plan_handle, &buffer_size ));
+
+		// widen before adding so the sum cannot wrap where size_t is narrower than cl_ulong
+		cl_ulong total_memory_size = static_cast<cl_ulong>( input.size_in_bytes() ) + buffer_size;
+
+		// we are only going to include the result space if the transform is out of place
+		if( placeness() == CLFFT_OUTOFPLACE )
+		{
+			total_memory_size += output.size_in_bytes();
+		}
+
+		cl_ulong global_memory_size = cl_device_max_global_memory(0);
+
+		// we don't want to bog down the CPU with ginormous problem sizes
+		// so we chop the global memory way down to keep things manageable
+		if( device_type == CL_DEVICE_TYPE_CPU )
+		{
+			global_memory_size /= 8;
+		}
+
+		return total_memory_size > global_memory_size;
+	}
+
+	/*****************************************************/
+	// throws std::runtime_error("problem too large for device") when the footprint check reports true;
+	// note that the check itself bakes the plan as a side effect
+	void throw_if_total_memory_footprint_is_too_large_for_device() {
+		const bool too_large = total_memory_footprint_is_too_large_for_device();
+		if( too_large )
+			throw std::runtime_error("problem too large for device");
+	}
+
+	/*****************************************************/
+	// throws std::runtime_error when device_id is empty; otherwise does nothing
+	void throw_if_device_list_is_empty() {
+		if( device_list_has_devices() )
+			return;
+		throw std::runtime_error("device list is empty at transform");
+	}
+
+	/*****************************************************/
+	// Runs the transform on the device and copies the result back into the host-side buffers.
+	// Sequence: upload input (and output when out-of-place), bake the plan, optionally create the
+	// temporary buffer clFFT asks for, enqueue, wait for completion, read back, then check the
+	// padding of out-of-place transforms.  Failures surface as exceptions.
+	// explicit_intermediate_buffer: when true and clFFT reports a non-zero temporary size, that
+	// buffer is created here and passed to clFFT; otherwise NULL is passed.
+	void transform(bool explicit_intermediate_buffer = use_explicit_intermediate_buffer) {
+		verbose_output();
+
+		throw_if_device_list_is_empty();
+
+		cl_int status = CL_SUCCESS;
+
+		// In order to call clfftEnqueueTransform, we need to pass naked pointers
+		cl_command_queue tempQueue = queue.get( );
+		cl_event tempEvent = an_event.get( );
+		std::unique_ptr< _cl_mem, clMem_deleter > intermediate_buffer;
+
+		throw_if_total_memory_footprint_is_too_large_for_device();
+
+		// upload the host data; the output buffer is only uploaded for out-of-place transforms
+		write_local_input_buffer_to_gpu();
+		if( placeness() == CLFFT_OUTOFPLACE )
+			write_local_output_buffer_to_gpu();
+
+		try
+		{
+			size_t buffer_size = 0;
+			EXPECT_EQ( CLFFT_SUCCESS, clfftBakePlan(*plan_handle, 1, &tempQueue, NULL, NULL ));
+			EXPECT_EQ( CLFFT_SUCCESS, clfftGetTmpBufSize(*plan_handle, &buffer_size ));
+
+			// create the intermediate buffer only when it was requested and clFFT needs one
+			if( explicit_intermediate_buffer && buffer_size )
+			{
+				// because unique_ptrs are funky, we have to create a temp_buffer
+				// and then std::move it to the intermediate_buffer
+				std::unique_ptr< _cl_mem, clMem_deleter > temp_buffer(
+					::clCreateBuffer( context.get( ),
+							  CL_MEM_READ_WRITE,
+							  buffer_size,
+							  NULL,
+							  &status) );
+				OPENCL_V_THROW( status, "Creating intermediate Buffer ( ::clCreateBuffer() )" );
+
+				intermediate_buffer = std::move( temp_buffer );
+			}
+
+			// clfftEnqueueTransform takes arrays of raw cl_mem handles (set_layouts() creates at most two per side)
+			cl_mem	tempInput[2];
+			cl_mem	tempOutput[2];
+			for( cl_uint i = 0; i < cl_mem_input.size( ); ++i )
+				tempInput[ i ] = cl_mem_input[ i ].get( );
+
+			for( cl_uint i = 0; i < cl_mem_output.size( ); ++i )
+				tempOutput[ i ] = cl_mem_output[ i ].get( );
+
+			// intermediate_buffer.get() is NULL unless the buffer was created above, so a single
+			// call covers both the "explicit buffer" and the "let clFFT allocate" cases
+			status = clfftEnqueueTransform(*plan_handle,
+							  _transformation_direction,
+							  1,
+							  &tempQueue,
+							  0,
+							  NULL,
+							  &tempEvent,
+							  &tempInput[ 0 ],
+							  &tempOutput[ 0 ],
+							  intermediate_buffer.get() );
+
+			// The event written to tempEvent belongs to the caller.  Hand it to an_event so that it
+			// (and the event of any previous transform) is released instead of leaked.  The guard
+			// avoids resetting the unique_ptr to the pointer it already holds.
+			if( tempEvent != an_event.get( ) )
+				an_event.reset( tempEvent );
+
+			clFinish(tempQueue);
+
+			if( status != CLFFT_SUCCESS )
+			{
+				throw std::runtime_error(prettyPrintclFFTStatus(status).c_str());
+			}
+
+			// wait for the kernel call to finish execution
+			cl_int wait_status = clWaitForEvents(1, &tempEvent);
+			if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
+			{
+				cl_int error_code;
+				clGetEventInfo( tempEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &error_code, NULL );
+				throw std::runtime_error(prettyPrintclFFTStatus(error_code).c_str());
+			}
+			else if( wait_status != CL_SUCCESS )
+			{
+				throw std::runtime_error(prettyPrintclFFTStatus(wait_status).c_str());
+			}
+		}
+		catch (const std::exception& ) {
+			std::cout << "Exception occurred during clfftEnqueueTransform"
+					  << __FILE__ << __LINE__ << std::endl;
+			throw;
+		}
+
+		// read the result back from the device buffer(s) that now hold it
+		if( in_place() ) {
+			capture_input();
+		}
+		else {
+			capture_output();
+		}
+
+		// for in-place real<->hermitian transforms the host input buffer is switched over using the
+		// output's strides and distance (presumably so it describes the transformed data -- confirm in buffer.h)
+		get_layouts();
+		if( placeness() == CLFFT_INPLACE )
+		{
+			if( is_real( _input_layout ) && is_hermitian( _output_layout ) )
+			{
+				input.change_real_to_hermitian( output.strides(), output.distance() );
+			}
+			else if( is_hermitian( _input_layout ) && is_real( _output_layout ) )
+			{
+				input.change_hermitian_to_real( output.strides(), output.distance() );
+			}
+		}
+
+		// there's no way to know if in-place transforms have written in bad places,
+		// because depending on input and output strides, the state of the memory
+		// between points is not necessarily the NaN that we set it to
+		if( _placeness != CLFFT_INPLACE )
+		{
+			input.make_sure_padding_was_not_overwritten();
+			output.make_sure_padding_was_not_overwritten();
+		}
+	}
+
+	/*****************************************************/
+	size_t maximum_problem_size() {
+		// N.B. only device 0 is considered; if this class ever needs to support more than one
+		// device at once (i.e., multiple GPUs or CPU+GPU), the index will have to become variable
+		const int device_index = 0;
+		// TODO *2 needs to be either *1 or *2, depending, once real numbers are implemented in clfft
+		const size_t bytes_per_point = sizeof(T)*2;
+		return cl_device_max_memory_to_allocate(device_index)/bytes_per_point;
+	}
+
+	/*****************************************************/
+	size_t number_of_opencl_devices() { // number of entries in device_id
+		return device_id.size();
+	}
+
+
+	/*****************************************************/
+	bool initialize_failed() { // returns the init_failure flag, which the constructor sets to false
+		return init_failure;
+	}
+
+	/*****************************************************/
+	bool dataset_is_too_large_for_device() { // returns the dataset_too_large flag, which the constructor sets to false
+		return dataset_too_large;
+	}
+
+	/*****************************************************/
+	// gives callers a mutable reference to the host-side input buffer
+	buffer<T> & input_buffer() {
+		return input;
+	}
+
+	/*****************************************************/
+	// gives callers a mutable reference to the host-side output buffer
+	buffer<T> & output_buffer() {
+		return output;
+	}
+
+	/*****************************************************/
+	// returns the buffer holding the transform result: 'input' for in-place plans, 'output' for
+	// out-of-place plans; throws std::runtime_error for any other result location
+	buffer<T> & result() {
+		if( placeness() == CLFFT_INPLACE )
+			return input;
+		if( placeness() == CLFFT_OUTOFPLACE )
+			return output;
+		throw std::runtime_error( "invalid placeness" );
+	}
+
+private:
+	/*****************************************************/
+	void get_layouts() { // overwrites _input_layout/_output_layout with what the plan reports; failure is recorded via EXPECT_EQ
+		EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( *plan_handle, &_input_layout, &_output_layout ) );
+	}
+
+	/*****************************************************/
+	// after transform() is run:
+	//   if in-place transformation -- the results will be in the input buffer
+	//	 otherwise -- the results will be in the output buffer
+	void placeness( clfftResultLocation placeness )
+	{ // forwards the requested result location to the plan; failure is recorded via EXPECT_EQ
+		EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( *plan_handle, placeness ) );
+	}
+
+	/*****************************************************/
+	bool in_place() { // true when the plan reports CLFFT_INPLACE as its result location
+		clfftResultLocation location;
+		clfftGetResultLocation( *plan_handle, &location );
+		return location == CLFFT_INPLACE;
+	}
+
+	/*****************************************************/
+	void capture_output() { // blocking read of the device output buffer(s) into the host 'output' buffer, chosen by the plan's output layout
+		if( is_planar( output_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_output[REAL].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.real_ptr(), 0, NULL, NULL), "reading output buffer - planar real ( ::clEnqueueReadBuffer() )" );
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_output[IMAG].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.imag_ptr(), 0, NULL, NULL), "reading output buffer - planar imaginary ( ::clEnqueueReadBuffer() )" );
+			return;
+		}
+		if( is_interleaved( output_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_output[0].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.interleaved_ptr(), 0, NULL, NULL), "reading output buffer - interleaved ( ::clEnqueueReadBuffer() )" );
+			return;
+		}
+		if( is_real( output_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_output[REAL].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.real_ptr(), 0, NULL, NULL), "reading output buffer - planar real ( ::clEnqueueReadBuffer() )" );
+			return;
+		}
+		throw std::runtime_error( "we shouldn't make it here [capture_output()]" );
+	}
+
+	/*****************************************************/
+	void capture_input() { // blocking read of the device input buffer(s) into the host 'input' buffer, chosen by the plan's input layout
+		if( is_planar( input_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_input[REAL].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.real_ptr(), 0, NULL, NULL), "reading input buffer - planar real ( ::clEnqueueReadBuffer() )" );
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_input[IMAG].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.imag_ptr(), 0, NULL, NULL), "reading input buffer - planar imaginary ( ::clEnqueueReadBuffer() )" );
+			return;
+		}
+		if( is_interleaved ( input_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_input[0].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.interleaved_ptr(), 0, NULL, NULL), "reading input buffer - interleaved ( ::clEnqueueReadBuffer() )" );
+			return;
+		}
+		if( is_real( input_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueReadBuffer( queue.get( ), cl_mem_input[REAL].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.real_ptr(), 0, NULL, NULL), "reading input buffer - planar real ( ::clEnqueueReadBuffer() )" );
+			return;
+		}
+		throw std::runtime_error( "we shouldn't make it here [capture_input()]" );
+	}
+
+	/*****************************************************/
+	void write_local_output_buffer_to_gpu() { // blocking write of the host 'output' buffer into the device output buffer(s), chosen by the plan's output layout
+		if( is_planar( output_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_output[REAL].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.real_ptr(), 0, NULL, NULL), "writing output buffer - planar real ( ::clEnqueueWriteBuffer() )" );
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_output[IMAG].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.imag_ptr(), 0, NULL, NULL), "writing output buffer - planar imaginary ( ::clEnqueueWriteBuffer() )" );
+			return;
+		}
+		if( is_interleaved ( output_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_output[0].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.interleaved_ptr(), 0, NULL, NULL), "writing output buffer - interleaved ( ::clEnqueueWriteBuffer() )" );
+			return;
+		}
+		if( is_real( output_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_output[REAL].get( ), CL_TRUE, 0,
+					output.size_in_bytes(), output.real_ptr(), 0, NULL, NULL), "writing output buffer - planar real ( ::clEnqueueWriteBuffer() )" );
+			return;
+		}
+		throw std::runtime_error( "we shouldn't make it here [write_local_output_buffer_to_gpu()]" );
+	}
+
+	/*****************************************************/
+	void write_local_input_buffer_to_gpu() { // blocking write of the host 'input' buffer into the device input buffer(s), chosen by the plan's input layout
+		if( is_planar( input_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_input[REAL].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.real_ptr(), 0, NULL, NULL), "writing input buffer - planar real ( ::clEnqueueWriteBuffer() )" );
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_input[IMAG].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.imag_ptr(), 0, NULL, NULL), "writing input buffer - planar imaginary ( ::clEnqueueWriteBuffer() )" );
+			return;
+		}
+		if( is_interleaved( input_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_input[0].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.interleaved_ptr(), 0, NULL, NULL), "writing input buffer - interleaved ( ::clEnqueueWriteBuffer() )" );
+			return;
+		}
+		if( is_real( input_layout() ) ) {
+			OPENCL_V_THROW( clEnqueueWriteBuffer( queue.get( ), cl_mem_input[REAL].get( ), CL_TRUE, 0,
+					input.size_in_bytes(), input.real_ptr(), 0, NULL, NULL), "writing input buffer - planar real ( ::clEnqueueWriteBuffer() )" );
+			return;
+		}
+		throw std::runtime_error( "we shouldn't make it here [write_local_input_buffer_to_gpu()]" );
+	}
+
+
+	/*****************************************************/
+	// Returns CL_DEVICE_MAX_MEM_ALLOC_SIZE for device_id[device_index], or 0 when the index does
+	// not refer to a device in the list (which includes the empty-list case).
+	// A failing clGetDeviceInfo call is reported through OPENCL_V_THROW.
+	cl_ulong cl_device_max_memory_to_allocate(size_t device_index) {
+		// '>=' rather than '>': device_index == size() is already one past the last valid entry
+		if( device_index >= number_of_opencl_devices() )
+			return 0;
+
+		cl_ulong device_max_to_allocate = 0;
+		OPENCL_V_THROW( ::clGetDeviceInfo( device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &device_max_to_allocate, NULL ),
+			"Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" );
+
+		return device_max_to_allocate;
+	}
+
+
+	/*****************************************************/
+	// Returns CL_DEVICE_GLOBAL_MEM_SIZE for device_id[device_index], or 0 when the index does
+	// not refer to a device in the list (which includes the empty-list case).
+	// A failing clGetDeviceInfo call is reported through OPENCL_V_THROW.
+	cl_ulong cl_device_max_global_memory(size_t device_index) {
+		// '>=' rather than '>': device_index == size() is already one past the last valid entry
+		if( device_index >= number_of_opencl_devices() )
+			return 0;
+
+		cl_ulong global_mem_size = 0;
+		OPENCL_V_THROW( ::clGetDeviceInfo( device_id[device_index], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( cl_ulong ), &global_mem_size, NULL ),
+			"Getting CL_DEVICE_GLOBAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" );
+
+		return global_mem_size;
+	}
+
+	#if defined(PERSISTENT_PLANS_FEATURE_HAS_BEEN_DEFEATURED_WHICH_MEANS_IT_IS_NO_LONGER_A_FEATURE)
+	/*****************************************************/
+	void write_plan_to_file(std::string filename)
+	{ // bakes the plan and then writes it to 'filename' via clfftWritePlanToDisk; only compiled when the persistent-plans macro above is defined
+		cl_command_queue tempQueue = queue.get( );
+		EXPECT_EQ( CLFFT_SUCCESS, clfftBakePlan(*plan_handle, 1, &tempQueue, NULL, NULL ));
+		// we need to make sure the plan is baked before we write it out, or we won't get any juicy binaries along with it
+
+		clfftWritePlanToDisk(*plan_handle, filename.c_str());
+	}
+
+	/*****************************************************/
+	void read_plan_from_file(std::string filename)
+	{ // loads the plan from 'filename' via clfftReadPlanFromDisk, then re-applies the layouts the plan now reports
+		clfftReadPlanFromDisk( *plan_handle, filename.c_str() );
+
+		// if we've changed from the default for input and output layouts, we need to re-set the layouts to make sure buffers get set up completely
+		set_layouts( input_layout(), output_layout() );
+	}
+	#endif
+};
+
+#endif
diff --git a/src/tests/fftw_transform.h b/src/tests/fftw_transform.h
new file mode 100644
index 00000000..87686489
--- /dev/null
+++ b/src/tests/fftw_transform.h
@@ -0,0 +1,495 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_FFTWTRANSFORM_H )
+#define CLFFT_FFTWTRANSFORM_H
+
+#include <vector>
+#include "fftw3.h"
+#include "buffer.h"
+#include "../client/openCL.misc.h" // we need this to leverage the CLFFT_INPLACE and _OUTOFPLACE enums
+
+enum fftw_direction {forward=-1, backward=+1};	// handed unchanged to the c2c planner as its sign argument
+
+enum fftw_transform_type {c2c, r2c, c2r};	// selects the plain, _r2c or _c2r planner in fftw_wrapper::make_plan
+
+template <typename T, typename fftw_T>
+class fftw_wrapper	// empty primary template; the working code is in the <float, fftwf_complex> and <double, fftw_complex> specializations
+{};
+
+// Single-precision flavour of the wrapper: owns one fftwf plan and forwards to the
+// fftwf_* planner/execute/destroy calls.
+template <>
+class fftw_wrapper<float, fftwf_complex>
+{
+public:
+	fftwf_plan plan;
+
+	// Creates the plan held in 'plan' for a batch of c2c, r2c or c2r transforms.
+	//   x, y, z                     lengths of the three axes in the caller's (clfft) order
+	//   num_dimensions              how many of those axes take part in the transform
+	//   batch_size                  number of transforms in the batch
+	//   input_ptr, output_ptr       buffers the plan is bound to; the real side of r2c/c2r is
+	//                               reinterpreted as float*
+	//   num_points_in_single_batch  distance between batches for c2c and for the real side
+	//                               of r2c/c2r
+	//   direction                   sign of the transform; only the c2c planner receives it
+	//   type                        selects the planner; any other value throws std::runtime_error
+	// A plan that is already held is not released here; call destroy_plan() before re-planning.
+	void make_plan( int x, int y, int z, int num_dimensions, int batch_size, fftwf_complex* input_ptr, fftwf_complex* output_ptr, int num_points_in_single_batch, fftw_direction direction, fftw_transform_type type )
+	{
+		// reject unknown transform types before touching anything else
+		if( type != c2c && type != r2c && type != c2r )
+			throw std::runtime_error( "invalid transform type in <float>make_plan" );
+
+		// clfft and fftw disagree on row/column ordering, so the axes are stored reversed
+		int lengths[max_dimension] = {z, y, x};
+
+		// Because of that reversal the axes in use sit at the END of the array, so start
+		// num_dimensions entries before its end.  E.g. with max_dimension 3 and
+		// num_dimensions 2: lengths + 3 - 2 = lengths + 1, which skips dimz and hands
+		// fftw a pointer to {dimy, dimx}.
+		int* const active_lengths = lengths+max_dimension-num_dimensions;
+
+		switch( type )
+		{
+		case c2c:
+			plan = fftwf_plan_many_dft( num_dimensions, active_lengths, batch_size,
+										input_ptr, NULL, 1, num_points_in_single_batch,
+										output_ptr, NULL, 1, num_points_in_single_batch,
+										direction, FFTW_ESTIMATE );
+			break;
+
+		case r2c:
+			// real input, complex output whose batches are (x/2 + 1) * y * z points apart
+			plan = fftwf_plan_many_dft_r2c( num_dimensions, active_lengths, batch_size,
+											reinterpret_cast<float*>(input_ptr), NULL, 1, num_points_in_single_batch,
+											output_ptr, NULL, 1, (x/2 + 1) * y * z,
+											FFTW_ESTIMATE );
+			break;
+
+		case c2r:
+			// complex input whose batches are (x/2 + 1) * y * z points apart, real output
+			plan = fftwf_plan_many_dft_c2r( num_dimensions, active_lengths, batch_size,
+											input_ptr, NULL, 1, (x/2 + 1) * y * z,
+											reinterpret_cast<float*>(output_ptr), NULL, 1, num_points_in_single_batch,
+											FFTW_ESTIMATE );
+			break;
+		}
+	}
+
+	// Construction plans immediately; the arguments are passed to make_plan() unchanged,
+	// so the same parameter descriptions apply.
+	fftw_wrapper( int x, int y, int z, int num_dimensions, int batch_size, fftwf_complex* input_ptr, fftwf_complex* output_ptr, int num_points_in_single_batch, fftw_direction direction, fftw_transform_type type )
+	{
+		make_plan( x, y, z, num_dimensions, batch_size, input_ptr, output_ptr, num_points_in_single_batch, direction, type );
+	}
+
+	// Hands the current plan to fftwf_destroy_plan().  The 'plan' member itself is not
+	// reset, so it must not be used again until make_plan() has run.
+	void destroy_plan()
+	{
+		fftwf_destroy_plan(plan);
+	}
+
+	// The destructor releases whatever plan is held at that moment; no copy operations
+	// are declared, so a copied wrapper would release the same plan a second time.
+	~fftw_wrapper()
+	{
+		destroy_plan();
+	}
+
+	// Executes the held plan; it works on the input/output pointers that were given
+	// to make_plan() when the plan was created.
+	void execute()
+	{
+		fftwf_execute(plan);
+	}
+};
+
+// Double-precision flavour of the wrapper: owns one fftw plan and forwards to the
+// fftw_* planner/execute/destroy calls.
+template <>
+class fftw_wrapper<double, fftw_complex>
+{
+public:
+	fftw_plan plan;
+
+	// Creates the plan held in 'plan' for a batch of c2c, r2c or c2r transforms.
+	//   x, y, z                     lengths of the three axes in the caller's (clfft) order
+	//   num_dimensions              how many of those axes take part in the transform
+	//   batch_size                  number of transforms in the batch
+	//   input_ptr, output_ptr       buffers the plan is bound to; the real side of r2c/c2r is
+	//                               reinterpreted as double*
+	//   num_points_in_single_batch  distance between batches for c2c and for the real side
+	//                               of r2c/c2r
+	//   direction                   sign of the transform; only the c2c planner receives it
+	//   type                        selects the planner; any other value throws std::runtime_error
+	// A plan that is already held is not released here; call destroy_plan() before re-planning.
+	void make_plan( int x, int y, int z, int num_dimensions, int batch_size, fftw_complex* input_ptr, fftw_complex* output_ptr, int num_points_in_single_batch, fftw_direction direction, fftw_transform_type type )
+	{
+		// reject unknown transform types before touching anything else
+		if( type != c2c && type != r2c && type != c2r )
+			throw std::runtime_error( "invalid transform type in <double>make_plan" );
+
+		// clfft and fftw disagree on row/column ordering, so the axes are stored reversed
+		int lengths[max_dimension] = {z, y, x};
+
+		// Because of that reversal the axes in use sit at the END of the array, so start
+		// num_dimensions entries before its end.  E.g. with max_dimension 3 and
+		// num_dimensions 2: lengths + 3 - 2 = lengths + 1, which skips dimz and hands
+		// fftw a pointer to {dimy, dimx}.
+		int* const active_lengths = lengths+max_dimension-num_dimensions;
+
+		switch( type )
+		{
+		case c2c:
+			plan = fftw_plan_many_dft( num_dimensions, active_lengths, batch_size,
+									input_ptr, NULL, 1, num_points_in_single_batch,
+									output_ptr, NULL, 1, num_points_in_single_batch,
+									direction, FFTW_ESTIMATE );
+			break;
+
+		case r2c:
+			// real input, complex output whose batches are (x/2 + 1) * y * z points apart
+			plan = fftw_plan_many_dft_r2c( num_dimensions, active_lengths, batch_size,
+											reinterpret_cast<double*>(input_ptr), NULL, 1, num_points_in_single_batch,
+											output_ptr, NULL, 1, (x/2 + 1) * y * z,
+											FFTW_ESTIMATE );
+			break;
+
+		case c2r:
+			// complex input whose batches are (x/2 + 1) * y * z points apart, real output
+			plan = fftw_plan_many_dft_c2r( num_dimensions, active_lengths, batch_size,
+											input_ptr, NULL, 1, (x/2 + 1) * y * z,
+											reinterpret_cast<double*>(output_ptr), NULL, 1, num_points_in_single_batch,
+											FFTW_ESTIMATE );
+			break;
+		}
+	}
+
+	// Construction plans immediately; the arguments are passed to make_plan() unchanged,
+	// so the same parameter descriptions apply.
+	fftw_wrapper( int x, int y, int z, int num_dimensions, int batch_size, fftw_complex* input_ptr, fftw_complex* output_ptr, int num_points_in_single_batch, fftw_direction direction, fftw_transform_type type )
+	{
+		make_plan( x, y, z, num_dimensions, batch_size, input_ptr, output_ptr, num_points_in_single_batch, direction, type );
+	}
+
+	// Hands the current plan to fftw_destroy_plan().  The 'plan' member itself is not
+	// reset, so it must not be used again until make_plan() has run.
+	void destroy_plan()
+	{
+		fftw_destroy_plan(plan);
+	}
+
+	// The destructor releases whatever plan is held at that moment; no copy operations
+	// are declared, so a copied wrapper would release the same plan a second time.
+	~fftw_wrapper()
+	{
+		destroy_plan();
+	}
+
+	// Executes the held plan; it works on the input/output pointers that were given
+	// to make_plan() when the plan was created.
+	void execute()
+	{
+		fftw_execute(plan);
+	}
+};
+
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
+template <typename T, typename fftw_T>
+class fftw {	// host-side fftw transform used by the tests: owns the input/output buffers and the plan bound to them
+private:
+	static const size_t tightly_packed_distance = 0;	// passed to both buffer constructors
+
+	std::vector<size_t> _lengths;	// always max_dimension entries (x, y, z); axes not in use stay at 1
+	fftw_direction _direction;
+	fftw_transform_type _type;
+	layout::buffer_layout_t _input_layout, _output_layout;
+	size_t _batch_size;
+	buffer<T> input;
+	buffer<T> output;
+	fftw_wrapper<T, fftw_T> fftw_guts;	// plan created on the input/output pointers during construction
+
+	T _forward_scale, _backward_scale;	// factors applied to the output by transform()
+public:
+	/*****************************************************/
+	fftw( const size_t number_of_dimensions_in, const size_t* lengths_in, const size_t batch_size_in, fftw_transform_type type_in )	// builds buffers and a forward plan, then zeroes the input
+		: _lengths( initialized_lengths( number_of_dimensions_in, lengths_in ) )
+		, _direction( forward )
+		, _type( type_in )
+		, _input_layout( initialized_input_layout() )
+		, _output_layout( initialized_output_layout() )
+		, _batch_size( batch_size_in )
+		, input( number_of_dimensions_in,
+				lengths_in,
+				NULL,
+				batch_size_in,
+				tightly_packed_distance,
+				_input_layout,
+				CLFFT_OUTOFPLACE )
+		, output( number_of_dimensions_in,
+				lengths_in,
+				NULL,
+				batch_size_in,
+				tightly_packed_distance,
+				_output_layout,
+				CLFFT_OUTOFPLACE )
+		, fftw_guts( (int)_lengths[dimx], (int)_lengths[dimy], (int)_lengths[dimz],	// listed in declaration order, which is the order members are initialized in
+					 (int)number_of_dimensions_in, (int)batch_size_in,
+					 reinterpret_cast<fftw_T*>(input_ptr()),
+					 reinterpret_cast<fftw_T*>(output_ptr()),
+					 (int)(_lengths[dimx]*_lengths[dimy]*_lengths[dimz]), _direction, _type)
+		, _forward_scale( 1.0f )
+		, _backward_scale( 1.0f/T(input.number_of_data_points_single_batch()) )	// 1 / (points in one batch)
+	{
+		clear_data_buffer();
+	}
+
+	/*****************************************************/
+	~fftw() {}
+
+	/*****************************************************/
+	layout::buffer_layout_t initialized_input_layout()	// input layout implied by _type; throws std::runtime_error for an unknown type
+	{
+		if( _type == c2c )
+			return layout::complex_interleaved;
+		else if( _type == r2c )
+			return layout::real;
+		else if( _type == c2r )
+			return layout::hermitian_interleaved;
+		else
+			throw std::runtime_error( "invalid transform type in initialized_input_layout" );
+	}
+
+	/*****************************************************/
+	layout::buffer_layout_t initialized_output_layout()	// output layout implied by _type; throws std::runtime_error for an unknown type
+	{
+		if( _type == c2c )
+			return layout::complex_interleaved;
+		else if( _type == r2c )
+			return layout::hermitian_interleaved;
+		else if( _type == c2r )
+			return layout::real;
+		else
+			throw std::runtime_error( "invalid transform type in initialized_output_layout" );	// message used to name the input-layout function
+	}
+
+	/*****************************************************/
+	std::vector<size_t> initialized_lengths( const size_t number_of_dimensions, const size_t* lengths_in )	// copies lengths_in into a max_dimension-entry vector padded with 1
+	{
+		if( number_of_dimensions > max_dimension )	// the copy loop below would otherwise write past the end of the vector
+			throw std::runtime_error( "too many dimensions in fftw::initialized_lengths" );
+
+		std::vector<size_t> lengths( max_dimension, 1 ); // start with 1, 1, 1
+		for( size_t i = 0; i < number_of_dimensions; i++ )
+			lengths[i] = lengths_in[i];
+
+		return lengths;
+	}
+
+	/*****************************************************/
+	T* input_ptr()	// pointer into the input buffer that matches _input_layout; throws on an unknown layout
+	{
+		if( _input_layout == layout::real )
+			return input.real_ptr();
+		else if( _input_layout == layout::complex_interleaved )
+			return input.interleaved_ptr();
+		else if( _input_layout == layout::hermitian_interleaved )
+			return input.interleaved_ptr();
+		else
+			throw std::runtime_error( "invalid layout in fftw::input_ptr" );
+	}
+
+	/*****************************************************/
+	T* output_ptr()	// pointer into the output buffer that matches _output_layout; throws on an unknown layout
+	{
+		if( _output_layout == layout::real )
+			return output.real_ptr();
+		else if( _output_layout == layout::complex_interleaved )
+			return output.interleaved_ptr();
+		else if( _output_layout == layout::hermitian_interleaved )
+			return output.interleaved_ptr();
+		else
+			throw std::runtime_error( "invalid layout in fftw::output_ptr" );
+	}
+
+	// you must call either set_forward_transform() or
+	// set_backward_transform() before setting the input buffer
+	/*****************************************************/
+	void set_forward_transform()	// c2c only: switches to the forward direction, re-planning only when the direction changes
+	{
+		if( _type != c2c )
+			throw std::runtime_error( "do not use set_forward_transform() except with c2c transforms" );
+
+		if( _direction != forward )
+		{
+			_direction = forward;
+			fftw_guts.destroy_plan();
+			fftw_guts.make_plan((int)_lengths[dimx], (int)_lengths[dimy], (int)_lengths[dimz],
+								(int)input.number_of_dimensions(), (int)input.batch_size(),
+								reinterpret_cast<fftw_T*>(input.interleaved_ptr()), reinterpret_cast<fftw_T*>(output.interleaved_ptr()),
+								(int)(_lengths[dimx]*_lengths[dimy]*_lengths[dimz]), _direction, _type);
+		}
+	}
+
+	/*****************************************************/
+	void set_backward_transform()	// c2c only: switches to the backward direction, re-planning only when the direction changes
+	{
+		if( _type != c2c )
+			throw std::runtime_error( "do not use set_backward_transform() except with c2c transforms" );
+
+		if( _direction != backward )
+		{
+			_direction = backward;
+			fftw_guts.destroy_plan();
+			fftw_guts.make_plan((int)_lengths[dimx], (int)_lengths[dimy], (int)_lengths[dimz],
+								(int)input.number_of_dimensions(), (int)input.batch_size(),
+								reinterpret_cast<fftw_T*>(input.interleaved_ptr()), reinterpret_cast<fftw_T*>(output.interleaved_ptr()),
+								(int)(_lengths[dimx]*_lengths[dimy]*_lengths[dimz]), _direction, _type);
+		}
+	}
+
+	/*****************************************************/
+	size_t size_of_data_in_bytes()	// size reported by the input buffer
+	{
+		return input.size_in_bytes();
+	}
+
+	/*****************************************************/
+	void forward_scale( T in )	// setter
+	{
+		_forward_scale = in;
+	}
+
+	/*****************************************************/
+	void backward_scale( T in )	// setter
+	{
+		_backward_scale = in;
+	}
+
+	/*****************************************************/
+	T forward_scale()	// getter
+	{
+		return _forward_scale;
+	}
+
+	/*****************************************************/
+	T backward_scale()	// getter
+	{
+		return _backward_scale;
+	}
+
+	/*****************************************************/
+	void set_all_data_to_value( T value )	// fills the input buffer; single-value form
+	{
+		input.set_all_to_value( value );
+	}
+
+	/*****************************************************/
+	void set_all_data_to_value( T real_value, T imag_value )	// fills the input buffer; real/imaginary form
+	{
+		input.set_all_to_value( real_value, imag_value );
+	}
+
+	/*****************************************************/
+	void set_data_to_sawtooth(T max)	// forwards to the input buffer
+	{
+		input.set_all_to_sawtooth( max );
+	}
+
+	/*****************************************************/
+	void set_data_to_increase_linearly()	// forwards to the input buffer
+	{
+		input.set_all_to_linear_increase();
+	}
+
+	/*****************************************************/
+	void set_data_to_impulse()	// forwards to the input buffer
+	{
+		input.set_all_to_impulse();
+	}
+
+	/*****************************************************/
+	// yes, the "super duper global seed" is horrible
+	// alas, i'll have TODO it better later
+	void set_data_to_random()
+	{
+		input.set_all_to_random_data( 10, super_duper_global_seed );
+	}
+
+	/*****************************************************/
+	void set_input_to_buffer( buffer<T> other_buffer ) {	// NOTE(review): the plan keeps the pointers captured at planning time; confirm buffer<T> assignment leaves them valid
+		input = other_buffer;
+	}
+
+	/*****************************************************/
+	void clear_data_buffer()	// zeroes the input buffer, using the form of set_all_data_to_value() that fits the layout
+	{
+		if( _input_layout == layout::real )
+		{
+			set_all_data_to_value( 0.0f );
+		}
+		else
+		{
+			set_all_data_to_value( 0.0f, 0.0f );
+		}
+	}
+
+	/*****************************************************/
+	void transform()	// executes the plan, then scales the output: forward scale for forward c2c and r2c, backward scale for backward c2c and c2r
+	{
+		fftw_guts.execute();
+
+		if( _type == c2c )
+		{
+			if( _direction == forward  ) {
+				output.scale_data( static_cast<T>( forward_scale( ) ) );
+			}
+			else if( _direction == backward  ) {
+				output.scale_data( static_cast<T>( backward_scale( ) ) );
+			}
+		}
+		else if( _type == r2c )
+		{
+			output.scale_data( static_cast<T>( forward_scale( ) ) );
+		}
+		else if( _type == c2r )
+		{
+			output.scale_data( static_cast<T>( backward_scale( ) ) );
+		}
+		else
+			throw std::runtime_error( "invalid transform type in fftw::transform()" );
+	}
+
+	/*****************************************************/
+	buffer<T> & result()	// the output buffer, by reference
+	{
+		return output;
+	}
+
+	/*****************************************************/
+	buffer<T> & input_buffer()	// the input buffer, by reference
+	{
+		return input;
+	}
+};
+
+#endif
diff --git a/src/tests/gtest_main.cpp b/src/tests/gtest_main.cpp
new file mode 100644
index 00000000..6e29b90e
--- /dev/null
+++ b/src/tests/gtest_main.cpp
@@ -0,0 +1,267 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+#include <cstring>
+#include <gtest/gtest.h>
+#include <boost/program_options.hpp>
+#include "clFFT.h"
+#include "version.h"
+#include "test_constants.h"
+#include "../client/openCL.misc.h"
+#include "unicode.compatibility.h"
+
+namespace po = boost::program_options;
+size_t number_of_random_tests;	// filled from --numRandom / -r in main()
+time_t random_test_parameter_seed;	// filled from --seed in main()
+float tolerance;	// filled from --tolerance / -t in main()
+bool verbose;	// true when --verbose / -v was given
+
+#if defined( _WIN32 )
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
+#include <intrin.h>
+// BSF( index, mask ): stores in *index the bit position of the lowest set bit of mask
+#if defined( _WIN64 )
+void inline BSF( unsigned long* index, size_t& mask )
+{
+	_BitScanForward64( index, mask );
+}
+#else
+void inline BSF( unsigned long* index, size_t& mask )
+{
+	_BitScanForward( index, mask );
+}
+#endif
+#elif defined( __GNUC__ )
+void inline BSF (unsigned long * index, size_t & mask) {
+	*index = __builtin_ctzll (mask);	// size_t may be 64 bits wide; __builtin_ctz takes an unsigned int and would drop the upper half
+}
+#endif
+
+// global for test use
+bool suppress_output = false;
+
+//	Globals that user can set on the command line, that need to be passed down to unit tests
+cl_device_type device_type = CL_DEVICE_TYPE_GPU;	// --cpu / --gpu in main() override this
+cl_uint device_gpu_list = ~0x0;	// all bits set
+bool comparison_type = root_mean_square;	// main() switches this to pointwise_compare for --pointwise
+
+int main( int argc, char **argv )	// parses the runner's own options, forwards the remaining arguments to googletest and runs all tests
+{
+	//	Define MEMORYREPORT on windows platforms to enable debug memory heap checking
+#if defined( MEMORYREPORT ) && defined( _WIN32 )
+	TCHAR logPath[ MAX_PATH ];
+	::GetCurrentDirectory( MAX_PATH, logPath );
+	::_tcscat_s( logPath, _T( "\\MemoryReport.txt") );
+
+	//	We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output its memory
+	//	statistics on app shutdown
+	HANDLE hLogFile;
+	hLogFile = ::CreateFile( logPath, GENERIC_WRITE,
+		FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL );
+
+	::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
+	::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
+	::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG );
+
+	::_CrtSetReportFile( _CRT_ASSERT, hLogFile );
+	::_CrtSetReportFile( _CRT_ERROR, hLogFile );
+	::_CrtSetReportFile( _CRT_WARN, hLogFile );
+
+	int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG );
+	tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF;
+	::_CrtSetDbgFlag( tmp );
+
+	//	By looking at the memory leak report that is generated by this debug heap, there is a number with
+	//	{} brackets that indicates the incremental allocation number of that block.  If you wish to set
+	//	a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap
+	//	will issue a bp on the request, allowing you to look at the call stack
+	//	::_CrtSetBreakAlloc( 997 );
+
+#endif /* MEMORYREPORT */
+
+	// Declare the supported options.
+	po::options_description desc( "clFFT Runtime Test command line options" );
+	desc.add_options()
+		( "help,h",				"produces this help message" )
+		( "verbose,v",			"print out detailed information for the tests" )
+		( "noVersion",     "Don't print version information from the clFFT library" )
+		( "noInfoCL",      "Don't print information from the OpenCL runtime" )
+		( "cpu,c",         "Run tests on a CPU device" )
+		( "gpu,g",         "Run tests on a GPU device (default)" )
+		( "pointwise,p",         "Do a pointwise comparison to determine test correctness (default: use root mean square)" )
+		( "tolerance,t",        po::value< float >( &tolerance )->default_value( 0.001f ),   "tolerance level to use when determining test pass/fail" )
+		( "numRandom,r",        po::value< size_t >( &number_of_random_tests )->default_value( 2000 ),   "number of random tests to run" )
+		( "seed",        po::value< time_t >( &random_test_parameter_seed )->default_value( time(NULL)%1308000000 ),
+						"seed to use for the random test. defaults to time(NULL)" )
+						// modulo lops off the first few digits of the time value to make the seed easier to type
+						// even without these digits, the seed value won't wrap around until 2036 or later
+		( "short,s",         "Run radix 2 tests; no random testing" )
+		( "medium,m",         "Run all radices; no random testing" )
+		;
+
+	//	Parse the command line options, ignore unrecognized options and collect them into a vector of strings
+	po::variables_map vm;
+	po::parsed_options parsed = po::command_line_parser( argc, argv ).options( desc ).allow_unregistered( ).run( );
+	po::store( parsed, vm );
+	po::notify( vm );
+	std::vector< std::string > to_pass_further = po::collect_unrecognized( parsed.options, po::include_positional );
+
+	std::cout << std::endl;
+
+	size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0)
+		| ((vm.count( "cpu" ) > 0) ? 2 : 0);
+	if ((mutex & (mutex-1)) != 0) {	// more than one bit set means both --gpu and --cpu were given
+		terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl;
+		if (vm.count ( "cpu" )  > 0) terr << _T("    cpu, c	Run tests on a CPU device" ) << std::endl;
+		if (vm.count ( "gpu" )  > 0) terr << _T("    gpu, g	Run tests on a GPU device" ) << std::endl;
+		return 1;
+	}
+
+	if( vm.count( "cpu" ) )
+	{
+		device_type = CL_DEVICE_TYPE_CPU;
+	}
+
+	if( vm.count( "gpu" ) )
+	{
+		device_type	= CL_DEVICE_TYPE_GPU;
+		device_gpu_list = ~0;
+	}
+
+	//	Print version by default
+	if( !vm.count( "noVersion" ) )
+	{
+		const int indent = countOf( "clFFT client API version: " );
+		tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " )
+			<< clfftVersionMajor << _T( "." )
+			<< clfftVersionMinor << _T( "." )
+			<< clfftVersionPatch << std::endl;
+
+		cl_uint libMajor, libMinor, libPatch;
+		clfftGetVersion( &libMajor, &libMinor, &libPatch );
+
+		tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " )
+			<< libMajor << _T( "." )
+			<< libMinor << _T( "." )
+			<< libPatch << std::endl << std::endl;
+	}
+
+	//	Print clInfo by default
+	if( !vm.count( "noInfoCL" ) )
+	{
+		cl_context tempContext = NULL;
+		cl_command_queue tempQueue = NULL;
+		cl_event tempEvent = NULL;
+		std::vector< cl_device_id > device_id = ::initializeCL( device_type, device_gpu_list, tempContext, true );
+		::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent );
+	}
+
+	if( vm.count( "help" ) )	// handled only after the version/OpenCL information above has been printed
+	{
+		std::cout << desc << std::endl;
+		return 0;
+	}
+
+	if( vm.count( "verbose" ) )
+	{
+		verbose = true;
+	}
+	else
+	{
+		verbose = false;
+	}
+
+	if( vm.count( "short" ) && vm.count( "medium" ) )
+	{
+		terr << _T("Options 'short' and 'medium' are mutually-exclusive.  Please select only one.") << std::endl;
+		return 1;
+	}
+
+	//	Create a new argc,argv to pass to InitGoogleTest
+	//	First parameter of course is the name of this program
+	std::vector< const char* > myArgv;
+
+	//	Push back a pointer to the executable name
+	if( argc > 0 )
+		myArgv.push_back( *argv );
+
+	//	Push into our new argv vector any parameter the user passed, except to filter their gtest_filter expressions
+	std::string userFilter;
+	for( int i = 1; i < argc; ++i )
+	{
+		if( vm.count( "short" ) || vm.count( "medium" ) )
+		{
+			std::string tmpStr( argv[ i ] );
+			std::string::size_type pos = tmpStr.find( "gtest_filter" );
+			if( pos == std::string::npos )
+			{
+				myArgv.push_back( argv[ i ] );
+			}
+			else
+			{
+				//  Capture the users filter, but only the regexp portion
+				userFilter = argv[ i ];
+				userFilter.erase( 0, 15 );	// 15 is the length of "--gtest_filter="; assumes the argument starts with exactly that prefix
+			}
+		}
+		else
+		{
+			myArgv.push_back( argv[ i ] );
+		}
+	}
+
+	std::string newFilter;
+	if( vm.count( "short" ) )
+	{
+		newFilter += "--gtest_filter=*accuracy_test_pow2*";
+		if( userFilter.size( ) )
+		{
+			newFilter += ":";
+			newFilter += userFilter;
+		}
+		myArgv.push_back( newFilter.c_str( ) );	// newFilter is not modified again ('short' and 'medium' are exclusive), so the pointer stays valid
+	}
+
+	if( vm.count( "medium" ) )
+	{
+		newFilter += "--gtest_filter=";
+		if( userFilter.size( ) )
+		{
+			newFilter += userFilter;
+			newFilter += ":";
+		}
+		newFilter += "-*Random*";
+		myArgv.push_back( newFilter.c_str( ) );	// newFilter is not modified after this point, so the pointer stays valid
+	}
+
+	if( vm.count( "pointwise" ) )
+	{
+		comparison_type = pointwise_compare;
+	}
+	else
+	{
+		comparison_type = root_mean_square;
+	}
+
+	int myArgc	= static_cast< int >( myArgv.size( ) );
+
+	std::cout << "Result comparison tolerance is " << tolerance << std::endl;
+
+	::testing::InitGoogleTest( &myArgc, const_cast< char** >( &myArgv[ 0 ] ) );	// NOTE(review): myArgv is empty when argc == 0; confirm that cannot happen
+
+	return RUN_ALL_TESTS();
+}
diff --git a/src/tests/test_constants.cpp b/src/tests/test_constants.cpp
new file mode 100644
index 00000000..d4ca1e57
--- /dev/null
+++ b/src/tests/test_constants.cpp
@@ -0,0 +1,112 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include "test_constants.h"
+#include <gtest/gtest.h>
+#include <stdexcept>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include "../client/openCL.misc.h"
+
+#if defined( _WIN32 ) && defined( _DEBUG )
+#include <Windows.h>
+#endif
+
+void handle_exception( const std::exception& except )
+{
+	// Sorts an exception by the text of what(): resource and capability limits are only
+	// reported (the test is not failed); anything else is printed and failed via FAIL().
+	const std::string error_message(except.what());
+	const std::string::size_type not_found = std::string::npos;
+
+	std::cout << "--- Exception caught ---" << std::endl;
+
+	const bool too_large_for_device =
+		error_message.find("problem too large for device") != not_found ||
+		error_message.find("CLFFT_INVALID_BUFFER_SIZE" ) != not_found ||
+		error_message.find("CLFFT_MEM_OBJECT_ALLOCATION_FAILURE" ) != not_found ||
+		error_message.find("CLFFT_OUT_OF_HOST_MEMORY" ) != not_found ||
+		error_message.find("CLFFT_OUT_OF_RESOURCES" ) != not_found;
+	if( too_large_for_device )
+	{
+		std::cout << "Data set is too large for this device -- skipping test" << std::endl;
+		//TODO put in (this problem size[data + stride]/max problem size/gpu or cpu) specifics
+		return;
+	}
+	if( error_message.find("system memory allocation failure") != not_found )
+	{
+		std::cout << "Framework was denied enough system memory to support the data set"
+			<< " -- skipping test" << std::endl;
+		return;
+	}
+	if( error_message.find("CLFFT_DEVICE_NO_DOUBLE") != not_found )
+	{
+		std::cout << "Device in context does not support double precision"
+			<< " -- skipping test" << std::endl;
+		return;
+	}
+	// nothing above matched, so this is a real failure: describe it, then fail the test
+	if( error_message.find("dereference null pointer") != not_found )
+	{
+		std::cout << error_message << std::endl;
+	}
+	else if( error_message.find("in-place transform, unmatched in/out layouts") != not_found )
+	{
+		std::cout << "Invalid arguments: for an in-place transform, "
+			<< "in/output layouts must be the same" << std::endl;
+	}
+	else if( error_message.find("device list is empty at transform") != not_found )
+	{
+		std::cout << "A clfft transform is requested, but the device list is empty" << std::endl;
+	}
+	else
+	{
+		std::cout << "Unrecognized exception: " << std::endl;
+		std::cout << error_message << std::endl;
+	}
+	FAIL();
+}
+
+/*****************************************************/
+size_t max_mem_available_on_cl_device(size_t device_index) {
+	// Returns CL_DEVICE_MAX_MEM_ALLOC_SIZE of the device at device_index in the list that
+	// initializeCL() yields for the global device_type/device_gpu_list, or 0 when there is
+	// no device at that index.  The temporary context is handed to cleanupCL() afterwards.
+	cl_context tempContext = NULL;
+	std::vector< cl_device_id > device_id = initializeCL(
+		device_type,
+		device_gpu_list,
+		tempContext,
+		false
+		);
+
+	cl_ulong device_max_to_allocate = 0;
+	// Valid indices are 0 .. size()-1.  The previous range test used '>' and therefore let
+	// device_index == size() read one element past the end of device_id.
+	if( device_index < device_id.size() )
+	{
+		OPENCL_V_THROW( ::clGetDeviceInfo( device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &device_max_to_allocate, NULL ),
+			"Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" );
+	}
+
+	cl_command_queue tempQueue = NULL;
+	cl_event tempEvent = NULL;
+	::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent );
+
+	return static_cast<size_t>(device_max_to_allocate);
+}
diff --git a/src/tests/test_constants.h b/src/tests/test_constants.h
new file mode 100644
index 00000000..6a457058
--- /dev/null
+++ b/src/tests/test_constants.h
@@ -0,0 +1,109 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_TESTCONSTANTS_H )
+#define CLFFT_TESTCONSTANTS_H
+
+#include "clFFT.h"
+#include <string>
+#include <stdexcept>
+
+enum { REAL=0, IMAG=1 };
+enum { dimx=0, dimy=1, dimz=2 };	// indices into 3-entry length arrays
+enum fftw_dim { one_d=1, two_d=2, three_d=3 };
+enum { one_interleaved_buffer=1, separate_real_and_imaginary_buffers=2 };
+const bool use_explicit_intermediate_buffer = true;
+const bool autogenerate_intermediate_buffer = false;
+const bool pointwise_compare = true;	// value of comparison_type for a pointwise comparison
+const bool root_mean_square = false;	// value of comparison_type for a root-mean-square comparison
+extern bool comparison_type;	// defined in gtest_main.cpp
+extern bool suppress_output;	// defined in gtest_main.cpp
+
+// this thing is horrible. horrible! i am not proud.
+extern size_t super_duper_global_seed;
+
+const size_t small2 = 32;	// 2^5
+const size_t normal2 = 1024;	// 2^10
+const size_t large2 = 8192;	// 2^13
+const size_t dlarge2 = 4096;	// 2^12, returned by MaxLength2D<double>
+
+const size_t small3 = 9;	// 3^2
+const size_t normal3 = 729;	// 3^6
+const size_t large3 = 6561;	// 3^8
+const size_t dlarge3 = 2187;	// 3^7, returned by MaxLength2D<double>
+
+const size_t small5 = 25;	// 5^2
+const size_t normal5 = 625;	// 5^4
+const size_t large5 = 15625;	// 5^6
+const size_t dlarge5 = 3125;	// 5^5, returned by MaxLength2D<double>
+
+const size_t large_batch_size = 2048;
+const size_t do_not_output_any_mismatches = 0;
+const size_t default_number_of_mismatches_to_output = 10;
+const size_t max_dimension = 3;	// size of the x/y/z length arrays
+
+const double magnitude_lower_limit = 1.0E-100;
+
+extern float tolerance;	// defined in gtest_main.cpp, set from --tolerance
+
+extern cl_device_type device_type;	// defined in gtest_main.cpp
+extern cl_uint device_gpu_list;	// defined in gtest_main.cpp
+
+extern size_t number_of_random_tests;	// defined in gtest_main.cpp, set from --numRandom
+extern time_t random_test_parameter_seed;	// defined in gtest_main.cpp, set from --seed
+extern bool verbose;	// defined in gtest_main.cpp, set from --verbose
+
+void handle_exception( const std::exception& except );	// defined in test_constants.cpp
+size_t max_mem_available_on_cl_device(size_t device_index);	// defined in test_constants.cpp
+
+// This template function and its specializations control the length inputs to the tests;
+// they should be removed once the size restriction on transform lengths (SP 2^24 and DP 2^22)
+// is removed; the dlarge* constants can then be removed as well
+
+template <typename T>
+inline size_t MaxLength2D(size_t rad)
+{	// generic fallback: every element type other than the float/double specializations gets 0
+	return size_t(0);
+}
+
+template <>
+inline size_t MaxLength2D<float>(size_t rad)
+{
+	// Single-precision limit for the given radix.
+	// Radices other than 2, 3 and 5 have no entry and yield 0.
+	if( rad == 2 ) return large2;
+	if( rad == 3 ) return large3;
+	if( rad == 5 ) return large5;
+
+	return 0;
+}
+
+template <>
+inline size_t MaxLength2D<double>(size_t rad)
+{
+	// Double-precision limit for the given radix (the dlarge* constants).
+	// Radices other than 2, 3 and 5 have no entry and yield 0.
+	if( rad == 2 ) return dlarge2;
+	if( rad == 3 ) return dlarge3;
+	if( rad == 5 ) return dlarge5;
+
+	return 0;
+}
+
+
+#endif
diff --git a/src/tests/typedefs.h b/src/tests/typedefs.h
new file mode 100644
index 00000000..fb0af1df
--- /dev/null
+++ b/src/tests/typedefs.h
@@ -0,0 +1,33 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#pragma once
+#if !defined( CLFFT_TYPEDEFS_H )
+#define CLFFT_TYPEDEFS_H
+
+#include "test_constants.h"
+#include "fftw_transform.h"
+#include "cl_transform.h"
+
+typedef clfft<float, cl_float> clfft_single;
+typedef clfft<double, cl_double> clfft_double;
+typedef buffer<float> buffer_single;
+typedef buffer<double> buffer_double;
+typedef fftw<float, fftwf_complex> fftw_single;
+typedef fftw<double, fftw_complex> fftw_double;
+
+#endif
\ No newline at end of file
diff --git a/src/tests/unit_test.cpp b/src/tests/unit_test.cpp
new file mode 100644
index 00000000..a446abc6
--- /dev/null
+++ b/src/tests/unit_test.cpp
@@ -0,0 +1,1031 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <gtest/gtest.h>
+#include <complex>
+#include "clFFT.h"
+#include "../client/openCL.misc.h"
+
+class clfft_UnitTest : public ::testing::Test {	// shared fixture: one CL context/queue and one default 1D plan per test
+protected:
+	clfft_UnitTest() : test_plan( 0 ) {}	// zero handle lets TearDown skip a plan that was never created
+	virtual ~clfft_UnitTest(){}
+	virtual void SetUp()
+	{
+		lengths[ 0 ] = 32;
+		lengths[ 1 ] = 32;
+		lengths[ 2 ] = 32;
+		cl_uint	deviceGpuList = ~0; // a bitmap set
+		commandQueueFlags = 0;
+
+		size_t memSizeBytes = lengths[ 0 ] * lengths[ 1 ] * lengths[ 2 ] * sizeof( std::complex< float > );
+
+		device_id = initializeCL( CL_DEVICE_TYPE_CPU, deviceGpuList, context, printInfo );
+		createOpenCLCommandQueue( context,
+								  commandQueueFlags,
+								  queue,
+								  device_id,
+								  memSizeBytes, 1, &cl_mem_input,
+								  memSizeBytes, 1, &cl_mem_output
+								);
+
+		outEvent	= NULL;
+		// Every test dereferences test_plan, so a failed creation must stop the test body from running.
+		ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );
+	}
+
+	virtual void TearDown()
+	{
+		if( test_plan != 0 )
+		{
+			clfftDestroyPlan( &test_plan );
+			clfftTeardown();
+		}
+
+		cleanupCL( &context, &queue, 1, &cl_mem_input, 1, &cl_mem_output, &outEvent );
+	}
+
+	clfftPlanHandle test_plan;
+	size_t lengths[3];
+
+	//	We need a valid context for clfftCreateDefaultPlan to work
+	cl_context			context;
+	cl_command_queue	queue;
+	std::vector< cl_device_id >	device_id;
+	cl_event			outEvent;
+	static const bool printInfo = false;
+	cl_uint commandQueueFlags;
+
+	//	These are not used, they are only placeholders for initializeCL
+	cl_mem	cl_mem_input;
+	cl_mem	cl_mem_output;
+};
+
+TEST_F(clfft_UnitTest, get_plan_context_should_get_a_context) {
+    cl_context the_context = NULL;
+
+    EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanContext( test_plan, &the_context ) );
+    // The fixture built test_plan from a real context, so the getter must overwrite the NULL.
+    EXPECT_TRUE( the_context != NULL ) << "context should not be null";
+}
+
+TEST_F(clfft_UnitTest, copyPlan_should_copy_plan) {
+	// Zero-initialised so the handle never holds an indeterminate value if the copy fails.
+	clfftPlanHandle copied_plan = 0;
+
+	lengths[0] = 8;
+	lengths[1] = 16;
+
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanPrecision( test_plan, CLFFT_SINGLE ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_FORWARD, 42.0f ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_BACKWARD, 0.24f ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 8*16, 8*16 ) );
+
+	size_t clStrides[ ] = { 1, 8 };
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_2D, clStrides ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_2D, clStrides ) );
+
+	// TODO need to have created context by now; clfftPlanHandle is no longer a pointer
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCopyPlan( &copied_plan, context, test_plan ) );	// fatal: nothing to destroy if the copy failed
+
+	//EXPECT_EQ( false, copied_plan.baked );
+	//EXPECT_EQ( CLFFT_2D, copied_plan.dim );
+	//EXPECT_EQ( CLFFT_COMPLEX_INTERLEAVED, copied_plan.inputLayout );
+	//EXPECT_EQ( CLFFT_COMPLEX_INTERLEAVED, copied_plan.outputLayout );
+	//EXPECT_EQ( CLFFT_OUTOFPLACE, copied_plan.placeness );
+	//EXPECT_EQ( CLFFT_SINGLE, copied_plan.precision );
+	////TODO check context here
+	//EXPECT_FLOAT_EQ( 42.0f, copied_plan.forwardScale );
+	//EXPECT_FLOAT_EQ( 0.24f, copied_plan.backwardScale );
+	//EXPECT_EQ( 8*16, copied_plan.pitch );
+	//EXPECT_EQ( 2, copied_plan.length.size() );
+	//EXPECT_EQ( 8, copied_plan.length[0] );
+	//EXPECT_EQ( 16, copied_plan.length[1] );
+	//EXPECT_EQ( 2, copied_plan.inStride.size() );
+	//EXPECT_EQ( 1, copied_plan.inStride[0] );
+	//EXPECT_EQ( 8, copied_plan.inStride[1] );
+	//EXPECT_EQ( 2, copied_plan.outStride.size() );
+	//EXPECT_EQ( 1, copied_plan.outStride[0] );
+	//EXPECT_EQ( 8, copied_plan.outStride[1] );
+
+	EXPECT_EQ( CLFFT_SUCCESS, clfftDestroyPlan( &copied_plan ) );
+}
+
+TEST_F(clfft_UnitTest, copyPlan_should_increase_context_reference_count) {
+	//TODO me
+}
+
+//TODO need to promote some things in client.cpp to a library to write this
+//TEST_F(clfft_UnitTest, getPlanContext_should_yield_appropriate_values) {
+//}
+
+TEST_F(clfft_UnitTest, getPlanBatchSize_should_yield_appropriate_values) {
+	size_t batch_size;
+
+	lengths[0] = 2;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( test_plan, 1 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanBatchSize( test_plan, &batch_size ) );
+	EXPECT_EQ( 1, batch_size );
+
+	lengths[0] = 4;
+	lengths[1] = 2;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( test_plan, 8 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanBatchSize( test_plan, &batch_size ) );
+	EXPECT_EQ( 8, batch_size );
+
+	lengths[0] = 4;
+	lengths[1] = 2;
+	lengths[2] = 8;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( test_plan, 16 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanBatchSize( test_plan, &batch_size ) );
+	EXPECT_EQ( 16, batch_size );
+}
+
+TEST_F(clfft_UnitTest, setPlanBatchSize_should_set_batch_size_correctly) {
+	size_t batch_size;
+	lengths[0] = 1;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( test_plan, 1 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanBatchSize( test_plan, &batch_size ) );
+	EXPECT_EQ( 1, batch_size );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( test_plan, 2 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanBatchSize( test_plan, &batch_size ) );
+	EXPECT_EQ( 2, batch_size );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanBatchSize( test_plan, 16 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanBatchSize( test_plan, &batch_size ) );
+	EXPECT_EQ( 16, batch_size );
+}
+
+TEST_F(clfft_UnitTest, getPlanPrecision_should_yield_appropriate_values) {
+	clfftPrecision precision;
+
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanPrecision( test_plan, CLFFT_SINGLE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanPrecision( test_plan, &precision ) );
+	EXPECT_EQ( CLFFT_SINGLE, precision );
+}
+
+TEST_F(clfft_UnitTest, setPlanPrecision_should_set_precision_to_supported_values) {
+
+	clfftPrecision precision;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanPrecision( test_plan, CLFFT_SINGLE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanPrecision( test_plan, &precision ) );
+	EXPECT_EQ( CLFFT_SINGLE, precision );
+}
+
+TEST_F(clfft_UnitTest, setPlanPrecision_should_fail_to_set_unsupported_values) {
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanPrecision( test_plan, CLFFT_SINGLE_FAST ) );
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanPrecision( test_plan, CLFFT_DOUBLE_FAST ) );
+}
+
+TEST_F(clfft_UnitTest, getPlanScale_should_yield_appropriate_values) {
+	cl_float scale;
+
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_FORWARD, 1.414f ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_BACKWARD, 2.718f ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_FORWARD, &scale ) );
+	EXPECT_FLOAT_EQ( 1.414f, scale );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_BACKWARD, &scale ) );
+	EXPECT_FLOAT_EQ( 2.718f, scale );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_MINUS, &scale ) );
+	EXPECT_FLOAT_EQ( 1.414f, scale );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_PLUS, &scale ) );
+	EXPECT_FLOAT_EQ( 2.718f, scale );
+}
+
+TEST_F(clfft_UnitTest, getPlanScale_should_fail_on_invalid_direction) {
+	cl_float scale;
+
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftGetPlanScale( test_plan, ENDDIRECTION, &scale ) );
+}
+
+TEST_F(clfft_UnitTest, setPlanScale_should_set_scale_correctly) {
+	cl_float scale;
+
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_FORWARD, 1.57f ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_FORWARD, &scale ) );
+	EXPECT_FLOAT_EQ( 1.57f, scale );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_BACKWARD, 3.14f ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_BACKWARD, &scale ) );
+	EXPECT_FLOAT_EQ( 3.14f, scale );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_MINUS, 4.71f ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_MINUS, &scale ) );
+	EXPECT_FLOAT_EQ( 4.71f, scale );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanScale( test_plan, CLFFT_PLUS, 6.28f ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanScale( test_plan, CLFFT_PLUS, &scale ) );
+	EXPECT_FLOAT_EQ( 6.28f, scale );
+}
+
+TEST_F(clfft_UnitTest, setPlanScale_should_fail_on_invalid_direction) {
+	// ENDDIRECTION must be rejected; float literal matches the other clfftSetPlanScale calls in this file.
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanScale( test_plan, ENDDIRECTION, 42.0f ) );
+}
+
+TEST_F(clfft_UnitTest, setPlanDimLength_should_set_dimensions_to_supported_values) {
+	cl_uint	lengthSize	= 0;
+	clfftDim	dim;
+	size_t	testLengths[ 3 ];
+
+	lengths[0] = 1;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_1D ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( 1, lengthSize );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_1D, testLengths ) );
+	EXPECT_EQ( CLFFT_1D, dim );
+	EXPECT_EQ( 1, testLengths[0] );
+
+
+	lengths[0] = 2;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_1D, testLengths ) );
+	EXPECT_EQ( 2, testLengths[0] );
+
+	lengths[0] = 4;
+	lengths[1] = 8;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_2D, dim );
+	EXPECT_EQ( 2, lengthSize );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_2D, testLengths ) );
+	EXPECT_EQ( 4, testLengths[0] );
+	EXPECT_EQ( 8, testLengths[1] );
+
+	lengths[0] = 32;
+	lengths[1] = 64;
+	lengths[2] = 128;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_3D, dim );
+	EXPECT_EQ( 3, lengthSize );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_3D, testLengths ) );
+	EXPECT_EQ( 32, testLengths[0] );
+	EXPECT_EQ( 64, testLengths[1] );
+	EXPECT_EQ( 128, testLengths[2] );
+
+	lengths[0] = 2;
+	lengths[1] = 3;
+	lengths[2] = 5;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_3D, dim );
+	EXPECT_EQ( 3, lengthSize );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_3D, testLengths ) );
+	EXPECT_EQ( 2, testLengths[0] );
+	EXPECT_EQ( 3, testLengths[1] );
+	EXPECT_EQ( 5, testLengths[2] );
+
+	lengths[0] = 4;
+	lengths[1] = 9;
+	lengths[2] = 25;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_3D, dim );
+	EXPECT_EQ( 3, lengthSize );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_3D, testLengths ) );
+	EXPECT_EQ( 4, testLengths[0] );
+	EXPECT_EQ( 9, testLengths[1] );
+	EXPECT_EQ( 25, testLengths[2] );
+
+	lengths[0] = 10;
+	lengths[1] = 144;
+	lengths[2] = 2700;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_3D, dim );
+	EXPECT_EQ( 3, lengthSize );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_3D, testLengths ) );
+	EXPECT_EQ( 10, testLengths[0] );
+	EXPECT_EQ( 144, testLengths[1] );
+	EXPECT_EQ( 2700, testLengths[2] );
+}
+
+TEST_F(clfft_UnitTest, setPlanDimLength_should_fail_if_a_length_is_set_to_zero) {
+	lengths[0] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+
+	lengths[0] = 4;
+	lengths[1] = 0;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+
+	lengths[0] = 0;
+	lengths[1] = 4;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+
+	lengths[0] = 0;
+	lengths[1] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+
+	lengths[0] = 0;
+	lengths[1] = 4;
+	lengths[2] = 4;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+
+	lengths[0] = 4;
+	lengths[1] = 0;
+	lengths[2] = 4;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+
+	lengths[0] = 4;
+	lengths[1] = 4;
+	lengths[2] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+
+	lengths[0] = 0;
+	lengths[1] = 0;
+	lengths[2] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+}
+
+TEST_F(clfft_UnitTest, setPlanDimLength_should_fail_on_radices_that_have_non_supported_factors) {
+	// currently only factors of 2, 3, and 5 are supported
+	lengths[0] = 2*3*5*7;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+
+	lengths[0] = 2*2*3*3*5*5*5*5*13;
+	lengths[1] = 17;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+
+	lengths[0] = 5*11;
+	lengths[1] = 2*2*3;
+	lengths[2] = 5*3*2*2*2*2*2*2*2*7;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+}
+
+TEST_F(clfft_UnitTest, setPlanDimLength_should_set_values_to_second_set_when_called_twice) {
+	cl_uint	lengthSize	= 0;
+	clfftDim	dim;
+	size_t	testLengths[ 1 ];
+
+	lengths[0] = 2;
+	lengths[1] = 4;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_1D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_1D, testLengths ) );
+	EXPECT_EQ( CLFFT_1D, dim );
+	EXPECT_EQ( 1, lengthSize );
+	EXPECT_EQ( 2, testLengths[ 0 ] );
+}
+
+TEST_F(clfft_UnitTest, getPlanDimLength_should_yield_correct_values) {
+	clfftDim dim;
+	cl_uint	lengthSize	= 0;
+	size_t gotten_lengths[3];
+
+	lengths[0] = 1;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_1D, gotten_lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_1D, dim );
+	EXPECT_EQ( 1, gotten_lengths[0] );
+
+	lengths[0] = 2;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_1D, gotten_lengths ) );
+	EXPECT_EQ( 2, gotten_lengths[0] );
+
+	lengths[0] = 1;
+	lengths[1] = 1;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_2D, gotten_lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_2D, dim );
+	EXPECT_EQ( 1, gotten_lengths[0] );
+	EXPECT_EQ( 1, gotten_lengths[1] );
+
+	lengths[0] = 2;
+	lengths[1] = 4;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_2D, gotten_lengths ) );
+	EXPECT_EQ( 2, gotten_lengths[0] );
+	EXPECT_EQ( 4, gotten_lengths[1] );
+
+	lengths[0] = 1;
+	lengths[1] = 1;
+	lengths[2] = 1;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_3D, gotten_lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDim( test_plan, &dim, &lengthSize ) );
+	EXPECT_EQ( CLFFT_3D, dim );
+	EXPECT_EQ( 1, gotten_lengths[0] );
+	EXPECT_EQ( 1, gotten_lengths[1] );
+	EXPECT_EQ( 1, gotten_lengths[2] );
+
+	lengths[0] = 2;
+	lengths[1] = 4;
+	lengths[2] = 8;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanLength( test_plan, CLFFT_3D, gotten_lengths ) );
+	EXPECT_EQ( CLFFT_3D, dim );
+	EXPECT_EQ( 2, gotten_lengths[0] );
+	EXPECT_EQ( 4, gotten_lengths[1] );
+	EXPECT_EQ( 8, gotten_lengths[2] );
+}
+
+TEST_F(clfft_UnitTest, getPlanLength_should_fail_when_passed_null_pointer) {
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftGetPlanLength( test_plan, CLFFT_1D, NULL ) );
+}
+
+TEST_F(clfft_UnitTest, getPlanInStride_should_fail_when_passed_null_pointer) {
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftGetPlanInStride( test_plan, CLFFT_1D, NULL ) );
+}
+
+TEST_F(clfft_UnitTest, getPlanOutStride_should_fail_when_passed_null_pointer) {
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftGetPlanOutStride( test_plan, CLFFT_1D, NULL ) );
+}
+
+TEST_F(clfft_UnitTest, createDefaultPlan_should_fail_when_passed_null_pointer) {
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, NULL));
+}
+
+TEST_F(clfft_UnitTest, createDefaultPlan_should_fail_when_passed_length_of_0) {
+    size_t length[3] = {1,1,1};
+
+    length[0] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, length));
+
+    length[0] = 1;
+    length[1] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_2D, length));
+    length[0] = 0;
+    length[1] = 1;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_2D, length));
+    length[0] = 0;
+    length[1] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_2D, length));
+
+    length[0] = 1;
+    length[1] = 1;
+    length[2] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+    length[0] = 1;
+    length[1] = 0;
+    length[2] = 1;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+    length[0] = 0;
+    length[1] = 1;
+    length[2] = 1;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+    length[0] = 0;
+    length[1] = 0;
+    length[2] = 0;
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+}
+
+TEST_F(clfft_UnitTest, createDefaultPlan_should_fail_when_passed_invalid_dimension) {
+    size_t length[3] = {1,1,1};
+
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, ENDDIMENSION, length));
+}
+
+TEST_F(clfft_UnitTest, createDefaultPlan_should_fail_when_passed_unsupported_length) {
+    size_t length[3] = {1,1,1};
+
+    length[0] = 7;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, length));
+
+    length[0] = 13;
+    length[1] = 1;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_2D, length));
+    length[0] = 1;
+    length[1] = 14;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_2D, length));
+    length[0] = 19;
+    length[1] = 22;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_2D, length));
+
+    length[0] = 1;
+    length[1] = 1;
+    length[2] = 91;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+    length[0] = 1;
+    length[1] = 17;
+    length[2] = 1;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+    length[0] = 42;
+    length[1] = 1;
+    length[2] = 1;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+    length[0] = 5;
+    length[1] = 6;
+    length[2] = 7;
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftCreateDefaultPlan( &test_plan, context, CLFFT_3D, length));
+}
+
+TEST_F(clfft_UnitTest, setPlanInStride_should_set_input_strides_to_supported_values) {
+	size_t strides[ ] = { 1, 16, 16*32 };
+	size_t gotten_strides[3];
+
+	lengths[0] = 16;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_1D, strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanInStride( test_plan, CLFFT_1D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[0] );
+
+	lengths[0] = 16;
+	lengths[1] = 32;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_2D, strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanInStride( test_plan, CLFFT_2D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[0] );
+	EXPECT_EQ( 16, gotten_strides[1] );
+
+	lengths[0] = 16;
+	lengths[1] = 32;
+	lengths[2] = 64;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_3D, strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanInStride( test_plan, CLFFT_3D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[0] );
+	EXPECT_EQ( 16, gotten_strides[1] );
+	EXPECT_EQ( 16*32, gotten_strides[2] );
+}
+
+TEST_F(clfft_UnitTest, getPlanInStride_should_yield_correct_values) {
+	size_t input_strides[ ] = { 1, 8, 8*16 };
+	size_t output_strides[ ] = { 3, 99, 456789 };
+	size_t gotten_strides[ 3 ];
+
+	size_t x;
+
+	lengths[0] = 8;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_1D, input_strides ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_1D, output_strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanInStride( test_plan, CLFFT_1D, &x ) );
+	EXPECT_EQ( 1, x );
+
+	lengths[0] = 8;
+	lengths[1] = 16;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_2D, input_strides) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_2D, output_strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanInStride( test_plan, CLFFT_2D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[ 0 ] );
+	EXPECT_EQ( 8, gotten_strides[ 1 ] );
+
+	lengths[0] = 8;
+	lengths[1] = 16;
+	lengths[2] = 32;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_3D, input_strides) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_3D, output_strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanInStride( test_plan, CLFFT_3D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[ 0 ] );
+	EXPECT_EQ( 8, gotten_strides[ 1 ] );
+	EXPECT_EQ( 8*16, gotten_strides[ 2 ] );
+}
+
+TEST_F(clfft_UnitTest, setPlanOutStride_should_set_output_strides_to_supported_values) {
+	size_t strides[ ] = { 1, 16, 16*32 };
+	size_t gotten_strides[3];
+
+	lengths[0] = 16;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_1D, strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanOutStride( test_plan, CLFFT_1D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[0] );
+
+	lengths[0] = 16;
+	lengths[1] = 32;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_2D, strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanOutStride( test_plan, CLFFT_2D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[0] );
+	EXPECT_EQ( 16, gotten_strides[1] );
+
+	lengths[0] = 16;
+	lengths[1] = 32;
+	lengths[2] = 64;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_3D, strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanOutStride( test_plan, CLFFT_3D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[0] );
+	EXPECT_EQ( 16, gotten_strides[1] );
+	EXPECT_EQ( 16*32, gotten_strides[2] );
+}
+
+TEST_F(clfft_UnitTest, getPlanOutStride_should_yield_correct_values) {
+	size_t input_strides[ ] = { 3, 99, 456789 };
+	size_t output_strides[ ] = { 1, 8, 8*16 };
+	size_t gotten_strides[ 3 ];
+
+	size_t x;
+
+	lengths[0] = 8;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_1D, input_strides ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_1D, output_strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanOutStride( test_plan, CLFFT_1D, &x ) );
+	EXPECT_EQ( 1, x );
+
+	lengths[0] = 8;
+	lengths[1] = 16;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_2D, input_strides) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_2D, output_strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanOutStride( test_plan, CLFFT_2D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[ 0 ] );
+	EXPECT_EQ( 8, gotten_strides[ 1 ] );
+
+	lengths[0] = 8;
+	lengths[1] = 16;
+	lengths[2] = 32;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_3D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_3D, lengths ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanInStride( test_plan, CLFFT_3D, input_strides) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanOutStride( test_plan, CLFFT_3D, output_strides ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanOutStride( test_plan, CLFFT_3D, gotten_strides ) );
+	EXPECT_EQ( 1, gotten_strides[ 0 ] );
+	EXPECT_EQ( 8, gotten_strides[ 1 ] );
+	EXPECT_EQ( 8*16, gotten_strides[ 2 ] );
+}
+
+TEST_F(clfft_UnitTest, setPlanDistance_should_set_distance_to_supported_values) {
+	size_t inDistance, outDistance;
+	lengths[0] = 8;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 8+10, 8+2 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &inDistance, &outDistance ) );
+	EXPECT_EQ( 18, inDistance );
+	EXPECT_EQ( 10, outDistance );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 8+3, 8+11 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &inDistance, &outDistance ) );
+	EXPECT_EQ( 11, inDistance );
+	EXPECT_EQ( 19, outDistance );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 8, 8 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &inDistance, &outDistance ) );
+	EXPECT_EQ( 8, inDistance );
+	EXPECT_EQ( 8, outDistance );
+
+	lengths[0] = 2;
+	lengths[1] = 2;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDim( test_plan, CLFFT_2D ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 2*2+10, 2*2+2 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &inDistance, &outDistance ) );
+	EXPECT_EQ( 14, inDistance );
+	EXPECT_EQ( 6, outDistance );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 2*2+3, 2*2+11 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &inDistance, &outDistance ) );
+	EXPECT_EQ( 7, inDistance );
+	EXPECT_EQ( 15, outDistance );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 2*2, 2*2 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &inDistance, &outDistance ) );
+	EXPECT_EQ( 4, inDistance );
+	EXPECT_EQ( 4, outDistance );
+}
+
+TEST_F(clfft_UnitTest, setPlanDistance_should_fail_to_set_pitch_to_smaller_than_one_dataset) {
+	lengths[0] = 32;	// NOTE(review): despite the test name, undersized distances are expected to be accepted here
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_1D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 32-30, 32-30 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 32-16, 32-16 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 32-1, 32-1 ) );
+	// 2D case: a 32x32 dataset with distances just below 32*32, so the lengths must be set as CLFFT_2D.
+	lengths[0] = 32;
+	lengths[1] = 32;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 32*32-30, 32*32-30 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 32*32-16, 32*32-16 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 32*32-1, 32*32-1 ) );
+}
+
+TEST_F(clfft_UnitTest, getPlanDistance_should_yield_correct_values) {
+	size_t distIn;	// written by clfftGetPlanDistance below
+	size_t distOut;	// written by clfftGetPlanDistance below
+	// Configure a 2x2 plan, then store two different distances so a swapped read-back would show.
+	lengths[0] = 2;
+	lengths[1] = 2;
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanLength( test_plan, CLFFT_2D, lengths ) );
+	// Setup steps use ASSERT (abort on failure); the read-back is checked with EXPECT.
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanDistance( test_plan, 4, 6 ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanDistance( test_plan, &distIn, &distOut ) );
+	EXPECT_EQ( 4, distIn );
+	EXPECT_EQ( 6, distOut );
+}
+
+TEST_F(clfft_UnitTest, setLayout_and_getLayout_should_set_and_get_layouts_respectively) {
+	clfftLayout	gotIn, gotOut;	// refilled by each clfftGetLayout call; every group is set / get / compare
+	// complex interleaved -> complex interleaved
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_COMPLEX_INTERLEAVED, gotIn );
+	EXPECT_EQ( CLFFT_COMPLEX_INTERLEAVED, gotOut );
+	// complex planar -> complex planar
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_COMPLEX_PLANAR, CLFFT_COMPLEX_PLANAR ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_COMPLEX_PLANAR, gotIn );
+	EXPECT_EQ( CLFFT_COMPLEX_PLANAR, gotOut );
+	// complex interleaved -> complex planar
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_PLANAR ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_COMPLEX_INTERLEAVED, gotIn );
+	EXPECT_EQ( CLFFT_COMPLEX_PLANAR, gotOut );
+	// complex planar -> complex interleaved
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_COMPLEX_PLANAR, CLFFT_COMPLEX_INTERLEAVED ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_COMPLEX_PLANAR, gotIn );
+	EXPECT_EQ( CLFFT_COMPLEX_INTERLEAVED, gotOut );
+	// real -> hermitian interleaved
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_REAL, gotIn );
+	EXPECT_EQ( CLFFT_HERMITIAN_INTERLEAVED, gotOut );
+	// real -> hermitian planar
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_HERMITIAN_PLANAR ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_REAL, gotIn );
+	EXPECT_EQ( CLFFT_HERMITIAN_PLANAR, gotOut );
+	// hermitian planar -> real
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_HERMITIAN_PLANAR, CLFFT_REAL ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_HERMITIAN_PLANAR, gotIn );
+	EXPECT_EQ( CLFFT_REAL, gotOut );
+	// hermitian interleaved -> real
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetLayout( test_plan, &gotIn, &gotOut ) );
+	EXPECT_EQ( CLFFT_HERMITIAN_INTERLEAVED, gotIn );
+	EXPECT_EQ( CLFFT_REAL, gotOut );
+}
+
+TEST_F(clfft_UnitTest, setLayout_should_fail_on_invalid_argument_values) {
+	const clfftLayout pastEnd = static_cast<clfftLayout>(ENDLAYOUT+42);	// a value beyond the ENDLAYOUT marker
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetLayout( test_plan, CLFFT_COMPLEX_INTERLEAVED, ENDLAYOUT ) );	// bad output
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetLayout( test_plan, ENDLAYOUT, CLFFT_COMPLEX_INTERLEAVED ) );	// bad input
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetLayout( test_plan, ENDLAYOUT, ENDLAYOUT ) );	// both bad
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetLayout( test_plan, pastEnd, CLFFT_COMPLEX_INTERLEAVED ) );	// bad input
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetLayout( test_plan, CLFFT_COMPLEX_INTERLEAVED, pastEnd ) );	// bad output
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetLayout( test_plan, pastEnd, pastEnd ) );	// both bad
+}
+
+TEST_F(clfft_UnitTest, valid_layout_combinations_for_real_to_complex_should_succeed_on_bake_1) {
+	// in place can go from real <-> hermitian interleaved
+	// NOTE(review): this overwrites the test_plan handle; confirm any earlier plan is released elsewhere.
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );	// stop here if the plan cannot be created
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_INPLACE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED ) );
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan(test_plan, 1, &queue, NULL, NULL ));
+}
+
+TEST_F(clfft_UnitTest, valid_layout_combinations_for_real_to_complex_should_succeed_on_bake_2) {
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );	// stop here if the plan cannot be created
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_INPLACE ) );	// in place
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL ) );	// hermitian interleaved -> real
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan(test_plan, 1, &queue, NULL, NULL ));
+}
+
+TEST_F(clfft_UnitTest, valid_layout_combinations_for_real_to_complex_should_succeed_on_bake_3) {
+	// out of place can go from real <-> hermitian interleaved or planar
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );	// stop here if the plan cannot be created
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED ) );	// real -> hermitian interleaved
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan(test_plan, 1, &queue, NULL, NULL ));
+}
+
+TEST_F(clfft_UnitTest, valid_layout_combinations_for_real_to_complex_should_succeed_on_bake_4) {
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );	// stop here if the plan cannot be created
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );	// out of place
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_HERMITIAN_PLANAR ) );	// real -> hermitian planar
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan(test_plan, 1, &queue, NULL, NULL ));
+}
+
+TEST_F(clfft_UnitTest, valid_layout_combinations_for_real_to_complex_should_succeed_on_bake_5) {
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );	// stop here if the plan cannot be created
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );	// out of place
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL ) );	// hermitian interleaved -> real
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan(test_plan, 1, &queue, NULL, NULL ));
+}
+
+TEST_F(clfft_UnitTest, valid_layout_combinations_for_real_to_complex_should_succeed_on_bake_6) {
+	ASSERT_EQ( CLFFT_SUCCESS, clfftCreateDefaultPlan( &test_plan, context, CLFFT_1D, lengths ) );	// stop here if the plan cannot be created
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );	// out of place
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_HERMITIAN_PLANAR, CLFFT_REAL ) );	// hermitian planar -> real
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan(test_plan, 1, &queue, NULL, NULL ));
+}
+
+TEST_F(clfft_UnitTest, invalid_layout_combinations_for_real_to_complex_should_fail) {
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_REAL ) );	// real -> real
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_COMPLEX_INTERLEAVED ) );	// real -> complex interleaved
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetLayout( test_plan, CLFFT_COMPLEX_INTERLEAVED, CLFFT_REAL ) );	// complex interleaved -> real
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_COMPLEX_PLANAR ) );	// real -> complex planar
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetLayout( test_plan, CLFFT_COMPLEX_PLANAR, CLFFT_REAL ) );	// complex planar -> real
+}
+
+TEST_F(clfft_UnitTest, real_to_planar_should_fail_on_bake) {
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_INPLACE ) );	// each setter accepts its value on its own
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_REAL, CLFFT_HERMITIAN_PLANAR ) );
+	EXPECT_EQ( CLFFT_INVALID_PLAN, clfftBakePlan( test_plan, 1, &queue, NULL, NULL ) );	// in-place + real -> hermitian planar is rejected at bake
+}
+
+TEST_F(clfft_UnitTest, planar_to_real_should_fail_on_bake) {
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_INPLACE ) );	// each setter accepts its value on its own
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetLayout( test_plan, CLFFT_HERMITIAN_PLANAR, CLFFT_REAL ) );
+	EXPECT_EQ( CLFFT_INVALID_PLAN, clfftBakePlan( test_plan, 1, &queue, NULL, NULL ) );	// in-place + hermitian planar -> real is rejected at bake
+}
+
+TEST_F(clfft_UnitTest, setResultLocation_should_set_placeness_to_supported_values) {
+	clfftResultLocation	readBack;
+	// in place: set, read back, compare
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_INPLACE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetResultLocation( test_plan, &readBack ) );
+	EXPECT_EQ( CLFFT_INPLACE, readBack );
+	// out of place: set, read back, compare
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetResultLocation( test_plan, &readBack ) );
+	EXPECT_EQ( CLFFT_OUTOFPLACE, readBack );
+}
+
+TEST_F(clfft_UnitTest, setResultLocation_should_fail_on_invalid_argument_values) {
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetResultLocation( test_plan, ENDPLACE ) );	// the end marker itself
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetResultLocation( test_plan, static_cast<clfftResultLocation>( ENDPLACE + 42 ) ) );	// a value beyond it
+}
+
+TEST_F(clfft_UnitTest, getResultLocation_should_yield_correct_values) {
+	clfftResultLocation fetched;
+	// in place: the set step must succeed (ASSERT) before the getter is checked
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_INPLACE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetResultLocation( test_plan, &fetched ) );
+	EXPECT_EQ( CLFFT_INPLACE, fetched );
+	// out of place: same sequence
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetResultLocation( test_plan, CLFFT_OUTOFPLACE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetResultLocation( test_plan, &fetched ) );
+	EXPECT_EQ( CLFFT_OUTOFPLACE, fetched );
+}
+
+TEST_F(clfft_UnitTest, SetPlanTransposeResult_should_set_resulttransposed_to_supported_values) {
+	clfftResultTransposed readBack;
+	// transposed: set, read back, compare
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanTransposeResult( test_plan, CLFFT_TRANSPOSED ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanTransposeResult( test_plan, &readBack ) );
+	EXPECT_EQ( CLFFT_TRANSPOSED, readBack );
+	// not transposed: set, read back, compare
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetPlanTransposeResult( test_plan, CLFFT_NOTRANSPOSE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanTransposeResult( test_plan, &readBack ) );
+	EXPECT_EQ( CLFFT_NOTRANSPOSE, readBack );
+}
+
+TEST_F(clfft_UnitTest, setPlanTransposeResult_should_fail_on_invalid_argument_values) {
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanTransposeResult( test_plan, ENDTRANSPOSED ) );	// the end marker itself
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanTransposeResult( test_plan, static_cast<clfftResultTransposed>( ENDTRANSPOSED + 42 ) ) );	// a value beyond it
+}
+
+TEST_F(clfft_UnitTest, getPlanTransposeResult_should_yield_correct_values) {
+	clfftResultTransposed fetched;
+	// transposed: the set step must succeed (ASSERT) before the getter is checked
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanTransposeResult( test_plan, CLFFT_TRANSPOSED ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanTransposeResult( test_plan, &fetched ) );
+	EXPECT_EQ( CLFFT_TRANSPOSED, fetched );
+	// not transposed: same sequence
+	ASSERT_EQ( CLFFT_SUCCESS, clfftSetPlanTransposeResult( test_plan, CLFFT_NOTRANSPOSE ) );
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetPlanTransposeResult( test_plan, &fetched ) );
+	EXPECT_EQ( CLFFT_NOTRANSPOSE, fetched );
+}
+
+TEST_F(clfft_UnitTest, getTmpBufSize_should_fail_on_unbaked_plan) {
+	size_t tmpBytes;	// output argument; no clfftBakePlan call precedes the query in this test
+	EXPECT_EQ( CLFFT_INVALID_OPERATION, clfftGetTmpBufSize( test_plan, &tmpBytes ) );
+}
+
+TEST_F(clfft_UnitTest, getTmpBufSize_should_succeed_on_baked_plan) {
+	size_t tmpBytes;	// output argument; only the returned status is checked, not the size
+	ASSERT_EQ( CLFFT_SUCCESS, clfftBakePlan( test_plan, 1, &queue, NULL, NULL ) );	// bake first
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetTmpBufSize( test_plan, &tmpBytes ) );
+}
+
+TEST_F(clfft_UnitTest, bake_plan_should_fail_to_bake_multi_GPU_plan) {
+	cl_uint queueCount = 2;	// more than one queue is requested
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftBakePlan( test_plan, queueCount, &queue, NULL, NULL ) );
+}
+
+void CL_CALLBACK foofies( clfftPlanHandle /*spoon*/, void* /*fork*/ ) {}	// callback with an empty body; does nothing when invoked
+
+TEST_F(clfft_UnitTest, bake_plan_should_fail_on_non_null_function_pointer) {
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftBakePlan( test_plan, 1, &queue, &foofies, NULL ) );	// a non-NULL callback is expected to be rejected
+}
+
+TEST_F(clfft_UnitTest, bake_plan_should_fail_on_non_null_user_data) {
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftBakePlan( test_plan, 1, &queue, NULL, reinterpret_cast<void*>( 0xf00f1e5 ) ) );	// non-NULL user data, NULL callback
+}
+
+TEST_F(clfft_UnitTest, set_dimension_should_fail_on_invalid_value) {
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanDim( test_plan, ENDDIMENSION ) );	// the end marker is expected to yield NOTIMPLEMENTED
+}
+
+TEST_F(clfft_UnitTest, set_precision_should_fail_on_invalid_value) {
+	EXPECT_EQ( CLFFT_INVALID_ARG_VALUE, clfftSetPlanPrecision( test_plan, ENDPRECISION ) );	// the end marker is expected to yield INVALID_ARG_VALUE
+}
+
+TEST_F(clfft_UnitTest, set_length_should_fail_on_null_pointer) {
+	size_t* noLengths = NULL;	// local null pointer passed in place of a lengths array
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftSetPlanLength( test_plan, CLFFT_2D, noLengths ) );
+}
+
+TEST_F(clfft_UnitTest, set_length_should_fail_on_invalid_dimension) {
+	size_t someLengths[3] = { 2, 4, 8 };	// a non-null array, so only the dimension argument is invalid
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanLength( test_plan, ENDDIMENSION, someLengths ) );
+}
+
+TEST_F(clfft_UnitTest, get_length_should_fail_on_invalid_dimension) {
+	size_t scratch[3] = { 2, 4, 8 };	// a non-null buffer, so only the dimension argument is invalid
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftGetPlanLength( test_plan, ENDDIMENSION, scratch ) );
+}
+
+TEST_F(clfft_UnitTest, set_in_stride_should_fail_on_null_pointer) {
+	size_t* noStrides = NULL;	// null pointer passed in place of a strides array
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftSetPlanInStride( test_plan, CLFFT_2D, noStrides ) );
+}
+
+TEST_F(clfft_UnitTest, set_in_stride_should_fail_on_invalid_dimension) {
+	size_t someStrides[3] = { 2, 4, 8 };	// a non-null array, so only the dimension argument is invalid
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanInStride( test_plan, ENDDIMENSION, someStrides ) );
+}
+
+TEST_F(clfft_UnitTest, get_in_stride_should_fail_on_invalid_dimension) {
+	size_t scratch[3] = { 2, 4, 8 };	// a non-null buffer, so only the dimension argument is invalid
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftGetPlanInStride( test_plan, ENDDIMENSION, scratch ) );
+}
+
+TEST_F(clfft_UnitTest, set_out_stride_should_fail_on_null_pointer) {
+	size_t* noStrides = NULL;	// null pointer passed in place of a strides array
+	EXPECT_EQ( CLFFT_INVALID_HOST_PTR, clfftSetPlanOutStride( test_plan, CLFFT_2D, noStrides ) );
+}
+
+TEST_F(clfft_UnitTest, set_out_stride_should_fail_on_invalid_dimension) {
+	size_t someStrides[3] = { 2, 4, 8 };	// a non-null array, so only the dimension argument is invalid
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftSetPlanOutStride( test_plan, ENDDIMENSION, someStrides ) );
+}
+
+TEST_F(clfft_UnitTest, get_out_stride_should_fail_on_invalid_dimension) {
+	size_t scratch[3] = { 2, 4, 8 };	// a non-null buffer, so only the dimension argument is invalid
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, clfftGetPlanOutStride( test_plan, ENDDIMENSION, scratch ) );
+}
+
+TEST_F(clfft_UnitTest, enqueue_transform_should_fail_with_num_queues_and_events_greater_than_1) {
+	cl_mem* noInput = NULL;	// no buffers are supplied; the call is expected to be rejected anyway
+	cl_mem* noOutput = NULL;
+	// A queue count of 2 is passed together with the single fixture queue.
+	const clfftStatus status = clfftEnqueueTransform( test_plan, CLFFT_FORWARD, 2, &queue, 0, NULL, &outEvent, noInput, noOutput, NULL );
+	EXPECT_EQ( CLFFT_NOTIMPLEMENTED, status );
+}
+
+TEST_F(clfft_UnitTest, get_version_should_get_a_version_number) {
+	cl_uint verMajor = 0xb00f1e5;	// each variable starts at a distinct sentinel value
+	cl_uint verMinor = 0xd00f1e5;
+	cl_uint verPatch = 0xf00f1e5;
+	EXPECT_EQ( CLFFT_SUCCESS, clfftGetVersion( &verMajor, &verMinor, &verPatch ) );
+	EXPECT_NE( 0xb00f1e5, verMajor );	// the sentinel must have been overwritten
+	EXPECT_NE( 0xd00f1e5, verMinor );
+	EXPECT_NE( 0xf00f1e5, verPatch );
+}
+
+TEST_F(clfft_UnitTest, setup_should_succeed_given_a_setup_data_pointer) {
+	clfftSetupData setupArgs;	// only the four fields assigned below are set
+	setupArgs.major = 6;
+	setupArgs.minor = 8;
+	setupArgs.patch = 42;
+	setupArgs.debugFlags = 0xf1a95;
+	// The field values look arbitrary (presumably placeholders); only the returned status is checked.
+	EXPECT_EQ( CLFFT_SUCCESS, clfftSetup( &setupArgs ) );
+}
diff --git a/src/tests/unit_test_persistent_plans.cpp b/src/tests/unit_test_persistent_plans.cpp
new file mode 100644
index 00000000..d122b50a
--- /dev/null
+++ b/src/tests/unit_test_persistent_plans.cpp
@@ -0,0 +1,276 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+//TODO persistent plan feature : caching kernel binaries for later reload
+#if defined(_WIN32)
+
+#include <gtest/gtest.h>
+#include <complex>
+#include "clFFT.h"
+#include "../client/openCL.misc.h"
+#include "clfft.typedefs.h"
+
+class clfft_PersistentPlans : public ::testing::Test {	// fixture with no members of its own
+protected:
+	clfft_PersistentPlans() {}
+	virtual ~clfft_PersistentPlans() {}
+	virtual void SetUp() {}		// nothing is prepared per test
+	virtual void TearDown() {}	// nothing is released per test
+};
+
+void test_persistent_plan( size_t * length, clfftLayout input_layout, clfftLayout output_layout, clfftResultLocation result_location )
+{
+	// Round trip: one object writes its plan to a file, a second object reads that file back
+	// and runs a forward transform, and the result is compared with an fftw_single reference.
+	try {
+		// The dimension is chosen from which of the y/z lengths equal 1.
+		const bool y_is_one = ( length[dimy] == 1 );
+		const bool z_is_one = ( length[dimz] == 1 );
+		clfftDim cl_dimension = ( y_is_one && z_is_one ) ? CLFFT_1D
+		                      : z_is_one ? CLFFT_2D
+		                      : CLFFT_3D;
+		// The fftw-side dimension is obtained by casting the clfftDim value.
+		fftw_dim fftw_dimension = static_cast<fftw_dim>(cl_dimension);
+
+		{	// inner scope: the writing object is destroyed before the file is read back
+			clfft_single saver( cl_dimension, length, NULL, 1, 0);
+			saver.input.set_all_data_points_on_all_passes_to_value(1.0f,0.0f);
+			saver.set_forward_transform();
+			// Apply the requested placeness first ...
+			if( result_location != CLFFT_INPLACE )
+				saver.set_out_of_place();
+			else
+				saver.set_in_place();
+			// ... then the requested layouts, then write the plan out.
+			saver.input_layout(input_layout);
+			saver.output_layout(output_layout);
+			saver.write_plan_to_file("wakkawakka.fft");
+		}
+		// Placeness and layouts are not set explicitly on this object; it only calls read_plan_from_file.
+		clfft_single loader( cl_dimension, length, NULL, 1, 0);
+		loader.input.set_all_data_points_on_all_passes_to_value(1.0f,0.0f);
+		loader.set_forward_transform();
+		loader.read_plan_from_file("wakkawakka.fft");
+		loader.transform();
+		// Reference forward transform, filled with the same (1.0f, 0.0f) value.
+		fftw_single expected( fftw_dimension, length);
+		expected.set_forward_transform();
+		expected.data.set_all_data_points_on_all_passes_to_value(1.0f,0.0f);
+		expected.transform();
+		// Compare .input when the loaded plan reports in-place, otherwise .output.
+		if( loader.placeness() == CLFFT_INPLACE )
+			EXPECT_EQ( true, loader.input.is_equal_to( expected.data));
+		else
+			EXPECT_EQ( true, loader.output.is_equal_to( expected.data));
+	}
+	catch( const std::exception& err )
+	{
+		handle_exception(err);
+	}
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_1d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 1024, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_1d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 32768, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_2d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 1024, 1024, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_normal_2d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 4096, 2, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_large_2d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 4096, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_large_2d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 2, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_normal_3d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 32, 32, 32 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_mixed_radices_3d_inplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 2*3*5, 2*3*5, 2*3*5 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_1d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 1024, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_1d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 32768, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_2d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 1024, 1024, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_normal_2d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 4096, 2, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_large_2d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 4096, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_large_2d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 2, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_normal_3d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 32, 32, 32 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_mixed_radices_3d_inplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 2*3*5, 2*3*5, 2*3*5 };
+	test_persistent_plan( extents, layout, layout, CLFFT_INPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_1d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 1024, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_1d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 32768, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_2d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 1024, 1024, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_normal_2d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 4096, 2, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_large_2d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 4096, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_large_2d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 2, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_normal_3d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 32, 32, 32 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_mixed_radices_3d_outofplace_interleaved) {
+	const clfftLayout layout = CLFFT_COMPLEX_INTERLEAVED;	// used for both input and output
+	size_t extents[] = { 2*3*5, 2*3*5, 2*3*5 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_1d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 1024, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_1d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 32768, 1, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_2d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 1024, 1024, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_normal_2d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 4096, 2, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_large_large_2d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 4096, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_large_2d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 2, 4096, 1 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_normal_normal_normal_3d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 32, 32, 32 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+
+TEST_F(clfft_PersistentPlans, DISABLED_mixed_radices_3d_outofplace_planar) {
+	const clfftLayout layout = CLFFT_COMPLEX_PLANAR;	// used for both input and output
+	size_t extents[] = { 2*3*5, 2*3*5, 2*3*5 };
+	test_persistent_plan( extents, layout, layout, CLFFT_OUTOFPLACE );
+}
+#endif