/*******************************************************************************
* Copyright 2021 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

#include "prwarpaffine_t.h"

/*
//  Name:               ipprWarpAffineGetBufSize_T
//  Purpose:            Computes the size of an external work buffer (in bytes)
//  Parameters:
//    srcVolume         size of source volume
//    srcVOI            volume of interest of source volume
//    dstVOI            volume of interest of destination volume
//    coeffs            affine transform matrix
//    nChannel          number of channels
//    interpolation     type of interpolation to perform for the affine transform of the input volume:
//                        IPPI_INTER_NN      nearest neighbor interpolation
//                        IPPI_INTER_LINEAR  trilinear interpolation
//                        IPPI_INTER_CUBIC   tricubic polynomial interpolation
//                      including two-parameter cubic filters:
//                        IPPI_INTER_CUBIC2P_BSPLINE      B-spline filter (1, 0)
//                        IPPI_INTER_CUBIC2P_CATMULLROM   Catmull-Rom filter (0, 1/2)
//                        IPPI_INTER_CUBIC2P_B05C03       special filter with parameters (1/2, 3/10)
//    pSize             pointer to the external buffer's size
//  Returns:
//    ippStsNoErr             no errors
//    ippStsNullPtrErr        pSize == NULL
//    ippStsSizeErr           size of source or destination volumes is less than or equal to zero
//    ippStsNumChannelsErr    number of channels is not 1
//    ippStsInterpolationErr  (interpolation != IPPI_INTER_NN) &&
//                            (interpolation != IPPI_INTER_LINEAR) &&
//                            (interpolation != IPPI_INTER_CUBIC) &&
//                            (interpolation != IPPI_INTER_CUBIC2P_BSPLINE) &&
//                            (interpolation != IPPI_INTER_CUBIC2P_CATMULLROM) &&
//                            (interpolation != IPPI_INTER_CUBIC2P_B05C03)
*/

IPPFUN(IppStatus, ipprWarpAffineGetBufSize_T, (IpprVolume srcVolume, IpprCuboid srcVOI, IpprCuboid dstVOI, const double coeffs[3][4], int nChannel, int interpolation, int* pSize))
{
    /* The threaded entry point adds nothing of its own: the request is forwarded
       unchanged to ipprWarpAffineGetBufSize and its status is handed back. */
    IppStatus status = ipprWarpAffineGetBufSize(srcVolume, srcVOI, dstVOI, coeffs, nChannel, interpolation, pSize);

    return status;
}


/*
//  Name:               ipprWarpAffine_8u_C1PV_Fun
//  Purpose:            Processes slab number t (a run of consecutive z planes) of the
//                      destination VOI by calling ipprWarpAffine_8u_C1PV on it
//  Parameters:
//    t                 zero-based index of the slab to process
//    args              pointer to a WarpAffineThreadData_8u_PV describing the whole job
//  Returns:            the status returned by ipprWarpAffine_8u_C1PV for this slab
//  Notes:
//    Slabs with index < lastSlice are (sizeSlice + 1) planes deep, all other slabs are
//    sizeSlice planes deep. Only z and depth of the destination VOI are changed.
//    NOTE(review): every slab is handed the same tData->pBuffer; confirm that
//    ipprWarpAffine_8u_C1PV does not use it as private scratch memory.
*/
IppStatus ipprWarpAffine_8u_C1PV_Fun(int t, void* args) {
    WarpAffineThreadData_8u_PV* tData = (WarpAffineThreadData_8u_PV*)args;
    const int sizeSlice = tData->sizeSlice;
    const int lastSlice = tData->lastSlice;
    IpprCuboid curDstVOI = tData->dstVOI;

    if (t < lastSlice) {
        /* one of the leading, one-plane-deeper slabs: all slabs before it are deeper too */
        curDstVOI.depth = sizeSlice + 1;
        curDstVOI.z += t * (sizeSlice + 1);
    }
    else {
        /* regular slab: skip t regular depths plus one extra plane per deeper slab */
        curDstVOI.depth = sizeSlice;
        curDstVOI.z += t * sizeSlice + lastSlice;
    }

    return ipprWarpAffine_8u_C1PV(tData->pSrc, tData->srcVolume, tData->srcStep, tData->srcVOI, tData->pDst, tData->dstStep, curDstVOI, tData->coeffs, tData->interpolation, tData->pBuffer);
}

/*
//  Name:               ipprWarpAffine_16u_C1PV_Fun
//  Purpose:            Processes slab number t (a run of consecutive z planes) of the
//                      destination VOI by calling ipprWarpAffine_16u_C1PV on it
//  Parameters:
//    t                 zero-based index of the slab to process
//    args              pointer to a WarpAffineThreadData_16u_PV describing the whole job
//  Returns:            the status returned by ipprWarpAffine_16u_C1PV for this slab
//  Notes:
//    Slabs with index < lastSlice are (sizeSlice + 1) planes deep, all other slabs are
//    sizeSlice planes deep. Only z and depth of the destination VOI are changed.
//    NOTE(review): every slab is handed the same tData->pBuffer; confirm that
//    ipprWarpAffine_16u_C1PV does not use it as private scratch memory.
*/
IppStatus ipprWarpAffine_16u_C1PV_Fun(int t, void* args) {
    WarpAffineThreadData_16u_PV* tData = (WarpAffineThreadData_16u_PV*)args;
    const int sizeSlice = tData->sizeSlice;
    const int lastSlice = tData->lastSlice;
    IpprCuboid curDstVOI = tData->dstVOI;

    if (t < lastSlice) {
        /* one of the leading, one-plane-deeper slabs: all slabs before it are deeper too */
        curDstVOI.depth = sizeSlice + 1;
        curDstVOI.z += t * (sizeSlice + 1);
    }
    else {
        /* regular slab: skip t regular depths plus one extra plane per deeper slab */
        curDstVOI.depth = sizeSlice;
        curDstVOI.z += t * sizeSlice + lastSlice;
    }

    return ipprWarpAffine_16u_C1PV(tData->pSrc, tData->srcVolume, tData->srcStep, tData->srcVOI, tData->pDst, tData->dstStep, curDstVOI, tData->coeffs, tData->interpolation, tData->pBuffer);
}

/*
//  Name:               ipprWarpAffine_32f_C1PV_Fun
//  Purpose:            Processes slab number t (a run of consecutive z planes) of the
//                      destination VOI by calling ipprWarpAffine_32f_C1PV on it
//  Parameters:
//    t                 zero-based index of the slab to process
//    args              pointer to a WarpAffineThreadData_32f_PV describing the whole job
//  Returns:            the status returned by ipprWarpAffine_32f_C1PV for this slab
//  Notes:
//    Slabs with index < lastSlice are (sizeSlice + 1) planes deep, all other slabs are
//    sizeSlice planes deep. Only z and depth of the destination VOI are changed.
//    NOTE(review): every slab is handed the same tData->pBuffer; confirm that
//    ipprWarpAffine_32f_C1PV does not use it as private scratch memory.
*/
IppStatus ipprWarpAffine_32f_C1PV_Fun(int t, void* args) {
    WarpAffineThreadData_32f_PV* tData = (WarpAffineThreadData_32f_PV*)args;
    const int sizeSlice = tData->sizeSlice;
    const int lastSlice = tData->lastSlice;
    IpprCuboid curDstVOI = tData->dstVOI;

    if (t < lastSlice) {
        /* one of the leading, one-plane-deeper slabs: all slabs before it are deeper too */
        curDstVOI.depth = sizeSlice + 1;
        curDstVOI.z += t * (sizeSlice + 1);
    }
    else {
        /* regular slab: skip t regular depths plus one extra plane per deeper slab */
        curDstVOI.depth = sizeSlice;
        curDstVOI.z += t * sizeSlice + lastSlice;
    }

    return ipprWarpAffine_32f_C1PV(tData->pSrc, tData->srcVolume, tData->srcStep, tData->srcVOI, tData->pDst, tData->dstStep, curDstVOI, tData->coeffs, tData->interpolation, tData->pBuffer);
}

/*
//  Name:               ipprWarpAffine_8u_C1V_Fun
//  Purpose:            Processes slab number t (a run of consecutive z planes) of the
//                      destination VOI by calling ipprWarpAffine_8u_C1V on it
//  Parameters:
//    t                 zero-based index of the slab to process
//    args              pointer to a WarpAffineThreadData_8u_V describing the whole job
//  Returns:            the status returned by ipprWarpAffine_8u_C1V for this slab
//  Notes:
//    Slabs with index < lastSlice are (sizeSlice + 1) planes deep, all other slabs are
//    sizeSlice planes deep. Only z and depth of the destination VOI are changed; the
//    data pointers and steps are forwarded untouched.
//    NOTE(review): every slab is handed the same tData->pBuffer; confirm that
//    ipprWarpAffine_8u_C1V does not use it as private scratch memory.
*/
IppStatus ipprWarpAffine_8u_C1V_Fun(int t, void* args) {
    WarpAffineThreadData_8u_V* tData = (WarpAffineThreadData_8u_V*)args;
    const int sizeSlice = tData->sizeSlice;
    const int lastSlice = tData->lastSlice;
    IpprCuboid curDstVOI = tData->dstVOI;

    if (t < lastSlice) {
        /* one of the leading, one-plane-deeper slabs: all slabs before it are deeper too */
        curDstVOI.depth = sizeSlice + 1;
        curDstVOI.z += t * (sizeSlice + 1);
    }
    else {
        /* regular slab: skip t regular depths plus one extra plane per deeper slab */
        curDstVOI.depth = sizeSlice;
        curDstVOI.z += t * sizeSlice + lastSlice;
    }

    return ipprWarpAffine_8u_C1V(tData->pSrc, tData->srcVolume, tData->srcStep, tData->srcPlaneStep, tData->srcVOI, tData->pDst, tData->dstStep, tData->dstPlaneStep, curDstVOI, tData->coeffs, tData->interpolation, tData->pBuffer);
}

/*
//  Name:               ipprWarpAffine_16u_C1V_Fun
//  Purpose:            Processes slab number t (a run of consecutive z planes) of the
//                      destination VOI by calling ipprWarpAffine_16u_C1V on it
//  Parameters:
//    t                 zero-based index of the slab to process
//    args              pointer to a WarpAffineThreadData_16u_V describing the whole job
//  Returns:            the status returned by ipprWarpAffine_16u_C1V for this slab
//  Notes:
//    Slabs with index < lastSlice are (sizeSlice + 1) planes deep, all other slabs are
//    sizeSlice planes deep. Only z and depth of the destination VOI are changed; the
//    data pointers and steps are forwarded untouched.
//    NOTE(review): every slab is handed the same tData->pBuffer; confirm that
//    ipprWarpAffine_16u_C1V does not use it as private scratch memory.
*/
IppStatus ipprWarpAffine_16u_C1V_Fun(int t, void* args) {
    WarpAffineThreadData_16u_V* tData = (WarpAffineThreadData_16u_V*)args;
    const int sizeSlice = tData->sizeSlice;
    const int lastSlice = tData->lastSlice;
    IpprCuboid curDstVOI = tData->dstVOI;

    if (t < lastSlice) {
        /* one of the leading, one-plane-deeper slabs: all slabs before it are deeper too */
        curDstVOI.depth = sizeSlice + 1;
        curDstVOI.z += t * (sizeSlice + 1);
    }
    else {
        /* regular slab: skip t regular depths plus one extra plane per deeper slab */
        curDstVOI.depth = sizeSlice;
        curDstVOI.z += t * sizeSlice + lastSlice;
    }

    return ipprWarpAffine_16u_C1V(tData->pSrc, tData->srcVolume, tData->srcStep, tData->srcPlaneStep, tData->srcVOI, tData->pDst, tData->dstStep, tData->dstPlaneStep, curDstVOI, tData->coeffs, tData->interpolation, tData->pBuffer);
}

/*
//  Name:               ipprWarpAffine_32f_C1V_Fun
//  Purpose:            Processes slab number t (a run of consecutive z planes) of the
//                      destination VOI by calling ipprWarpAffine_32f_C1V on it
//  Parameters:
//    t                 zero-based index of the slab to process
//    args              pointer to a WarpAffineThreadData_32f_V describing the whole job
//  Returns:            the status returned by ipprWarpAffine_32f_C1V for this slab
//  Notes:
//    Slabs with index < lastSlice are (sizeSlice + 1) planes deep, all other slabs are
//    sizeSlice planes deep. Only z and depth of the destination VOI are changed; the
//    data pointers and steps are forwarded untouched.
//    NOTE(review): every slab is handed the same tData->pBuffer; confirm that
//    ipprWarpAffine_32f_C1V does not use it as private scratch memory.
*/
IppStatus ipprWarpAffine_32f_C1V_Fun(int t, void* args) {
    WarpAffineThreadData_32f_V* tData = (WarpAffineThreadData_32f_V*)args;
    const int sizeSlice = tData->sizeSlice;
    const int lastSlice = tData->lastSlice;
    IpprCuboid curDstVOI = tData->dstVOI;

    if (t < lastSlice) {
        /* one of the leading, one-plane-deeper slabs: all slabs before it are deeper too */
        curDstVOI.depth = sizeSlice + 1;
        curDstVOI.z += t * (sizeSlice + 1);
    }
    else {
        /* regular slab: skip t regular depths plus one extra plane per deeper slab */
        curDstVOI.depth = sizeSlice;
        curDstVOI.z += t * sizeSlice + lastSlice;
    }

    return ipprWarpAffine_32f_C1V(tData->pSrc, tData->srcVolume, tData->srcStep, tData->srcPlaneStep, tData->srcVOI, tData->pDst, tData->dstStep, tData->dstPlaneStep, curDstVOI, tData->coeffs, tData->interpolation, tData->pBuffer);
}

/*
//  Names:              ipprWarpAffine_8u/16u/32f_C1PV_T
//  Purpose:            Performs AFFINE TRANSFORM of the source volume by matrix a[3][4]
//                            |X'|   |a00 a01 a02|   |X|   |a03|
//                            |Y'| = |a10 a11 a12| * |Y| + |a13|
//                            |Z'|   |a20 a21 a22|   |Z|   |a23|
//  Parameters:
//    pSrc              array of pointers to 2D planes in source volume data
//    srcVolume         size of source volume
//    srcStep           step in every plane of source volume
//    srcVOI            volume of interest of source volume
//    pDst              array of pointers to 2D planes in destination volume data
//    dstStep           step in every plane of destination volume
//    dstVOI            volume of interest of destination volume
//    coeffs            affine transform matrix
//    interpolation     type of interpolation to perform for the affine transform of the input volume:
//                        IPPI_INTER_NN      nearest neighbor interpolation
//                        IPPI_INTER_LINEAR  trilinear interpolation
//                        IPPI_INTER_CUBIC   tricubic polynomial interpolation
//                      including two-parameter cubic filters:
//                        IPPI_INTER_CUBIC2P_BSPLINE    B-spline filter (1, 0)
//                        IPPI_INTER_CUBIC2P_CATMULLROM Catmull-Rom filter (0, 1/2)
//                        IPPI_INTER_CUBIC2P_B05C03     special filter with parameters (1/2, 3/10)
//    pBuffer           pointer to work buffer
//  Returns:
//    ippStsNoErr             no errors
//    ippStsNullPtrErr        pSrc == NULL or pDst == NULL or pBuffer == NULL or coeffs == NULL
//    ippStsSizeErr           width or height or depth of volumes is less than or equal to zero,
//                            or an origin coordinate (x, y, z) of srcVOI or dstVOI is negative
//    ippStsCoeffErr          unallowable values of the transformation coefficients
//                            (determinant of the 3x3 part is zero or closer to zero than EPSD)
//    ippStsInterpolationErr  interpolation has an illegal value
//    ippStsWrongIntersectVOI srcVOI has no intersection with the source volume, no operation
*/

IPPFUN(IppStatus, ipprWarpAffine_8u_C1PV_T, (const Ipp8u* const pSrc[], IpprVolume srcVolume, int srcStep, IpprCuboid srcVOI, Ipp8u* const pDst[], int dstStep, IpprCuboid dstVOI, const double coeffs[3][4], int interpolation, Ipp8u* pBuffer))
{
    /* check input parameters (pBuffer is not validated here, it is forwarded as is) */
    if (pSrc == NULL || pDst == NULL) return ippStsNullPtrErr;
    if (coeffs == NULL) return ippStsNullPtrErr;
    if (srcVolume.height <= 0 || srcVolume.width <= 0 || srcVolume.depth <= 0) return ippStsSizeErr;
    if (srcVOI.x < 0 || srcVOI.width <= 0 || srcVOI.y < 0 || srcVOI.height <= 0 || srcVOI.z < 0 || srcVOI.depth <= 0) return ippStsSizeErr;
    if (dstVOI.x < 0 || dstVOI.width <= 0 || dstVOI.y < 0 || dstVOI.height <= 0 || dstVOI.z < 0 || dstVOI.depth <= 0) return ippStsSizeErr;

    /* check matrix: the determinant of the 3x3 part must not be (nearly) zero */
    double det = coeffs[0][0] * (coeffs[1][1] * coeffs[2][2] - coeffs[1][2] * coeffs[2][1]) -
        coeffs[0][1] * (coeffs[1][0] * coeffs[2][2] - coeffs[1][2] * coeffs[2][0]) +
        coeffs[0][2] * (coeffs[1][0] * coeffs[2][1] - coeffs[1][1] * coeffs[2][0]);

    if ((det > -EPSD) && (det < EPSD)) return ippStsCoeffErr;

    /* check interpolation mode */
    if (interpolation != IPPI_INTER_NN &&
        interpolation != IPPI_INTER_LINEAR &&
        interpolation != IPPI_INTER_CUBIC &&
        interpolation != IPPI_INTER_CUBIC2P_BSPLINE &&
        interpolation != IPPI_INTER_CUBIC2P_CATMULLROM &&
        interpolation != IPPI_INTER_CUBIC2P_B05C03)   return ippStsInterpolationErr;

    /* Start from one thread so that a thread-count query which fails without writing,
       or which reports a non-positive count, falls back to the sequential path
       instead of reading an indeterminate value or dividing by zero below. */
    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);
    if (numThreads < 1) numThreads = 1;

    /* planes per slab (at least one) and the resulting number of slabs along z */
    int sizeSlice = IPP_MAX(dstVOI.depth / numThreads, 1);
    int numSlices = IPP_MIN(numThreads, dstVOI.depth / sizeSlice);

    /* nothing to split: process the whole destination VOI in one call */
    if (numThreads == 1 || numSlices <= 1) {
        return ipprWarpAffine_8u_C1PV(pSrc, srcVolume, srcStep, srcVOI, pDst, dstStep, dstVOI, coeffs, interpolation, pBuffer);
    }

    /* planes left over after an even split; the first lastSlice slabs take one extra plane */
    int lastSlice = dstVOI.depth % numSlices;

    /* NOTE(review): a single pBuffer is stored in the job description shared by all
       slabs; confirm the per-slab routine does not need private scratch memory. */
    WarpAffineThreadData_8u_PV tData;
    setWarpAffineThreadData_8u_PV(pSrc, pDst, srcVolume, srcVOI, dstVOI, srcStep, dstStep, coeffs, interpolation, pBuffer, numSlices, sizeSlice, lastSlice, &tData);

    return ippParallelFor_T(numSlices, &tData, ipprWarpAffine_8u_C1PV_Fun);
}

IPPFUN(IppStatus, ipprWarpAffine_16u_C1PV_T, (const Ipp16u* const pSrc[], IpprVolume srcVolume, int srcStep, IpprCuboid srcVOI, Ipp16u* const pDst[], int dstStep, IpprCuboid dstVOI, const double coeffs[3][4], int interpolation, Ipp8u* pBuffer))
{
    /* check input parameters (pBuffer is not validated here, it is forwarded as is) */
    if (pSrc == NULL || pDst == NULL) return ippStsNullPtrErr;
    if (coeffs == NULL) return ippStsNullPtrErr;
    if (srcVolume.height <= 0 || srcVolume.width <= 0 || srcVolume.depth <= 0) return ippStsSizeErr;
    if (srcVOI.x < 0 || srcVOI.width <= 0 || srcVOI.y < 0 || srcVOI.height <= 0 || srcVOI.z < 0 || srcVOI.depth <= 0) return ippStsSizeErr;
    if (dstVOI.x < 0 || dstVOI.width <= 0 || dstVOI.y < 0 || dstVOI.height <= 0 || dstVOI.z < 0 || dstVOI.depth <= 0) return ippStsSizeErr;

    /* check matrix: the determinant of the 3x3 part must not be (nearly) zero */
    double det = coeffs[0][0] * (coeffs[1][1] * coeffs[2][2] - coeffs[1][2] * coeffs[2][1]) -
        coeffs[0][1] * (coeffs[1][0] * coeffs[2][2] - coeffs[1][2] * coeffs[2][0]) +
        coeffs[0][2] * (coeffs[1][0] * coeffs[2][1] - coeffs[1][1] * coeffs[2][0]);

    if ((det > -EPSD) && (det < EPSD)) return ippStsCoeffErr;

    /* check interpolation mode */
    if (interpolation != IPPI_INTER_NN &&
        interpolation != IPPI_INTER_LINEAR &&
        interpolation != IPPI_INTER_CUBIC &&
        interpolation != IPPI_INTER_CUBIC2P_BSPLINE &&
        interpolation != IPPI_INTER_CUBIC2P_CATMULLROM &&
        interpolation != IPPI_INTER_CUBIC2P_B05C03)   return ippStsInterpolationErr;

    /* Start from one thread so that a thread-count query which fails without writing,
       or which reports a non-positive count, falls back to the sequential path
       instead of reading an indeterminate value or dividing by zero below. */
    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);
    if (numThreads < 1) numThreads = 1;

    /* planes per slab (at least one) and the resulting number of slabs along z */
    int sizeSlice = IPP_MAX(dstVOI.depth / numThreads, 1);
    int numSlices = IPP_MIN(numThreads, dstVOI.depth / sizeSlice);

    /* nothing to split: process the whole destination VOI in one call */
    if (numThreads == 1 || numSlices <= 1) {
        return ipprWarpAffine_16u_C1PV(pSrc, srcVolume, srcStep, srcVOI, pDst, dstStep, dstVOI, coeffs, interpolation, pBuffer);
    }

    /* planes left over after an even split; the first lastSlice slabs take one extra plane */
    int lastSlice = dstVOI.depth % numSlices;

    /* NOTE(review): a single pBuffer is stored in the job description shared by all
       slabs; confirm the per-slab routine does not need private scratch memory. */
    WarpAffineThreadData_16u_PV tData;
    setWarpAffineThreadData_16u_PV(pSrc, pDst, srcVolume, srcVOI, dstVOI, srcStep, dstStep, coeffs, interpolation, pBuffer, numSlices, sizeSlice, lastSlice, &tData);

    return ippParallelFor_T(numSlices, &tData, ipprWarpAffine_16u_C1PV_Fun);
}

IPPFUN(IppStatus, ipprWarpAffine_32f_C1PV_T, (const Ipp32f* const pSrc[], IpprVolume srcVolume, int srcStep, IpprCuboid srcVOI, Ipp32f* const pDst[], int dstStep, IpprCuboid dstVOI, const double coeffs[3][4], int interpolation, Ipp8u* pBuffer))
{
    /* check input parameters (pBuffer is not validated here, it is forwarded as is) */
    if (pSrc == NULL || pDst == NULL) return ippStsNullPtrErr;
    if (coeffs == NULL) return ippStsNullPtrErr;
    if (srcVolume.height <= 0 || srcVolume.width <= 0 || srcVolume.depth <= 0) return ippStsSizeErr;
    if (srcVOI.x < 0 || srcVOI.width <= 0 || srcVOI.y < 0 || srcVOI.height <= 0 || srcVOI.z < 0 || srcVOI.depth <= 0) return ippStsSizeErr;
    if (dstVOI.x < 0 || dstVOI.width <= 0 || dstVOI.y < 0 || dstVOI.height <= 0 || dstVOI.z < 0 || dstVOI.depth <= 0) return ippStsSizeErr;

    /* check matrix: the determinant of the 3x3 part must not be (nearly) zero */
    double det = coeffs[0][0] * (coeffs[1][1] * coeffs[2][2] - coeffs[1][2] * coeffs[2][1]) -
        coeffs[0][1] * (coeffs[1][0] * coeffs[2][2] - coeffs[1][2] * coeffs[2][0]) +
        coeffs[0][2] * (coeffs[1][0] * coeffs[2][1] - coeffs[1][1] * coeffs[2][0]);

    if ((det > -EPSD) && (det < EPSD)) return ippStsCoeffErr;

    /* check interpolation mode */
    if (interpolation != IPPI_INTER_NN &&
        interpolation != IPPI_INTER_LINEAR &&
        interpolation != IPPI_INTER_CUBIC &&
        interpolation != IPPI_INTER_CUBIC2P_BSPLINE &&
        interpolation != IPPI_INTER_CUBIC2P_CATMULLROM &&
        interpolation != IPPI_INTER_CUBIC2P_B05C03)   return ippStsInterpolationErr;

    /* Start from one thread so that a thread-count query which fails without writing,
       or which reports a non-positive count, falls back to the sequential path
       instead of reading an indeterminate value or dividing by zero below. */
    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);
    if (numThreads < 1) numThreads = 1;

    /* planes per slab (at least one) and the resulting number of slabs along z */
    int sizeSlice = IPP_MAX(dstVOI.depth / numThreads, 1);
    int numSlices = IPP_MIN(numThreads, dstVOI.depth / sizeSlice);

    /* nothing to split: process the whole destination VOI in one call */
    if (numThreads == 1 || numSlices <= 1) {
        return ipprWarpAffine_32f_C1PV(pSrc, srcVolume, srcStep, srcVOI, pDst, dstStep, dstVOI, coeffs, interpolation, pBuffer);
    }

    /* planes left over after an even split; the first lastSlice slabs take one extra plane */
    int lastSlice = dstVOI.depth % numSlices;

    /* NOTE(review): a single pBuffer is stored in the job description shared by all
       slabs; confirm the per-slab routine does not need private scratch memory. */
    WarpAffineThreadData_32f_PV tData;
    setWarpAffineThreadData_32f_PV(pSrc, pDst, srcVolume, srcVOI, dstVOI, srcStep, dstStep, coeffs, interpolation, pBuffer, numSlices, sizeSlice, lastSlice, &tData);

    return ippParallelFor_T(numSlices, &tData, ipprWarpAffine_32f_C1PV_Fun);
}

/*
//  Names:              ipprWarpAffine_<mode>
//  Purpose:            Performs AFFINE transform of the source volume by matrix a[3][4]
//                            |X'|   |a00 a01 a02|   |X|   |a03|
//                            |Y'| = |a10 a11 a12| * |Y| + |a13|
//                            |Z'|   |a20 a21 a22|   |Z|   |a23|
//  Parameters:
//    pSrc              pointer to the source volume data
//    srcVolume         size of source volume
//    srcStep           step in every plane of source volume
//    srcPlaneStep      step between planes of source volume (8u_C1V_T, 16u_C1V_T, 32f_C1V_T modes)
//    srcVOI            volume of interest of source volume
//    pDst              pointer to the destination volume data
//    dstStep           step in every plane of destination volume
//    dstPlaneStep      step between planes of destination volume (8u_C1V_T, 16u_C1V_T, 32f_C1V_T modes)
//    dstVOI            volume of interest of destination volume
//    coeffs            affine transform matrix
//    interpolation     type of interpolation to perform for the affine transform of the input volume:
//                        IPPI_INTER_NN      nearest neighbor interpolation
//                        IPPI_INTER_LINEAR  trilinear interpolation
//                        IPPI_INTER_CUBIC   tricubic polynomial interpolation
//                      including two-parameter cubic filters:
//                        IPPI_INTER_CUBIC2P_BSPLINE      B-spline filter (1, 0)
//                        IPPI_INTER_CUBIC2P_CATMULLROM   Catmull-Rom filter (0, 1/2)
//                        IPPI_INTER_CUBIC2P_B05C03       special filter with parameters (1/2, 3/10)
//    pBuffer           pointer to work buffer
//  Returns:
//    ippStsNoErr             no errors
//    ippStsNullPtrErr        pSrc == NULL or pDst == NULL or pBuffer == NULL or coeffs == NULL
//    ippStsSizeErr           width or height or depth of source volume is less than or equal to zero,
//                            or srcVOI/dstVOI has a negative origin coordinate or a non-positive size
//    ippStsWrongIntersectVOI VOI has no intersection with the source or destination volume
//    ippStsCoeffErr          determinant of the transform matrix Aij is zero or closer to zero than EPSD
//    ippStsInterpolationErr  interpolation has an illegal value
//  Notes:
//    <mode> is one of 8u_C1V_T, 16u_C1V_T or 32f_C1V_T
*/

IPPFUN(IppStatus, ipprWarpAffine_8u_C1V_T, (const Ipp8u* pSrc, IpprVolume srcVolume, int srcStep, int srcPlaneStep, IpprCuboid srcVOI, Ipp8u* pDst, int dstStep, int dstPlaneStep, IpprCuboid dstVOI, const double coeffs[3][4], int interpolation, Ipp8u* pBuffer))
{
    /* check input parameters (pBuffer is not validated here, it is forwarded as is) */
    if (pSrc == NULL || pDst == NULL) return ippStsNullPtrErr;
    if (coeffs == NULL) return ippStsNullPtrErr;
    if (srcVolume.height <= 0 || srcVolume.width <= 0 || srcVolume.depth <= 0) return ippStsSizeErr;
    if (srcVOI.x < 0 || srcVOI.width <= 0 || srcVOI.y < 0 || srcVOI.height <= 0 || srcVOI.z < 0 || srcVOI.depth <= 0) return ippStsSizeErr;
    if (dstVOI.x < 0 || dstVOI.width <= 0 || dstVOI.y < 0 || dstVOI.height <= 0 || dstVOI.z < 0 || dstVOI.depth <= 0) return ippStsSizeErr;

    /* check matrix: the determinant of the 3x3 part must not be (nearly) zero */
    double det = coeffs[0][0] * (coeffs[1][1] * coeffs[2][2] - coeffs[1][2] * coeffs[2][1]) -
        coeffs[0][1] * (coeffs[1][0] * coeffs[2][2] - coeffs[1][2] * coeffs[2][0]) +
        coeffs[0][2] * (coeffs[1][0] * coeffs[2][1] - coeffs[1][1] * coeffs[2][0]);

    if ((det > -EPSD) && (det < EPSD)) return ippStsCoeffErr;

    /* check interpolation mode */
    if (interpolation != IPPI_INTER_NN &&
        interpolation != IPPI_INTER_LINEAR &&
        interpolation != IPPI_INTER_CUBIC &&
        interpolation != IPPI_INTER_CUBIC2P_BSPLINE &&
        interpolation != IPPI_INTER_CUBIC2P_CATMULLROM &&
        interpolation != IPPI_INTER_CUBIC2P_B05C03)   return ippStsInterpolationErr;

    /* Start from one thread so that a thread-count query which fails without writing,
       or which reports a non-positive count, falls back to the sequential path
       instead of reading an indeterminate value or dividing by zero below. */
    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);
    if (numThreads < 1) numThreads = 1;

    /* planes per slab (at least one) and the resulting number of slabs along z */
    int sizeSlice = IPP_MAX(dstVOI.depth / numThreads, 1);
    int numSlices = IPP_MIN(numThreads, dstVOI.depth / sizeSlice);

    /* nothing to split: process the whole destination VOI in one call */
    if (numThreads == 1 || numSlices <= 1) {
        return ipprWarpAffine_8u_C1V(pSrc, srcVolume, srcStep, srcPlaneStep, srcVOI, pDst, dstStep, dstPlaneStep, dstVOI, coeffs, interpolation, pBuffer);
    }

    /* planes left over after an even split; the first lastSlice slabs take one extra plane */
    int lastSlice = dstVOI.depth % numSlices;

    /* NOTE(review): a single pBuffer is stored in the job description shared by all
       slabs; confirm the per-slab routine does not need private scratch memory. */
    WarpAffineThreadData_8u_V tData;
    setWarpAffineThreadData_8u_V(pSrc, pDst, srcVolume, srcVOI, dstVOI, srcStep, dstStep, srcPlaneStep, dstPlaneStep, coeffs, interpolation, pBuffer, numSlices, sizeSlice, lastSlice, &tData);

    return ippParallelFor_T(numSlices, &tData, ipprWarpAffine_8u_C1V_Fun);
}

IPPFUN(IppStatus, ipprWarpAffine_16u_C1V_T, (const Ipp16u* pSrc, IpprVolume srcVolume, int srcStep, int srcPlaneStep, IpprCuboid srcVOI, Ipp16u* pDst, int dstStep, int dstPlaneStep, IpprCuboid dstVOI, const double coeffs[3][4], int interpolation, Ipp8u* pBuffer))
{
    /* check input parameters (pBuffer is not validated here, it is forwarded as is) */
    if (pSrc == NULL || pDst == NULL) return ippStsNullPtrErr;
    if (coeffs == NULL) return ippStsNullPtrErr;
    if (srcVolume.height <= 0 || srcVolume.width <= 0 || srcVolume.depth <= 0) return ippStsSizeErr;
    if (srcVOI.x < 0 || srcVOI.width <= 0 || srcVOI.y < 0 || srcVOI.height <= 0 || srcVOI.z < 0 || srcVOI.depth <= 0) return ippStsSizeErr;
    if (dstVOI.x < 0 || dstVOI.width <= 0 || dstVOI.y < 0 || dstVOI.height <= 0 || dstVOI.z < 0 || dstVOI.depth <= 0) return ippStsSizeErr;

    /* check matrix: the determinant of the 3x3 part must not be (nearly) zero */
    double det = coeffs[0][0] * (coeffs[1][1] * coeffs[2][2] - coeffs[1][2] * coeffs[2][1]) -
        coeffs[0][1] * (coeffs[1][0] * coeffs[2][2] - coeffs[1][2] * coeffs[2][0]) +
        coeffs[0][2] * (coeffs[1][0] * coeffs[2][1] - coeffs[1][1] * coeffs[2][0]);

    if ((det > -EPSD) && (det < EPSD)) return ippStsCoeffErr;

    /* check interpolation mode */
    if (interpolation != IPPI_INTER_NN &&
        interpolation != IPPI_INTER_LINEAR &&
        interpolation != IPPI_INTER_CUBIC &&
        interpolation != IPPI_INTER_CUBIC2P_BSPLINE &&
        interpolation != IPPI_INTER_CUBIC2P_CATMULLROM &&
        interpolation != IPPI_INTER_CUBIC2P_B05C03)   return ippStsInterpolationErr;

    /* Start from one thread so that a thread-count query which fails without writing,
       or which reports a non-positive count, falls back to the sequential path
       instead of reading an indeterminate value or dividing by zero below. */
    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);
    if (numThreads < 1) numThreads = 1;

    /* planes per slab (at least one) and the resulting number of slabs along z */
    int sizeSlice = IPP_MAX(dstVOI.depth / numThreads, 1);
    int numSlices = IPP_MIN(numThreads, dstVOI.depth / sizeSlice);

    /* nothing to split: process the whole destination VOI in one call */
    if (numThreads == 1 || numSlices <= 1) {
        return ipprWarpAffine_16u_C1V(pSrc, srcVolume, srcStep, srcPlaneStep, srcVOI, pDst, dstStep, dstPlaneStep, dstVOI, coeffs, interpolation, pBuffer);
    }

    /* planes left over after an even split; the first lastSlice slabs take one extra plane */
    int lastSlice = dstVOI.depth % numSlices;

    /* NOTE(review): a single pBuffer is stored in the job description shared by all
       slabs; confirm the per-slab routine does not need private scratch memory. */
    WarpAffineThreadData_16u_V tData;
    setWarpAffineThreadData_16u_V(pSrc, pDst, srcVolume, srcVOI, dstVOI, srcStep, dstStep, srcPlaneStep, dstPlaneStep, coeffs, interpolation, pBuffer, numSlices, sizeSlice, lastSlice, &tData);

    return ippParallelFor_T(numSlices, &tData, ipprWarpAffine_16u_C1V_Fun);
}

IPPFUN(IppStatus, ipprWarpAffine_32f_C1V_T, (const Ipp32f* pSrc, IpprVolume srcVolume, int srcStep, int srcPlaneStep, IpprCuboid srcVOI, Ipp32f* pDst, int dstStep, int dstPlaneStep, IpprCuboid dstVOI, const double coeffs[3][4], int interpolation, Ipp8u* pBuffer))
{
    /* check input parameters (pBuffer is not validated here, it is forwarded as is) */
    if (pSrc == NULL || pDst == NULL) return ippStsNullPtrErr;
    if (coeffs == NULL) return ippStsNullPtrErr;
    if (srcVolume.height <= 0 || srcVolume.width <= 0 || srcVolume.depth <= 0) return ippStsSizeErr;
    if (srcVOI.x < 0 || srcVOI.width <= 0 || srcVOI.y < 0 || srcVOI.height <= 0 || srcVOI.z < 0 || srcVOI.depth <= 0) return ippStsSizeErr;
    if (dstVOI.x < 0 || dstVOI.width <= 0 || dstVOI.y < 0 || dstVOI.height <= 0 || dstVOI.z < 0 || dstVOI.depth <= 0) return ippStsSizeErr;

    /* check matrix: the determinant of the 3x3 part must not be (nearly) zero */
    double det = coeffs[0][0] * (coeffs[1][1] * coeffs[2][2] - coeffs[1][2] * coeffs[2][1]) -
        coeffs[0][1] * (coeffs[1][0] * coeffs[2][2] - coeffs[1][2] * coeffs[2][0]) +
        coeffs[0][2] * (coeffs[1][0] * coeffs[2][1] - coeffs[1][1] * coeffs[2][0]);

    if ((det > -EPSD) && (det < EPSD)) return ippStsCoeffErr;

    /* check interpolation mode */
    if (interpolation != IPPI_INTER_NN &&
        interpolation != IPPI_INTER_LINEAR &&
        interpolation != IPPI_INTER_CUBIC &&
        interpolation != IPPI_INTER_CUBIC2P_BSPLINE &&
        interpolation != IPPI_INTER_CUBIC2P_CATMULLROM &&
        interpolation != IPPI_INTER_CUBIC2P_B05C03)   return ippStsInterpolationErr;

    /* Start from one thread so that a thread-count query which fails without writing,
       or which reports a non-positive count, falls back to the sequential path
       instead of reading an indeterminate value or dividing by zero below. */
    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);
    if (numThreads < 1) numThreads = 1;

    /* planes per slab (at least one) and the resulting number of slabs along z */
    int sizeSlice = IPP_MAX(dstVOI.depth / numThreads, 1);
    int numSlices = IPP_MIN(numThreads, dstVOI.depth / sizeSlice);

    /* nothing to split: process the whole destination VOI in one call */
    if (numThreads == 1 || numSlices <= 1) {
        return ipprWarpAffine_32f_C1V(pSrc, srcVolume, srcStep, srcPlaneStep, srcVOI, pDst, dstStep, dstPlaneStep, dstVOI, coeffs, interpolation, pBuffer);
    }

    /* planes left over after an even split; the first lastSlice slabs take one extra plane */
    int lastSlice = dstVOI.depth % numSlices;

    /* NOTE(review): a single pBuffer is stored in the job description shared by all
       slabs; confirm the per-slab routine does not need private scratch memory. */
    WarpAffineThreadData_32f_V tData;
    setWarpAffineThreadData_32f_V(pSrc, pDst, srcVolume, srcVOI, dstVOI, srcStep, dstStep, srcPlaneStep, dstPlaneStep, coeffs, interpolation, pBuffer, numSlices, sizeSlice, lastSlice, &tData);

    return ippParallelFor_T(numSlices, &tData, ipprWarpAffine_32f_C1V_Fun);
}
