Some users of XNAMath have reported a range problem in the SSE implementation of the XMVectorFloor and XMVectorCeiling functions (i.e., whenever the values are greater than INT_MAX). These fixes will be included in a future release of XNAMath, but since XNAMath is an all-header implementation, affected developers can also apply the fix directly in the xnamathvector.inl file.

Here are the corrected versions:

// Computes the floor of each of the four float components of V, i.e. the
// largest integral value that is not greater than the component.
//
// V       - input vector.
// returns - vector whose components are the per-component floor of V.
//
// In the SSE path, components that fail the magnitude test against
// g_XMNoFraction (8388608 per the comments below: values with no fractional
// bits, plus NAN and INF) are returned unchanged, because they cannot be
// round-tripped through a 32-bit integer conversion.
XMFINLINE XMVECTOR XMVectorFloor
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Scalar path: apply floorf() to each component.
    XMVECTOR vResult = {
        floorf(V.vector4_f32[0]),
        floorf(V.vector4_f32[1]),
        floorf(V.vector4_f32[2]),
        floorf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Truncate toward zero and convert back to float. Truncation is used
    // instead of "subtract almost-0.5 and round to nearest" because the latter
    // loses the epsilon in the float subtraction for whole odd inputs
    // (105.0 - 0.4999999 becomes exactly 104.5, which rounds to even = 104),
    // and because it depends on the current MXCSR rounding mode.
    __m128i vInt = _mm_cvttps_epi32(V);
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // Truncation rounds negative non-integers UP; find the lanes where the
    // truncated value ended up greater than the input.
    XMVECTOR vLarger = _mm_cmpgt_ps(vResult,V);
    // Reinterpret the compare mask as integers and convert:
    // 0 -> 0.0f, 0xFFFFFFFF (-1) -> -1.0f
    vLarger = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vLarger)[0]);
    // Step those lanes down by one to reach the floor
    vResult = _mm_add_ps(vResult,vLarger);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Computes the ceiling of each of the four float components of V, i.e. the
// smallest integral value that is not less than the component.
//
// V       - input vector.
// returns - vector whose components are the per-component ceiling of V.
//
// In the SSE path, components that fail the magnitude test against
// g_XMNoFraction (8388608 per the comments below: values with no fractional
// bits, plus NAN and INF) are returned unchanged, because they cannot be
// round-tripped through a 32-bit integer conversion.
XMFINLINE XMVECTOR XMVectorCeiling
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar path: apply ceilf() to each component.
    XMVECTOR vResult = {
        ceilf(V.vector4_f32[0]),
        ceilf(V.vector4_f32[1]),
        ceilf(V.vector4_f32[2]),
        ceilf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Truncate toward zero and convert back to float. Truncation is used
    // instead of "add almost-0.5 and round to nearest" because the latter
    // loses the epsilon in the float addition for whole odd inputs
    // (105.0 + 0.4999999 becomes exactly 105.5, which rounds to even = 106),
    // and because it depends on the current MXCSR rounding mode.
    __m128i vInt = _mm_cvttps_epi32(V);
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // Truncation rounds positive non-integers DOWN; find the lanes where the
    // truncated value ended up less than the input.
    XMVECTOR vSmaller = _mm_cmplt_ps(vResult,V);
    // Reinterpret the compare mask as integers and convert:
    // 0 -> 0.0f, 0xFFFFFFFF (-1) -> -1.0f
    vSmaller = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vSmaller)[0]);
    // Subtracting -1.0f steps those lanes up by one to reach the ceiling
    vResult = _mm_sub_ps(vResult,vSmaller);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

 Note that this is the same technique used by XMVectorRound to get around the same range issue in the SSE2 float<->integer conversion mechanism.

Note: This issue has been fixed for XNAMath v2.04 and DirectXMath.