C/C++:用于检测向量是否位于坐标轴上的慢速 SSE 代码
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 C/C++ 中用于检测向量是否位于坐标轴上的慢速 SSE 代码的相关知识,希望对你有一定的参考价值。
#include <stdio.h>
#include <tchar.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#include <smmintrin.h>
#include <iostream>
#include <chrono>
/// Tests whether a vector lies on exactly one of the x, y or z axes.
///
/// @param vec  Vector to test; lanes are read as (x, y, z, w).
/// @return true when exactly one of x, y, z is non-zero and every other
///         lane, including w, compares equal to zero. A NaN lane compares
///         unequal to zero and therefore counts as non-zero; -0.0f counts
///         as zero.
///
/// Only baseline SSE is required: a single compare plus `_mm_movemask_ps`.
/// The function is kept out-of-line (presumably so a benchmark measures a
/// real call); the attribute is spelled per compiler so the code also builds
/// outside MSVC.
#if defined(_MSC_VER)
__declspec(noinline)
#elif defined(__GNUC__)
__attribute__((noinline))
#endif
bool is_axial(__m128 vec)
{
    // Bit i of zero_lanes is set when lane i compares equal to 0.0f.
    const int zero_lanes = _mm_movemask_ps(_mm_cmpeq_ps(vec, _mm_setzero_ps()));

    // Accept only the three patterns in which one of x/y/z is the single
    // clear bit and w is zero:
    //   0b1110 -> only x non-zero
    //   0b1101 -> only y non-zero
    //   0b1011 -> only z non-zero
    return zero_lanes == 0xE || zero_lanes == 0xD || zero_lanes == 0xB;
}
/// Tests whether exactly one of the four lanes of a vector is non-zero.
///
/// @param vec  Vector to test; lanes are read as (x, y, z, w).
/// @return true when exactly three lanes compare equal to zero. With w == 0
///         this means exactly one of x, y, z is non-zero. Note that
///         (0, 0, 0, w) with w != 0 also returns true, because the w lane
///         is counted like any other. A NaN lane counts as non-zero.
///
/// Uses one compare and `_mm_movemask_ps` (baseline SSE) rather than
/// horizontal adds, which need SSSE3 and are comparatively slow.
//__declspec(noinline)
bool is_axial2(__m128 vec)
{
    // Bit i of zero_lanes is set when lane i compares equal to 0.0f.
    const int zero_lanes = _mm_movemask_ps(_mm_cmpeq_ps(vec, _mm_setzero_ps()));
    const int nonzero_lanes = ~zero_lanes & 0xF;

    // Exactly one non-zero lane <=> nonzero_lanes is a power of two.
    return nonzero_lanes != 0 && (nonzero_lanes & (nonzero_lanes - 1)) == 0;
}
/// Tests whether the vector (x, y, z) has exactly one non-zero component,
/// using an SSE compare on the packed components.
///
/// @param x,y,z  Components of the vector.
/// @return true when exactly one of x, y, z is non-zero. NaN counts as
///         non-zero, -0.0f as zero.
///
/// Uses one compare and `_mm_movemask_ps` (baseline SSE) rather than
/// horizontal adds, which need SSSE3 and are comparatively slow.
//__declspec(noinline)
bool is_axial6(float x, float y, float z)
{
    const __m128 packed = _mm_setr_ps(x, y, z, 0.f);

    // Bit i is set when component i compares equal to 0.0f; the padding
    // lane (bit 3) is masked off because it is always zero.
    const int zero_xyz = _mm_movemask_ps(_mm_cmpeq_ps(packed, _mm_setzero_ps())) & 0x7;

    // Exactly two of the three components must be zero:
    //   0b011 -> only z non-zero, 0b101 -> only y, 0b110 -> only x.
    return zero_xyz == 0x3 || zero_xyz == 0x5 || zero_xyz == 0x6;
}
/// Tests whether the vector (x, y, z) has exactly one non-zero component
/// by counting the non-zero components.
///
/// @return true when exactly one of x, y, z differs from zero.
//__declspec(noinline)
bool is_axial3(float x, float y, float z)
{
    int nonzero_count = 0;
    if (x != 0.0f)
        ++nonzero_count;
    if (y != 0.0f)
        ++nonzero_count;
    if (z != 0.0f)
        ++nonzero_count;
    return nonzero_count == 1;
}
/// Tests whether the vector (x, y, z) has exactly one non-zero component
/// by enumerating the three possible cases.
///
/// @return true when exactly one of x, y, z differs from zero.
//__declspec(noinline)
bool is_axial4(float x, float y, float z)
{
    const bool on_z_axis = (x == 0.f) && (y == 0.f) && (z != 0.f);
    const bool on_y_axis = (x == 0.f) && (y != 0.f) && (z == 0.f);
    const bool on_x_axis = (x != 0.f) && (y == 0.f) && (z == 0.f);
    return on_z_axis || on_y_axis || on_x_axis;
}
/// Tests whether the vector (x, y, z) has exactly one non-zero component,
/// branching on x first so the y/z conditions are shared.
///
/// @return true when exactly one of x, y, z differs from zero.
//__declspec(noinline)
bool is_axial5(float x, float y, float z)
{
    const bool x_is_zero = (x == 0.f);
    const bool y_is_zero = (y == 0.f);
    const bool z_is_zero = (z == 0.f);

    // With x zero, exactly one of y/z must be non-zero; with x non-zero,
    // both y and z must be zero.
    return x_is_zero ? (y_is_zero != z_is_zero)
                     : (y_is_zero && z_is_zero);
}
/// Tests whether the vector (x, y, z) has exactly one non-zero component
/// using a chain of early returns.
///
/// @return true when exactly one of x, y, z differs from zero.
bool is_axial7(float x, float y, float z)
{
    // x is the non-zero component: the other two must both be zero.
    if (!(x == 0.f))
    {
        return y == 0.f && z == 0.f;
    }

    // x is zero and y is not: z must be zero.
    if (!(y == 0.f))
    {
        return z == 0.f;
    }

    // x and y are zero: z must be the non-zero component.
    return z != 0.f;
}
using namespace std;

/// Benchmark driver.
///
/// Times 1000 * 1000 * 1000 calls of one axial-test variant, prints the
/// elapsed milliseconds and the number of hits, then prints the result of
/// `is_axial2` for six fixed vectors as a quick sanity check.
///
/// @return 0 on completion.
int main()
{
    int temp = 0; // number of inputs classified as axial

    const auto start_time = std::chrono::high_resolution_clock::now();
    for (int i = -500; i < 500; i++)
    {
        for (int j = -500; j < 500; j++)
        {
            for (int k = -500; k < 500; k++)
            {
                // (n & 1) restricts every component to 0 or 1.
                // Swap the active line to benchmark another variant.
                //temp += is_axial2(_mm_setr_ps(i & 1, j & 1, k & 1, 0.f));
                temp += is_axial7(static_cast<float>(i & 1),
                                  static_cast<float>(j & 1),
                                  static_cast<float>(k & 1));
            }
        }
    }
    const auto end_time = std::chrono::high_resolution_clock::now();

    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count() << '\n';
    // Printing the hit count keeps the loop observable; the newline keeps it
    // from running into the sanity-check output below.
    std::cout << temp << '\n';

    // Sanity checks; each line prints 1 for "axial" and 0 otherwise.
    printf("%d\n", is_axial2(_mm_setr_ps(0.f, 0.f, 1.f, 0.f)));
    printf("%d\n", is_axial2(_mm_setr_ps(1.f, 1.f, 0.f, 0.f)));
    printf("%d\n", is_axial2(_mm_setr_ps(1.f, 1.f, 1.f, 0.f)));
    printf("%d\n", is_axial2(_mm_setr_ps(0.f, 1.f, 0.f, 0.f)));
    printf("%d\n", is_axial2(_mm_setr_ps(1.f, 0.f, 0.f, 0.f)));
    printf("%d\n", is_axial2(_mm_setr_ps(0.f, 0.f, 0.f, 0.f)));

    // The hit count is already printed and does not fit in an exit status,
    // so report plain success.
    return 0;
}
以上是关于 C/C++ 中用于检测向量是否位于坐标轴上的慢速 SSE 代码的主要内容。如果未能解决你的问题,请参考以下文章: