00001 #ifndef CVD_INCLUDE_UTILITY_HELPERS_H
00002 #define CVD_INCLUDE_UTILITY_HELPERS_H
00003
00004 #include <cvd/utility.h>
00005 #include <xmmintrin.h>
00006 using namespace std;
00007
00008 namespace CVD{
00009 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_differences(const T1* a, const T1* b, T2* c, size_t count)
00010 {
00011 if (count < M*2) {
00012 F::unaligned_differences(a,b,c,count);
00013 return;
00014 }
00015 if (!is_aligned<A>(a)) {
00016 size_t steps = steps_to_align<A>(a);
00017 F::unaligned_differences(a,b,c,steps);
00018 count -= steps;
00019 a += steps;
00020 b += steps;
00021 c += steps;
00022 }
00023 if (!is_aligned<A>(c) || count < M) {
00024 F::unaligned_differences(a,b,c,count);
00025 return;
00026 }
00027 size_t block = (count/M)*M;
00028 F::aligned_differences(a,b,c,block);
00029 if (count > block) {
00030 F::unaligned_differences(a+block,b+block,c+block,count-block);
00031 }
00032 }
00033
00034 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_add_mul_add(const T1* a, const T1* b, const T1& c, T2* out, size_t count)
00035 {
00036 if (count < M*2) {
00037 F::unaligned_add_mul_add(a,b,c,out,count);
00038 return;
00039 }
00040 if (!is_aligned<A>(a)) {
00041 size_t steps = steps_to_align<A>(a);
00042 F::unaligned_add_mul_add(a,b,c,out,steps);
00043 count -= steps;
00044 a += steps;
00045 b += steps;
00046 out += steps;
00047 if (count < M || !is_aligned<16>(out)) {
00048 F::unaligned_add_mul_add(a,b,c,out,count);
00049 return;
00050 }
00051 }
00052 else if (count < M || !is_aligned<16>(out)) {
00053 F::unaligned_add_mul_add(a,b,c,out,count);
00054 return;
00055 }
00056 size_t block = (count/M)*M;
00057 F::aligned_add_mul_add(a,b,c,out,block);
00058 if (count > block)
00059 F::unaligned_add_mul_add(a+block,b+block,c, out+block,count-block);
00060 }
00061
00062 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_assign_mul(const T1* a, const T1& c, T2* out, size_t count)
00063 {
00064 if (count < M*2) {
00065 F::unaligned_assign_mul(a,c,out,count);
00066 return;
00067 }
00068 if (!is_aligned<A>(a)) {
00069 size_t steps = steps_to_align<A>(a);
00070 F::unaligned_assign_mul(a,c,out,steps);
00071 count -= steps;
00072 a += steps;
00073 out += steps;
00074 if (count < M) {
00075 F::unaligned_assign_mul(a,c,out,count);
00076 return;
00077 }
00078 }
00079 size_t block = (count/M)*M;
00080 F::aligned_assign_mul(a,c,out,block);
00081 if (count > block) {
00082 F::unaligned_assign_mul(a+block,c, out+block,count-block);
00083 }
00084 }
00085
00086 template <class F, class R, class T1, int A, int M> inline R maybe_aligned_inner_product(const T1* a, const T1* b, size_t count)
00087 {
00088 if (count < M*2) {
00089 return F::unaligned_inner_product(a,b,count);
00090 }
00091 R sum = 0;
00092 if (!is_aligned<A>(a)) {
00093 size_t steps = steps_to_align<A>(a);
00094 sum = F::unaligned_inner_product(a,b,steps);
00095 count -= steps;
00096 a += steps;
00097 b += steps;
00098 if (count < M) {
00099 return sum + F::unaligned_inner_product(a,b,count);
00100 }
00101 }
00102 size_t block = (count/M)*M;
00103 sum += F::aligned_inner_product(a,b,block);
00104 if (count > block)
00105 sum += F::unaligned_inner_product(a+block,b+block,count-block);
00106 return sum;
00107 }
00108
00109 template <class F, class R, class T1, int A, int M> inline R maybe_aligned_ssd(const T1* a, const T1* b, size_t count)
00110 {
00111 if (count < M*2) {
00112 return F::unaligned_ssd(a,b,count);
00113 }
00114 R sum = 0;
00115 if (!is_aligned<A>(a)) {
00116 size_t steps = steps_to_align<A>(a);
00117 sum = F::unaligned_ssd(a,b,steps);
00118 count -= steps;
00119 a += steps;
00120 b += steps;
00121 if (count < M) {
00122 return sum + F::unaligned_ssd(a,b,count);
00123 }
00124 }
00125 size_t block = (count/M)*M;
00126 sum += F::aligned_ssd(a,b,block);
00127 if (count > block)
00128 sum += F::unaligned_ssd(a+block,b+block,count-block);
00129 return sum;
00130 }
00131
00132 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_square(const T1* in, T2* out, size_t count)
00133 {
00134 if (count < M*2) {
00135 return F::unaligned_square(in,out,count);
00136 }
00137 if (!is_aligned<A>(in)) {
00138 size_t steps = steps_to_align<A>(in);
00139 F::unaligned_square(in,out,steps);
00140 count -= steps;
00141 in += steps;
00142 out += steps;
00143 if (count < M) {
00144 F::unaligned_square(in,out,count);
00145 }
00146 }
00147 size_t block = (count/M)*M;
00148 F::aligned_square(in,out,block);
00149 if (count > block)
00150 F::unaligned_square(in+block,out+block,count-block);
00151 }
00152
00153 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_subtract_square(const T1* in, T2* out, size_t count)
00154 {
00155 if (count < M*2) {
00156 return F::unaligned_subtract_square(in,out,count);
00157 }
00158 if (!is_aligned<A>(in)) {
00159 size_t steps = steps_to_align<A>(in);
00160 F::unaligned_subtract_square(in,out,steps);
00161 count -= steps;
00162 in += steps;
00163 out += steps;
00164 if (count < M) {
00165 F::unaligned_subtract_square(in,out,count);
00166 }
00167 }
00168 size_t block = (count/M)*M;
00169 F::aligned_subtract_square(in,out,block);
00170 if (count > block)
00171 F::unaligned_subtract_square(in+block,out+block,count-block);
00172 }
00173
00174
00175
00176 }
00177
00178 #endif