00001 #ifndef CVD_INCLUDE_UTILITY_HELPERS_H 00002 #define CVD_INCLUDE_UTILITY_HELPERS_H 00003 00004 #include <cvd/utility.h> 00005 #include <xmmintrin.h> 00006 using namespace std; 00007 00008 namespace CVD{ 00009 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_differences(const T1* a, const T1* b, T2* c, size_t count) 00010 { 00011 if (count < M*2) { 00012 F::unaligned_differences(a,b,c,count); 00013 return; 00014 } 00015 if (!is_aligned<A>(a)) { 00016 size_t steps = steps_to_align<A>(a); 00017 F::unaligned_differences(a,b,c,steps); 00018 count -= steps; 00019 a += steps; 00020 b += steps; 00021 c += steps; 00022 } 00023 if (!is_aligned<A>(c) || count < M) { 00024 F::unaligned_differences(a,b,c,count); 00025 return; 00026 } 00027 size_t block = (count/M)*M; 00028 F::aligned_differences(a,b,c,block); 00029 if (count > block) { 00030 F::unaligned_differences(a+block,b+block,c+block,count-block); 00031 } 00032 } 00033 00034 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_add_mul_add(const T1* a, const T1* b, const T1& c, T2* out, size_t count) 00035 { 00036 if (count < M*2) { 00037 F::unaligned_add_mul_add(a,b,c,out,count); 00038 return; 00039 } 00040 if (!is_aligned<A>(a)) { 00041 size_t steps = steps_to_align<A>(a); 00042 F::unaligned_add_mul_add(a,b,c,out,steps); 00043 count -= steps; 00044 a += steps; 00045 b += steps; 00046 out += steps; 00047 if (count < M || !is_aligned<16>(out)) { 00048 F::unaligned_add_mul_add(a,b,c,out,count); 00049 return; 00050 } 00051 } 00052 else if (count < M || !is_aligned<16>(out)) { 00053 F::unaligned_add_mul_add(a,b,c,out,count); 00054 return; 00055 } 00056 size_t block = (count/M)*M; 00057 F::aligned_add_mul_add(a,b,c,out,block); 00058 if (count > block) 00059 F::unaligned_add_mul_add(a+block,b+block,c, out+block,count-block); 00060 } 00061 00062 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_assign_mul(const T1* a, const T1& c, T2* out, size_t count) 00063 { 00064 if (count < M*2) { 00065 F::unaligned_assign_mul(a,c,out,count); 00066 return; 00067 } 00068 if (!is_aligned<A>(a)) { 00069 size_t steps = steps_to_align<A>(a); 00070 F::unaligned_assign_mul(a,c,out,steps); 00071 count -= steps; 00072 a += steps; 00073 out += steps; 00074 if (count < M) { 00075 F::unaligned_assign_mul(a,c,out,count); 00076 return; 00077 } 00078 } 00079 size_t block = (count/M)*M; 00080 F::aligned_assign_mul(a,c,out,block); 00081 if (count > block) { 00082 F::unaligned_assign_mul(a+block,c, out+block,count-block); 00083 } 00084 } 00085 00086 template <class F, class R, class T1, int A, int M> inline R maybe_aligned_inner_product(const T1* a, const T1* b, size_t count) 00087 { 00088 if (count < M*2) { 00089 return F::unaligned_inner_product(a,b,count); 00090 } 00091 R sum = 0; 00092 if (!is_aligned<A>(a)) { 00093 size_t steps = steps_to_align<A>(a); 00094 sum = F::unaligned_inner_product(a,b,steps); 00095 count -= steps; 00096 a += steps; 00097 b += steps; 00098 if (count < M) { 00099 return sum + F::unaligned_inner_product(a,b,count); 00100 } 00101 } 00102 size_t block = (count/M)*M; 00103 sum += F::aligned_inner_product(a,b,block); 00104 if (count > block) 00105 sum += F::unaligned_inner_product(a+block,b+block,count-block); 00106 return sum; 00107 } 00108 00109 template <class F, class R, class T1, int A, int M> inline R maybe_aligned_ssd(const T1* a, const T1* b, size_t count) 00110 { 00111 if (count < M*2) { 00112 return F::unaligned_ssd(a,b,count); 00113 } 00114 R sum = 0; 00115 if (!is_aligned<A>(a)) { 00116 size_t steps = steps_to_align<A>(a); 00117 sum = F::unaligned_ssd(a,b,steps); 00118 count -= steps; 00119 a += steps; 00120 b += steps; 00121 if (count < M) { 00122 return sum + F::unaligned_ssd(a,b,count); 00123 } 00124 } 00125 size_t block = (count/M)*M; 00126 sum += F::aligned_ssd(a,b,block); 00127 if (count > block) 00128 sum += F::unaligned_ssd(a+block,b+block,count-block); 00129 return sum; 00130 } 00131 00132 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_square(const T1* in, T2* out, size_t count) 00133 { 00134 if (count < M*2) { 00135 return F::unaligned_square(in,out,count); 00136 } 00137 if (!is_aligned<A>(in)) { 00138 size_t steps = steps_to_align<A>(in); 00139 F::unaligned_square(in,out,steps); 00140 count -= steps; 00141 in += steps; 00142 out += steps; 00143 if (count < M) { 00144 F::unaligned_square(in,out,count); 00145 } 00146 } 00147 size_t block = (count/M)*M; 00148 F::aligned_square(in,out,block); 00149 if (count > block) 00150 F::unaligned_square(in+block,out+block,count-block); 00151 } 00152 00153 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_subtract_square(const T1* in, T2* out, size_t count) 00154 { 00155 if (count < M*2) { 00156 return F::unaligned_subtract_square(in,out,count); 00157 } 00158 if (!is_aligned<A>(in)) { 00159 size_t steps = steps_to_align<A>(in); 00160 F::unaligned_subtract_square(in,out,steps); 00161 count -= steps; 00162 in += steps; 00163 out += steps; 00164 if (count < M) { 00165 F::unaligned_subtract_square(in,out,count); 00166 } 00167 } 00168 size_t block = (count/M)*M; 00169 F::aligned_subtract_square(in,out,block); 00170 if (count > block) 00171 F::unaligned_subtract_square(in+block,out+block,count-block); 00172 } 00173 00174 00175 00176 } 00177 00178 #endif