CVD 0.8
cvd_src/utility_helpers.h
00001 #ifndef CVD_INCLUDE_UTILITY_HELPERS_H
00002 #define CVD_INCLUDE_UTILITY_HELPERS_H
00003 
00004 #include <cvd/utility.h>
00005 #include <xmmintrin.h>
00006 using namespace std;
00007 
00008 namespace CVD{
00009     template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_differences(const T1* a, const T1* b, T2* c, size_t count)
00010     {
00011     if (count < M*2) {
00012         F::unaligned_differences(a,b,c,count);
00013         return;
00014     }
00015     if (!is_aligned<A>(a)) {        
00016         size_t steps = steps_to_align<A>(a);
00017         F::unaligned_differences(a,b,c,steps);
00018         count -= steps;
00019         a += steps;
00020         b += steps;
00021         c += steps;
00022     }
00023     if (!is_aligned<A>(c) || count < M) {
00024         F::unaligned_differences(a,b,c,count);
00025         return;
00026     }   
00027     size_t block = (count/M)*M;
00028     F::aligned_differences(a,b,c,block);
00029     if (count > block) {
00030         F::unaligned_differences(a+block,b+block,c+block,count-block);
00031     }
00032     }    
00033     
00034     template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_add_mul_add(const T1* a, const T1* b, const T1& c, T2* out, size_t count)
00035     {
00036     if (count < M*2) {
00037         F::unaligned_add_mul_add(a,b,c,out,count);
00038         return;
00039     }
00040     if (!is_aligned<A>(a)) {      
00041         size_t steps = steps_to_align<A>(a);
00042         F::unaligned_add_mul_add(a,b,c,out,steps);
00043         count -= steps;
00044         a += steps;
00045         b += steps;
00046         out += steps;
00047         if (count < M || !is_aligned<16>(out)) {
00048         F::unaligned_add_mul_add(a,b,c,out,count);
00049         return;
00050         }
00051     }
00052     else if (count < M || !is_aligned<16>(out)) {
00053         F::unaligned_add_mul_add(a,b,c,out,count);
00054         return;
00055     }
00056     size_t block = (count/M)*M;
00057     F::aligned_add_mul_add(a,b,c,out,block);
00058     if (count > block)
00059         F::unaligned_add_mul_add(a+block,b+block,c, out+block,count-block);
00060     }    
00061 
00062     template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_assign_mul(const T1* a, const T1& c, T2* out, size_t count)
00063     {
00064     if (count < M*2) {
00065         F::unaligned_assign_mul(a,c,out,count);
00066         return;
00067     }
00068     if (!is_aligned<A>(a)) {      
00069         size_t steps = steps_to_align<A>(a);
00070         F::unaligned_assign_mul(a,c,out,steps);
00071         count -= steps;
00072         a += steps;
00073         out += steps;
00074         if (count < M) {
00075         F::unaligned_assign_mul(a,c,out,count);
00076         return;
00077         }
00078     }
00079     size_t block = (count/M)*M;
00080     F::aligned_assign_mul(a,c,out,block);
00081     if (count > block) {
00082         F::unaligned_assign_mul(a+block,c, out+block,count-block);
00083     }
00084     }    
00085 
00086     template <class F, class R, class T1, int A, int M> inline R maybe_aligned_inner_product(const T1* a, const T1* b, size_t count)
00087     {
00088     if (count < M*2) {
00089         return F::unaligned_inner_product(a,b,count);
00090     }
00091     R sum = 0;
00092     if (!is_aligned<A>(a)) {      
00093         size_t steps = steps_to_align<A>(a);
00094         sum = F::unaligned_inner_product(a,b,steps);
00095         count -= steps;
00096         a += steps;
00097         b += steps;
00098         if (count < M) {
00099         return sum + F::unaligned_inner_product(a,b,count);
00100         }
00101     }
00102     size_t block = (count/M)*M;
00103     sum += F::aligned_inner_product(a,b,block);
00104     if (count > block)
00105         sum += F::unaligned_inner_product(a+block,b+block,count-block);
00106     return sum;
00107     }    
00108 
00109     template <class F, class R, class T1, int A, int M> inline R maybe_aligned_ssd(const T1* a, const T1* b, size_t count)
00110     {
00111     if (count < M*2) {
00112         return F::unaligned_ssd(a,b,count);
00113     }
00114     R sum = 0;
00115     if (!is_aligned<A>(a)) {      
00116         size_t steps = steps_to_align<A>(a);
00117         sum = F::unaligned_ssd(a,b,steps);
00118         count -= steps;
00119         a += steps;
00120         b += steps;
00121         if (count < M) {
00122         return sum + F::unaligned_ssd(a,b,count);
00123         }
00124     }
00125     size_t block = (count/M)*M;
00126     sum += F::aligned_ssd(a,b,block);
00127     if (count > block)
00128         sum += F::unaligned_ssd(a+block,b+block,count-block);
00129     return sum;
00130     }    
00131 
00132 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_square(const T1* in, T2* out, size_t count)
00133 {
00134     if (count < M*2) {
00135     return F::unaligned_square(in,out,count);
00136     }
00137     if (!is_aligned<A>(in)) {
00138     size_t steps = steps_to_align<A>(in);
00139     F::unaligned_square(in,out,steps);
00140     count -= steps;
00141     in += steps;
00142     out += steps;
00143     if (count < M) {
00144         F::unaligned_square(in,out,count);
00145     }
00146     }
00147     size_t block = (count/M)*M;
00148     F::aligned_square(in,out,block);
00149     if (count > block)
00150     F::unaligned_square(in+block,out+block,count-block);
00151 }    
00152 
00153 template <class F, class T1, class T2, int A, int M> inline void maybe_aligned_subtract_square(const T1* in, T2* out, size_t count)
00154 {
00155     if (count < M*2) {
00156     return F::unaligned_subtract_square(in,out,count);
00157     }
00158     if (!is_aligned<A>(in)) {
00159     size_t steps = steps_to_align<A>(in);
00160     F::unaligned_subtract_square(in,out,steps);
00161     count -= steps;
00162     in += steps;
00163     out += steps;
00164     if (count < M) {
00165         F::unaligned_subtract_square(in,out,count);
00166     }
00167     }
00168     size_t block = (count/M)*M;
00169     F::aligned_subtract_square(in,out,block);
00170     if (count > block)
00171     F::unaligned_subtract_square(in+block,out+block,count-block);
00172 }    
00173 
00174 
00175 
00176 }
00177 
00178 #endif