FAST-ER: load_data.cc Source File

00001 /*
00002 
00003     This file is part of the FAST-ER machine learning system.
00004     Copyright (C) 2008  Edward Rosten and Los Alamos National Laboratory
00005 
00006     This program is free software; you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation; either version 2 of the License, or
00009     (at your option) any later version.
00010 
00011     This program is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License along
00017     with this program; if not, write to the Free Software Foundation, Inc.,
00018     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00019 */
00020 #include <cvd/image_io.h>
00021 #include <cvd/vector_image_ref.h>
00022 #include <tag/printf.h>
00023 #include <tag/array.h>
00024 #include <TooN/TooN.h>
00025 #include <TooN/LU.h>
00026 #include <TooN/helpers.h>
00027 
00028 #include <cstdlib>
00029 
00030 #include "load_data.h"
00031 #include "warp_to_png.h"
00032 #include "utility.h"
00033 
00034 ///\cond never
00035 using namespace std;
00036 using namespace CVD;
00037 using namespace tag;
00038 using namespace TooN;
00039 ///\endcond
00040 
00041 /** Load images from a "Cambridge" style dataset.
00042 
00043 @param dir The base directory of the dataset.
00044 @param n   The number of images in the dataset.
00045 @param suffix   Image filename suffix to use.
00046 @return The loaded images.
00047 @ingroup gDataset
00048 */
00049 vector<Image<byte> > load_images_cambridge(string dir, int n, string suffix)
00050 {
00051     dir += "/frames/frame_%i." + suffix;
00052 
00053     vector<Image<byte> > ret;
00054 
00055     for(int i=0;  i < n; i++)
00056     {
00057         Image<byte> im;
00058         im = img_load(sPrintf(dir, i));
00059         ret.push_back(im);
00060     }
00061 
00062     return ret;
00063 }
00064 
00065 /** Load images from an "Oxford VGG" style dataset.
00066 
00067 @param dir The base directory of the dataset.
00068 @param n   The number of images in the dataset.
00069 @return The loaded images.
00070 @ingroup gDataset
00071 */
00072 vector<Image<byte> > load_images_vgg(string dir, int n)
00073 {
00074     dir += "/img%i.ppm";
00075 
00076     vector<Image<byte> > ret;
00077 
00078     for(int i=0;  i < n; i++)
00079         ret.push_back(img_load(sPrintf(dir, i+1)));
00080 
00081     return ret;
00082 }
00083 
00084 ///Load an array from an istream
00085 ///@param i Stream to load from
00086 ///@param f array to load in to
00087 ///@ingroup gUtility
00088 istream& operator>>(istream& i, array<float, 2>& f)
00089 {
00090     i >> f[0] >> f[1];
00091     return i;
00092 }
00093 
00094 ///Convert a vector in to an array
00095 ///@param vec Vector to convert
00096 ///@ingroup gUtility
00097 array<float, 2> Arr(const Vector<2>& vec)
00098 {
00099     return array<float, 2>((TupleHead, vec[0], vec[1]));
00100 }
00101 
00102 
00103 /**Load warps from a "Cambridge" repeatability dataset, with the warps
00104 stored encoded in PNG files. See load_warps_cambridge
00105 
00106 
00107 @param dir  The base directory of the dataset.
00108 @param num   The numbers of images in the dataset.
00109 @param size  The size of the corresponding images.
00110 @return  <code>return_value[i][j][y][x]</code> is where pixel x, y in image i warps to in image j.
00111 @ingroup gDataset
00112 */
00113 vector<vector<Image<array<float,2> > > > load_warps_cambridge_png(string dir, int num, ImageRef size)
00114 {
00115     dir += "/pngwarps/warp_%i_%i.png";
00116 
00117     vector<vector<Image<array<float, 2> > > > ret(num, vector<Image<array<float, 2> > >(num));
00118 
00119     BasicImage<byte> tester(NULL, size);
00120 
00121     array<float, 2> outside((TupleHead, -1, -1));
00122 
00123     for(int from = 0; from < num; from ++)
00124         for(int to = 0; to < num; to ++)
00125             if(from != to)
00126             {
00127                 string fname = sPrintf(dir, from, to);
00128                 Image<Rgb<unsigned short> > p = img_load(fname);
00129 
00130                 if(p.size() != size)
00131                 {
00132                     cerr << "Error: warp file " << fname << " is the wrong size!\n";
00133                     exit(1);
00134                 }
00135 
00136                 Image<array<float,2> > w(size, outside);
00137 
00138                 for(int y=0; y < size.y; y++)
00139                     for(int x=0; x < size.x; x++)
00140                     {
00141                         w[y][x][0] = p[y][x].red / MULTIPLIER - SHIFT;
00142                         w[y][x][1] = p[y][x].green / MULTIPLIER - SHIFT;
00143                     }
00144 
00145 
00146                 cerr << "Loaded " << fname << endl;
00147 
00148                 ret[from][to] = w;
00149             }
00150 
00151     return ret;
00152 }
00153 
00154 /**Load warps from a "Cambridge" repeatability dataset. 
00155 
00156 The dataset contains warps which round to outside the image by one pixel in the max direction.
00157 
00158 Note that the line labelled "prune" is diasbled in the evaluation of the FAST-ER system. This
00159 causes the two systems to produce slightly different results. If this line is commented out, then
00160 FAST-ER generated detectors produce exactly the same results when loaded back in to this system.
00161 
00162 @param dir  The base directory of the dataset.
00163 @param num   The numbers of images in the dataset.
00164 @param size  The size of the corresponding images.
00165 @return  <code>return_value[i][j][y][x]</code> is where pixel x, y in image i warps to in image j.
00166 @ingroup gDataset
00167 */
00168 vector<vector<Image<array<float,2> > > > load_warps_cambridge(string dir, int num, ImageRef size)
00169 {
00170     dir += "/warps/warp_%i_%i.warp";
00171 
00172     vector<vector<Image<array<float, 2> > > > ret(num, vector<Image<array<float, 2> > >(num));
00173 
00174     BasicImage<byte> tester(NULL, size);
00175 
00176     array<float, 2> outside((TupleHead, -1, -1));
00177 
00178     for(int from = 0; from < num; from ++)
00179         for(int to = 0; to < num; to ++)
00180             if(from != to)
00181             {
00182                 Image<array<float,2> > w(size, outside);
00183                 int n = size.x * size.y;
00184                 Image<array<float,2> >::iterator p = w.begin();
00185 
00186                 ifstream f;
00187                 string fname = sPrintf(dir, from, to);
00188                 f.open(fname.c_str());
00189 
00190                 if(!f.good())
00191                 {
00192                     cerr << "Error: " << fname << ": " << strerror(errno) << endl;
00193                     exit(1);
00194                 }
00195 
00196                 array<float, 2> v;
00197 
00198                 for(int i=0; i < n; ++i, ++p)
00199                 {
00200                     f >> v;
00201                     //prune
00202                     //if(v[0] >= 0 && v[1] >= 0 && v[0] <= size.x-1 && v[1] <= size.y-1)
00203                         *p = v;
00204                 }
00205                 
00206                 if(!f.good())
00207                 {
00208                     cerr << "Error: " << fname << " went bad" << endl;
00209                     exit(1);
00210                 }
00211 
00212                 cerr << "Loaded " << fname << endl;
00213 
00214                 ret[from][to] = w;
00215             }
00216 
00217     return ret;
00218 }
00219 
00220 ///Invert a matrix
00221 ///@param m Matrix to invert
00222 ///@ingroup gUtility
00223 Matrix<3> invert(const Matrix<3>& m)
00224 {
00225     LU<3> i(m);
00226     return i.get_inverse();
00227 }
00228 
00229 /**Load warps from an "Oxford VGG" repeatability dataset.  The warps are stored
00230 as homographies, so warps need to be generated.
00231 
00232 @param dir  The base directory of the dataset.
00233 @param num   The numbers of images in the dataset.
00234 @param size  The size of the corresponding images.
00235 @return  <code>return_value[i][j][y][x]</code> is where pixel x, y in image i warps to in image j.
00236 @ingroup gDataset
00237 */
00238 vector<vector<Image<array<float, 2> > > > load_warps_vgg(string dir, int num, ImageRef size)
00239 {
00240     dir += "/H1to%ip";
00241     array<float, 2> outside((TupleHead, -1, -1));
00242 
00243     //Load the homographies
00244     vector<Matrix<3> > H_1_to_x;
00245     
00246     //The first homography is always the identity.
00247     {
00248         Matrix<3> i;
00249         Identity(i);
00250         H_1_to_x.push_back(i);
00251     }
00252 
00253     for(int i=2; i <= num; i++)
00254     {
00255         ifstream f;
00256         string fname = sPrintf(dir, i).c_str();
00257         f.open(fname.c_str());
00258 
00259         Matrix<3> h;
00260         f >> h;
00261 
00262         if(!f.good())
00263         {
00264             cerr << "Error: " << fname << " went bad" << endl;
00265             exit(1);
00266         }
00267 
00268         H_1_to_x.push_back(h);
00269     }
00270 
00271     vector<vector<Image<array<float, 2> > > > ret(num, vector<Image<array<float, 2> > >(num));
00272     
00273     //Generate the warps.
00274     for(int from = 0; from < num; from ++)
00275         for(int to = 0; to < num; to ++)
00276             if(from != to)
00277             {
00278                 Matrix<3> from_to_one = invert(H_1_to_x[from]);
00279                 Matrix<3> one_to_to   = H_1_to_x[to];
00280                 Matrix<3> from_to_to = one_to_to * from_to_one;
00281 
00282                 Image<array<float,2> > w(size, outside);
00283 
00284                 for(int y=0; y < size.y; y++)
00285                     for(int x=0; x < size.x; x++)
00286                     {
00287                         Vector<2> p = project(from_to_to * Vector<3>((make_Vector, x, y, 1)));
00288 
00289                         if(p[0] >= 0 && p[1] >= 0 && p[0] <= size.x-1 && p[1] <= size.y-1)
00290                             w[y][x] = Arr(p);
00291                     }
00292 
00293                 ret[from][to] = w;
00294 
00295                 cerr << "Created warp " << from << " -> " << to << endl;
00296             }
00297     
00298     return ret;
00299 }
00300 
00301 
///The dataset layouts which load_data knows how to read.
enum DataFormat
{
    Cambridge        = 0, ///< Cambridge dataset: PGM frames, warps loaded with load_warps_cambridge.
    CambridgePNGWarp = 1, ///< Cambridge dataset: PNG frames, warps loaded with load_warps_cambridge_png.
    VGG              = 2  ///< Oxford VGG dataset: images and warps loaded with the *_vgg loaders.
};
00308 
00309 /**Load a dataset.
00310 @param dir The base directory of the dataset.
00311 @param num The number of images in the dataset.
00312 @param format The type of the dataset. This should be one of `vgg', `cam-png' or `cam'.
00313 @return The images and the warps.
00314 @ingroup gDataset
00315 */
00316 pair<vector<Image<byte> >, vector<vector<Image<array<float, 2> > > > > load_data(string dir, int num, string format)
00317 {
00318     vector<Image<byte> > images;
00319     vector<vector<Image<array<float, 2> > > > warps;
00320 
00321     DataFormat d;
00322 
00323     if(format == "vgg")
00324         d = VGG;
00325     else if(format == "cam-png")
00326         d = CambridgePNGWarp;
00327     else
00328         d = Cambridge;
00329 
00330     switch(d)
00331     {
00332         case Cambridge:
00333             images = load_images_cambridge(dir, num, "pgm");
00334             break;
00335 
00336         case CambridgePNGWarp:
00337             images = load_images_cambridge(dir, num, "png");
00338             break;
00339 
00340         case VGG:
00341             images = load_images_vgg(dir, num);
00342     };
00343 
00344     //Check for sanity
00345     if(images.size() == 0)
00346     {
00347         cerr << "No images!\n";
00348         exit(1);
00349     }
00350 
00351     for(unsigned int i=0; i < images.size(); i++)
00352         if(images[i].size() != images[0].size())
00353         {
00354             cerr << "Images are different sizes!\n";
00355             exit(1);
00356         }
00357 
00358     switch(d)
00359     {
00360         case CambridgePNGWarp:
00361             warps = load_warps_cambridge_png(dir, num, images[0].size());
00362             break;
00363 
00364         case Cambridge:
00365             warps = load_warps_cambridge(dir, num, images[0].size());
00366             break;
00367 
00368         case VGG:
00369             warps = load_warps_vgg(dir, num, images[0].size());
00370     };
00371 
00372 
00373     return make_pair(images, warps);
00374 }
00375 
00376 
00377 /**
00378 This function prunes a dataset so that no warped point will lie outside an image. This
00379 will save on .in_image() tests later.
00380 @param warps The warps to prune.
00381 @param size the image size to prune to.
00382 @ingroup gDataset
00383 */
00384 void prune_warps(vector<vector<Image<array<float, 2> > > >& warps, ImageRef size)
00385 {
00386     BasicImage<byte> test(NULL, size);
00387     array<float, 2> outside = make_tuple(-1, -1);
00388 
00389     for(unsigned int i=0; i < warps.size(); i++)    
00390         for(unsigned int j=0; j < warps[i].size(); j++) 
00391         {
00392             for(Image<array<float, 2> >::iterator  p=warps[i][j].begin(); p != warps[i][j].end(); p++)
00393                 if(!test.in_image(ir_rounded(*p)))
00394                     *p = outside;
00395         }
00396 }
00397 
00398 
00399