[sift] Remove direct downscaling from input image to top level of every octave (#178)

griwodz · Carsten Griwodz · web-flow · commit a30a990a5704 · 2025-10-23T07:24:55.000+02:00
* [sift] Remove option to create all octaves directly from the input
* [sift] reduce size of GaussTable dd to 1: The other levels of this table existed only for direct downscaling.
* update CHANGES
* [sift] remove some code that is now dead

---------

Co-authored-by: Carsten Griwodz <griff@ifi.uio.no>
diff --git a/CHANGES.md b/CHANGES.md
@@ -15,6 +15,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Removed
 
+## [0.10.1] - 2025-10-21
+
+### Removed
+
+- Removed option to create top level of every octave from input image [PR](https://github.com/alicevision/popsift/pull/178)
+
 ## [0.10.0] - 2025-10-14
 
 ### Added
diff --git a/src/application/main.cpp b/src/application/main.cpp
@@ -88,8 +88,6 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
         "reject points when reaching max iterations, "
         "first contrast threshold is floor(.5 * peak thresh). "
         "Computed filter width are lower than VLFeat/PopSift")
-        ("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
-         "Direct each octave from upscaled orig instead of blurred level.")
         ("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
         ( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
           popsift::Config::getNormModeUsage() )
diff --git a/src/application/match.cpp b/src/application/match.cpp
@@ -86,8 +86,6 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
         "reject points when reaching max iterations, "
         "first contrast threshold is floor(.5 * peak thresh). "
         "Computed filter width are lower than VLFeat/PopSift")
-        ("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
-         "Direct each octave from upscaled orig instead of blurred level.")
         ("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
         ( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
           popsift::Config::getNormModeUsage() )
diff --git a/src/popsift/gauss_filter.cu b/src/popsift/gauss_filter.cu
@@ -102,21 +102,19 @@ void print_gauss_filter_symbol( int columns )
     }
     printf("\n");
 
-    printf("    level 0-filters for direct downscaling\n");
+    printf("    level 0-filter for the creation of the first level of the first octave\n" );
 
-    for( int lvl=0; lvl<MAX_OCTAVES; lvl++ ) {
-        int span = d_gauss.dd.span[lvl] + d_gauss.dd.span[lvl] - 1;
+    int span = d_gauss.dd.span[0] + d_gauss.dd.span[0] - 1;
 
-        printf("      %d %d %2.6f: ", lvl, span, d_gauss.dd.sigma[lvl] );
-        int m = min( d_gauss.dd.span[lvl], columns );
-        for( int x=0; x<m; x++ ) {
-            printf("%0.8f ", d_gauss.dd.filter[lvl*GAUSS_ALIGN+x] );
-        }
-        if( m < d_gauss.dd.span[lvl] )
-            printf("...\n");
-        else
-            printf("\n");
+    printf("      %d %d %2.6f: ", 0, span, d_gauss.dd.sigma[0] );
+    int m = min( d_gauss.dd.span[0], columns );
+    for( int x=0; x<m; x++ ) {
+        printf("%0.8f ", d_gauss.dd.filter[x] );
     }
+    if( m < d_gauss.dd.span[0] )
+        printf("...\n");
+    else
+        printf("\n");
     printf("\n");
 }
 
@@ -215,26 +213,17 @@ void init_filter( const Config& conf,
     h_gauss.abs_oN.computeBlurTable( &h_gauss );
 
     /* dd :
-     * The direct-downscaling kernels make use of the assumption that downscaling
-     * from MAX_LEVEL-3 is identical to applying 2*sigma on the identical image
-     * before downscaling, which would be identical to applying 1*sigma after
-     * downscaling.
-     * In reality, this is not true because images are not continuous, but we
-     * support the options because it is interesting. Perhaps it works for the later
-     * octaves, where it is also good for performance.
+     * A leftover from an attempt to create all top levels of all octaves from the
+     * input image.
      * dd is only for creating level 0 of all octave directly from the input image.
      */
-    for( int oct=0; oct<MAX_OCTAVES; oct++ ) {
-        // sigma * 2^i
-        float oct_sigma = scalbnf( sigma0, oct );
 
-        // subtract initial blur
-        float b = sqrt( fabs( oct_sigma * oct_sigma - initial_blur * initial_blur ) );
+    // subtract initial blur
+    const float sigma_o0_l0 = sqrt( fabs( sigma0 * sigma0 - initial_blur * initial_blur ) );
 
-        // sigma / 2^i
-        h_gauss.dd.sigma[oct] = scalbnf( b, -oct );
-        h_gauss.dd.computeBlurTable( &h_gauss );
-    }
+    // only entry 0 of dd is used: sigma for level 0 of octave 0
+    h_gauss.dd.sigma[0] = sigma_o0_l0;
+    h_gauss.dd.computeBlurTable( &h_gauss );
 
     cudaError_t err;
     err = cudaMemcpyToSymbol( d_gauss,
diff --git a/src/popsift/gauss_filter.h b/src/popsift/gauss_filter.h
@@ -74,12 +74,13 @@ struct GaussInfo
      */
     GaussTable<GAUSS_LEVELS> abs_oN;
 
-    /* In theory, level 0 of octave 2 contains the same information
-     * whether it is constructed by downscaling and blurring the
-     * input image with sigma or by blurring the input image with 2*sigma
-     * and downscaling afterwards.
+    /* The dd table was meant for the creation of every top-level of
+     * every octave directly from the upscaled input image. This option
+     * has been removed because it didn't work well.
+     * As a consequence, the table dd needs only its first entry for
+     * Gaussian filtering of the first octave.
      */
-    GaussTable<MAX_OCTAVES> dd;
+    GaussTable<1> dd;
 
     __host__
     void clearTables( );
diff --git a/src/popsift/s_pyramid_build.cu b/src/popsift/s_pyramid_build.cu
@@ -94,9 +94,9 @@ void make_dog( cudaTextureObject_t src_data,
 } // namespace gauss
 
 __host__
-inline void Pyramid::horiz_from_input_image( const Config& conf, ImageBase* base, int octave, cudaStream_t stream )
+inline void Pyramid::horiz_from_input_image( const Config& conf, ImageBase* base, cudaStream_t stream )
 {
-    Octave&   oct_obj = _octaves[octave];
+    Octave&   oct_obj = _octaves[0];
 
     const int width   = oct_obj.getWidth();
     const int height  = oct_obj.getHeight();
@@ -109,8 +109,8 @@ inline void Pyramid::horiz_from_input_image( const Config& conf, ImageBase* base
     const Config::SiftMode& mode = conf.getSiftMode();
     float shift  = 0.5f;
 
-    if( octave == 0 && ( mode == Config::PopSift || mode == Config::VLFeat ) ) {
-        shift  = 0.5f * powf( 2.0f, conf.getUpscaleFactor() - octave );
+    if( mode == Config::PopSift || mode == Config::VLFeat ) {
+        shift  = 0.5f * powf( 2.0f, conf.getUpscaleFactor() );
     }
 
     gauss::normalizedSource::horiz
@@ -119,7 +119,6 @@ inline void Pyramid::horiz_from_input_image( const Config& conf, ImageBase* base
           oct_obj.getIntermediateSurface(),
           width,
           height,
-          octave,
           shift );
 
     POP_SYNC_CHK;
@@ -475,16 +474,7 @@ void Pyramid::build_pyramid( const Config& conf, ImageBase* base )
         Octave&      oct_obj = _octaves[octave];
         cudaStream_t stream  = oct_obj.getStream();
 
-        if( ( conf.getScalingMode() == Config::ScaleDirect ) &&
-            ( conf.getGaussMode() == Config::Fixed9 || conf.getGaussMode() == Config::Fixed15 ) ) {
-            if( octave == 0 ) {
-                make_octave( conf, base, oct_obj, stream, true );
-            } else {
-                horiz_from_input_image( conf, base, octave, stream );
-                vert_from_interm( octave, 0, stream, NotInterpolated_FromPrevious );
-                make_octave( conf, base, oct_obj, stream, false );
-            }
-        } else if( conf.getGaussMode() == Config::Fixed9 || conf.getGaussMode() == Config::Fixed15 ) {
+        if( conf.getGaussMode() == Config::Fixed9 || conf.getGaussMode() == Config::Fixed15 ) {
             if( octave == 0 ) {
                 make_octave( conf, base, oct_obj, stream, true );
             } else {
@@ -496,30 +486,14 @@ void Pyramid::build_pyramid( const Config& conf, ImageBase* base )
             }
 
             cuda::event_record( oct_obj.getEventScaleDone(), stream, __FILE__, __LINE__ );
-        } else if( conf.getScalingMode() == Config::ScaleDirect ) {
-            GaussTableChoice useGauss = ( conf.getGaussMode() == Config::VLFeat_Relative ) ? Interpolated_FromPrevious
-                                                                                           : NotInterpolated_FromPrevious;
-            for( int level=0; level<_levels; level++ )
-            {
-                if( level == 0 )
-                {
-                    horiz_from_input_image( conf, base, octave, stream );
-                    vert_from_interm( octave, level, stream, useGauss );
-                }
-                else
-                {
-                    horiz_from_prev_level( octave, level, stream, useGauss );
-                    vert_from_interm( octave, level, stream, useGauss );
-                }
-            }
         } else if( conf.getGaussMode() == Config::VLFeat_Relative ) {
             for( int level=0; level<_levels; level++ )
             {
                 if( level == 0 )
                 {
                     if( octave == 0 )
                     {
-                        horiz_from_input_image( conf, base, 0, stream );
+                        horiz_from_input_image( conf, base, stream );
                         vert_from_interm( octave, 0, stream, Interpolated_FromPrevious );
                     }
                     else
@@ -551,7 +525,7 @@ void Pyramid::build_pyramid( const Config& conf, ImageBase* base )
                 {
                     if( octave == 0 )
                     {
-                        horiz_from_input_image( conf, base, 0, stream );
+                        horiz_from_input_image( conf, base, stream );
                         vert_from_interm( octave, 0, stream, NotInterpolated_FromPrevious );
                     }
                     else
diff --git a/src/popsift/s_pyramid_build_ra.cu b/src/popsift/s_pyramid_build_ra.cu
@@ -19,21 +19,15 @@ void horiz( cudaTextureObject_t src_linear_tex,
             cudaSurfaceObject_t dst_data,
             int                 dst_w,
             int                 dst_h,
-            int                 octave,
             float               shift )
 {
-    // Create level-0 for any octave from the input image.
-    // Since we are computing the direct-downscaling gauss filter tables
-    // and the first entry in that table is identical to the "normal"
-    // table, we do not need a special case.
-
     const int    write_x = blockIdx.x * blockDim.x + threadIdx.x;
     const int    write_y = blockIdx.y;
 
     if( write_x >= dst_w ) return;
 
-    const int    span    =  d_gauss.dd.span[octave];
-    const float* filter  = &d_gauss.dd.filter[octave*GAUSS_ALIGN];
+    const int    span    =  d_gauss.dd.span[0];
+    const float* filter  = &d_gauss.dd.filter[0];
     const float  read_x  = ( blockIdx.x * blockDim.x + threadIdx.x + shift ) / dst_w;
     const float  read_y  = ( blockIdx.y + shift ) / dst_h;
 
diff --git a/src/popsift/s_pyramid_build_ra.h b/src/popsift/s_pyramid_build_ra.h
@@ -15,7 +15,6 @@ __global__ void horiz(cudaTextureObject_t src_data,
                       cudaSurfaceObject_t dst_data,
                       int dst_w,
                       int dst_h,
-                      int octave,
                       float shift);
 
 __global__ void horiz_level(cudaTextureObject_t src_linear_tex,
diff --git a/src/popsift/sift_conf.cu b/src/popsift/sift_conf.cu
@@ -25,7 +25,6 @@ Config::Config( )
     , _gauss_mode( getGaussModeDefault() )
     , _sift_mode( Config::PopSift )
     , _log_mode( Config::None )
-    , _scaling_mode( Config::ScaleDefault )
     , _desc_mode( Config::Loop )
     , _grid_filter_mode( Config::RandomScale )
     , verbose( false )
@@ -182,11 +181,6 @@ Config::LogMode Config::getLogMode( ) const
     return _log_mode;
 }
 
-void Config::setScalingMode( ScalingMode mode )
-{
-    _scaling_mode = mode;
-}
-
 /**
  * Normalization mode
  * Should the descriptor normalization use L2-like classic normalization
@@ -313,7 +307,6 @@ bool Config::equal( const Config& other ) const
         COMPARE( _edge_limit ) ||
         COMPARE( _threshold ) ||
         COMPARE( _upscale_factor ) ||
-        COMPARE( _scaling_mode ) ||
         COMPARE( _max_extrema ) ||
         COMPARE( _gauss_mode ) ||
         COMPARE( _sift_mode ) ||
diff --git a/src/popsift/sift_conf.h b/src/popsift/sift_conf.h
@@ -69,16 +69,6 @@ struct Config
         All
     };
 
-    /**
-     * @brief The scaling mode.
-     */
-    enum ScalingMode
-    {
-        ScaleDirect,
-        /// Indirect - only working method
-        ScaleDefault
-    };
-
     /**
      * @brief Modes for descriptor extraction.
      */
@@ -164,7 +154,6 @@ struct Config
      * @see LogMode
      */
     void setLogMode( LogMode mode = All );
-    void setScalingMode( ScalingMode mode = ScaleDefault );
 
     /**
      * @brief Enable/desable verbose mode.
@@ -328,13 +317,6 @@ struct Config
      */
     GridFilterMode getFilterSorting() const { return _grid_filter_mode; }
 
-    /**
-     * @brief Get the scaling mode.
-     * @return the descriptor extraction mode.
-     * @see ScalingMode
-     */
-    inline ScalingMode getScalingMode() const { return _scaling_mode; }
-
     /**
      * @brief Get the descriptor extraction mode
      * @return the descriptor extraction mode
@@ -361,9 +343,6 @@ struct Config
     /// default LogMode::None
     LogMode  _log_mode;
 
-    /// default: ScalingMode::DownscaledOctaves
-    ScalingMode _scaling_mode;
-
     /// default: DescMode::Loop
     DescMode    _desc_mode;
 
diff --git a/src/popsift/sift_pyramid.h b/src/popsift/sift_pyramid.h
@@ -116,7 +116,6 @@ class Pyramid
 private:
     inline void horiz_from_input_image( const Config&    conf,
                                         ImageBase*       base,
-					                    int              octave,
 					                    cudaStream_t     stream );
     inline void horiz_level_from_input_image( const Config&    conf,
                                               ImageBase*       base,