Skip to content

Commit 960c546

Browse files
committed
Fix inconsistency that could cause the LIBSVM optimization algorithm to oscillate.
See more details in the upstream pull request: cjlin1/libsvm#228. Fixes scikit-learn#30353.
1 parent 6cccd99 commit 960c546

File tree

1 file changed

+56
-18
lines changed

1 file changed

+56
-18
lines changed

sklearn/svm/src/libsvm/svm.cpp

Lines changed: 56 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5959
6060
- Exposed number of iterations run in optimization, Juan Martín Loyola.
6161
See <https://github.com/scikit-learn/scikit-learn/pull/21408/>
62+
63+
Modified 2024:
64+
65+
- Fix inconsistency that could cause the optimization algorithm to oscillate.
66+
By Darren Mo.
6267
*/
6368

6469
#include <math.h>
@@ -277,6 +282,37 @@ void Cache::swap_index(int i, int j)
277282
}
278283
}
279284

285+
// Provides access to the elements in a single column of a QMatrix.
286+
class QColumn {
287+
public:
288+
QColumn(const Qfloat *values, int column_idx, double diagonal_value)
289+
: values(values), column_idx(column_idx), diagonal_value(diagonal_value) {
290+
}
291+
292+
double operator[](int row_idx) const {
293+
if (row_idx != column_idx) {
294+
return values[row_idx];
295+
} else {
296+
// Return the value from the QD array so that calculations stay
297+
// consistent, regardless of whether QColumn or the QD array
298+
// are used.
299+
//
300+
// QD array is double precision while Qfloat could be single
301+
// precision, so the diagonal values could be very different
302+
// between the two. If one calculation uses QColumn while
303+
// another calculation uses the QD array, the optimization
304+
// algorithm may not converge.
305+
return diagonal_value;
306+
}
307+
}
308+
309+
private:
310+
const Qfloat *values;
311+
312+
int column_idx;
313+
double diagonal_value;
314+
};
315+
280316
//
281317
// Kernel evaluation
282318
//
@@ -286,7 +322,7 @@ void Cache::swap_index(int i, int j)
286322
//
287323
class QMatrix {
288324
public:
289-
virtual Qfloat *get_Q(int column, int len) const = 0;
325+
virtual QColumn get_Q(int column, int len) const = 0;
290326
virtual double *get_QD() const = 0;
291327
virtual void swap_index(int i, int j) const = 0;
292328
virtual ~QMatrix() {}
@@ -303,7 +339,7 @@ class Kernel: public QMatrix {
303339

304340
static double k_function(const PREFIX(node) *x, const PREFIX(node) *y,
305341
const svm_parameter& param, BlasFunctions *blas_functions);
306-
virtual Qfloat *get_Q(int column, int len) const = 0;
342+
virtual QColumn get_Q(int column, int len) const = 0;
307343
virtual double *get_QD() const = 0;
308344
virtual void swap_index(int i, int j) const // no so const...
309345
{
@@ -641,7 +677,7 @@ void Solver::reconstruct_gradient()
641677
{
642678
for(i=active_size;i<l;i++)
643679
{
644-
const Qfloat *Q_i = Q->get_Q(i,active_size);
680+
QColumn Q_i = Q->get_Q(i,active_size);
645681
for(j=0;j<active_size;j++)
646682
if(is_free(j))
647683
G[i] += alpha[j] * Q_i[j];
@@ -652,7 +688,7 @@ void Solver::reconstruct_gradient()
652688
for(i=0;i<active_size;i++)
653689
if(is_free(i))
654690
{
655-
const Qfloat *Q_i = Q->get_Q(i,l);
691+
QColumn Q_i = Q->get_Q(i,l);
656692
double alpha_i = alpha[i];
657693
for(j=active_size;j<l;j++)
658694
G[j] += alpha_i * Q_i[j];
@@ -703,7 +739,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
703739
for(i=0;i<l;i++)
704740
if(!is_lower_bound(i))
705741
{
706-
const Qfloat *Q_i = Q.get_Q(i,l);
742+
QColumn Q_i = Q.get_Q(i,l);
707743
double alpha_i = alpha[i];
708744
int j;
709745
for(j=0;j<l;j++)
@@ -755,8 +791,8 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
755791

756792
// update alpha[i] and alpha[j], handle bounds carefully
757793

758-
const Qfloat *Q_i = Q.get_Q(i,active_size);
759-
const Qfloat *Q_j = Q.get_Q(j,active_size);
794+
QColumn Q_i = Q.get_Q(i,active_size);
795+
QColumn Q_j = Q.get_Q(j,active_size);
760796

761797
double C_i = get_C(i);
762798
double C_j = get_C(j);
@@ -975,9 +1011,11 @@ int Solver::select_working_set(int &out_i, int &out_j)
9751011
}
9761012

9771013
int i = Gmax_idx;
978-
const Qfloat *Q_i = NULL;
979-
if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
980-
Q_i = Q->get_Q(i,active_size);
1014+
if (i == -1) {
1015+
return 1;
1016+
}
1017+
1018+
QColumn Q_i = Q->get_Q(i,active_size);
9811019

9821020
for(int j=0;j<active_size;j++)
9831021
{
@@ -1224,8 +1262,8 @@ int Solver_NU::select_working_set(int &out_i, int &out_j)
12241262

12251263
int ip = Gmaxp_idx;
12261264
int in = Gmaxn_idx;
1227-
const Qfloat *Q_ip = NULL;
1228-
const Qfloat *Q_in = NULL;
1265+
QColumn Q_ip = QColumn(NULL, -1, 0);
1266+
QColumn Q_in = QColumn(NULL, -1, 0);
12291267
if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
12301268
Q_ip = Q->get_Q(ip,active_size);
12311269
if(in != -1)
@@ -1433,7 +1471,7 @@ class SVC_Q: public Kernel
14331471
QD[i] = (this->*kernel_function)(i,i);
14341472
}
14351473

1436-
Qfloat *get_Q(int i, int len) const
1474+
QColumn get_Q(int i, int len) const
14371475
{
14381476
Qfloat *data;
14391477
int start, j;
@@ -1442,7 +1480,7 @@ class SVC_Q: public Kernel
14421480
for(j=start;j<len;j++)
14431481
data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
14441482
}
1445-
return data;
1483+
return QColumn(data, i, QD[i]);
14461484
}
14471485

14481486
double *get_QD() const
@@ -1482,7 +1520,7 @@ class ONE_CLASS_Q: public Kernel
14821520
QD[i] = (this->*kernel_function)(i,i);
14831521
}
14841522

1485-
Qfloat *get_Q(int i, int len) const
1523+
QColumn get_Q(int i, int len) const
14861524
{
14871525
Qfloat *data;
14881526
int start, j;
@@ -1491,7 +1529,7 @@ class ONE_CLASS_Q: public Kernel
14911529
for(j=start;j<len;j++)
14921530
data[j] = (Qfloat)(this->*kernel_function)(i,j);
14931531
}
1494-
return data;
1532+
return QColumn(data, i, QD[i]);
14951533
}
14961534

14971535
double *get_QD() const
@@ -1548,7 +1586,7 @@ class SVR_Q: public Kernel
15481586
swap(QD[i],QD[j]);
15491587
}
15501588

1551-
Qfloat *get_Q(int i, int len) const
1589+
QColumn get_Q(int i, int len) const
15521590
{
15531591
Qfloat *data;
15541592
int j, real_i = index[i];
@@ -1564,7 +1602,7 @@ class SVR_Q: public Kernel
15641602
schar si = sign[i];
15651603
for(j=0;j<len;j++)
15661604
buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1567-
return buf;
1605+
return QColumn(buf, i, QD[i]);
15681606
}
15691607

15701608
double *get_QD() const

0 commit comments

Comments
 (0)