@@ -59,6 +59,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59
59
60
60
- Exposed number of iterations run in optimization, Juan Martín Loyola.
61
61
See <https://github.com/scikit-learn/scikit-learn/pull/21408/>
62
+
63
+ Modified 2024:
64
+
65
+ - Fix inconsistency that could cause the optimization algorithm to oscillate.
66
+ By Darren Mo.
62
67
*/
63
68
64
69
#include < math.h>
@@ -277,6 +282,37 @@ void Cache::swap_index(int i, int j)
277
282
}
278
283
}
279
284
285
+ // Provides access to the elements in a single column of a QMatrix.
286
+ class QColumn {
287
+ public:
288
+ QColumn (const Qfloat *values, int column_idx, double diagonal_value)
289
+ : values(values), column_idx(column_idx), diagonal_value(diagonal_value) {
290
+ }
291
+
292
+ double operator [](int row_idx) const {
293
+ if (row_idx != column_idx) {
294
+ return values[row_idx];
295
+ } else {
296
+ // Return the value from the QD array so that calculations stay
297
+ // consistent, regardless of whether QColumn or the QD array
298
+ // are used.
299
+ //
300
+ // QD array is double precision while Qfloat could be single
301
+ // precision, so the diagonal values could be very different
302
+ // between the two. If one calculation uses QColumn while
303
+ // another calculation uses the QD array, the optimization
304
+ // algorithm may not converge.
305
+ return diagonal_value;
306
+ }
307
+ }
308
+
309
+ private:
310
+ const Qfloat *values;
311
+
312
+ int column_idx;
313
+ double diagonal_value;
314
+ };
315
+
280
316
//
281
317
// Kernel evaluation
282
318
//
@@ -286,7 +322,7 @@ void Cache::swap_index(int i, int j)
286
322
//
287
323
class QMatrix {
288
324
public:
289
- virtual Qfloat * get_Q (int column, int len) const = 0;
325
+ virtual QColumn get_Q (int column, int len) const = 0;
290
326
virtual double *get_QD () const = 0;
291
327
virtual void swap_index (int i, int j) const = 0;
292
328
virtual ~QMatrix () {}
@@ -303,7 +339,7 @@ class Kernel: public QMatrix {
303
339
304
340
static double k_function (const PREFIX (node) *x, const PREFIX(node) *y,
305
341
const svm_parameter& param, BlasFunctions *blas_functions);
306
- virtual Qfloat * get_Q (int column, int len) const = 0;
342
+ virtual QColumn get_Q (int column, int len) const = 0;
307
343
virtual double *get_QD () const = 0;
308
344
virtual void swap_index (int i, int j) const // no so const...
309
345
{
@@ -641,7 +677,7 @@ void Solver::reconstruct_gradient()
641
677
{
642
678
for (i=active_size;i<l;i++)
643
679
{
644
- const Qfloat * Q_i = Q->get_Q (i,active_size);
680
+ QColumn Q_i = Q->get_Q (i,active_size);
645
681
for (j=0 ;j<active_size;j++)
646
682
if (is_free (j))
647
683
G[i] += alpha[j] * Q_i[j];
@@ -652,7 +688,7 @@ void Solver::reconstruct_gradient()
652
688
for (i=0 ;i<active_size;i++)
653
689
if (is_free (i))
654
690
{
655
- const Qfloat * Q_i = Q->get_Q (i,l);
691
+ QColumn Q_i = Q->get_Q (i,l);
656
692
double alpha_i = alpha[i];
657
693
for (j=active_size;j<l;j++)
658
694
G[j] += alpha_i * Q_i[j];
@@ -703,7 +739,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
703
739
for (i=0 ;i<l;i++)
704
740
if (!is_lower_bound (i))
705
741
{
706
- const Qfloat * Q_i = Q.get_Q (i,l);
742
+ QColumn Q_i = Q.get_Q (i,l);
707
743
double alpha_i = alpha[i];
708
744
int j;
709
745
for (j=0 ;j<l;j++)
@@ -755,8 +791,8 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
755
791
756
792
// update alpha[i] and alpha[j], handle bounds carefully
757
793
758
- const Qfloat * Q_i = Q.get_Q (i,active_size);
759
- const Qfloat * Q_j = Q.get_Q (j,active_size);
794
+ QColumn Q_i = Q.get_Q (i,active_size);
795
+ QColumn Q_j = Q.get_Q (j,active_size);
760
796
761
797
double C_i = get_C (i);
762
798
double C_j = get_C (j);
@@ -975,9 +1011,11 @@ int Solver::select_working_set(int &out_i, int &out_j)
975
1011
}
976
1012
977
1013
int i = Gmax_idx;
978
- const Qfloat *Q_i = NULL ;
979
- if (i != -1 ) // NULL Q_i not accessed: Gmax=-INF if i=-1
980
- Q_i = Q->get_Q (i,active_size);
1014
+ if (i == -1 ) {
1015
+ return 1 ;
1016
+ }
1017
+
1018
+ QColumn Q_i = Q->get_Q (i,active_size);
981
1019
982
1020
for (int j=0 ;j<active_size;j++)
983
1021
{
@@ -1224,8 +1262,8 @@ int Solver_NU::select_working_set(int &out_i, int &out_j)
1224
1262
1225
1263
int ip = Gmaxp_idx;
1226
1264
int in = Gmaxn_idx;
1227
- const Qfloat * Q_ip = NULL ;
1228
- const Qfloat * Q_in = NULL ;
1265
+ QColumn Q_ip = QColumn ( NULL , - 1 , 0 ) ;
1266
+ QColumn Q_in = QColumn ( NULL , - 1 , 0 ) ;
1229
1267
if (ip != -1 ) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1230
1268
Q_ip = Q->get_Q (ip,active_size);
1231
1269
if (in != -1 )
@@ -1433,7 +1471,7 @@ class SVC_Q: public Kernel
1433
1471
QD[i] = (this ->*kernel_function)(i,i);
1434
1472
}
1435
1473
1436
- Qfloat * get_Q (int i, int len) const
1474
+ QColumn get_Q (int i, int len) const
1437
1475
{
1438
1476
Qfloat *data;
1439
1477
int start, j;
@@ -1442,7 +1480,7 @@ class SVC_Q: public Kernel
1442
1480
for (j=start;j<len;j++)
1443
1481
data[j] = (Qfloat)(y[i]*y[j]*(this ->*kernel_function)(i,j));
1444
1482
}
1445
- return data;
1483
+ return QColumn ( data, i, QD[i]) ;
1446
1484
}
1447
1485
1448
1486
double *get_QD () const
@@ -1482,7 +1520,7 @@ class ONE_CLASS_Q: public Kernel
1482
1520
QD[i] = (this ->*kernel_function)(i,i);
1483
1521
}
1484
1522
1485
- Qfloat * get_Q (int i, int len) const
1523
+ QColumn get_Q (int i, int len) const
1486
1524
{
1487
1525
Qfloat *data;
1488
1526
int start, j;
@@ -1491,7 +1529,7 @@ class ONE_CLASS_Q: public Kernel
1491
1529
for (j=start;j<len;j++)
1492
1530
data[j] = (Qfloat)(this ->*kernel_function)(i,j);
1493
1531
}
1494
- return data;
1532
+ return QColumn ( data, i, QD[i]) ;
1495
1533
}
1496
1534
1497
1535
double *get_QD () const
@@ -1548,7 +1586,7 @@ class SVR_Q: public Kernel
1548
1586
swap (QD[i],QD[j]);
1549
1587
}
1550
1588
1551
- Qfloat * get_Q (int i, int len) const
1589
+ QColumn get_Q (int i, int len) const
1552
1590
{
1553
1591
Qfloat *data;
1554
1592
int j, real_i = index[i];
@@ -1564,7 +1602,7 @@ class SVR_Q: public Kernel
1564
1602
schar si = sign[i];
1565
1603
for (j=0 ;j<len;j++)
1566
1604
buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1567
- return buf;
1605
+ return QColumn ( buf, i, QD[i]) ;
1568
1606
}
1569
1607
1570
1608
double *get_QD () const
0 commit comments