|
148 | 148 | }
|
149 | 149 | ],
|
150 | 150 | "source": [
|
151 |
| - "show targets:exec diagnosis_M from .ml.onehot[targets;cols targets]" |
| 151 | + "show targets:exec diagnosis_M from .ml.oneHot.fitTransform[targets;cols targets]" |
152 | 152 | ]
|
153 | 153 | },
|
154 | 154 | {
|
|
218 | 218 | ],
|
219 | 219 | "source": [
|
220 | 220 | "// add second order polynomial features to the table \n",
|
221 |
| - "5#table:table^.ml.polytab[table;2]" |
| 221 | + "5#table:table^.ml.polyTab[table;2]" |
222 | 222 | ]
|
223 | 223 | },
|
224 | 224 | {
|
|
245 | 245 | ],
|
246 | 246 | "source": [
|
247 | 247 | "/ apply min-max scaling to the dataset to avoid biases due to orders of magnitude in the data\n",
|
248 |
| - "5#table:.ml.minmaxscaler table" |
| 248 | + "5#table:.ml.minMaxScaler.fitTransform table" |
249 | 249 | ]
|
250 | 250 | },
|
251 | 251 | {
|
|
266 | 266 | ],
|
267 | 267 | "source": [
|
268 | 268 | "/ perform a train-test split on the data - here 20% of the data is held out as the test set\n",
|
269 |
| - "show tts:.ml.traintestsplit[table;targets;.2]" |
| 269 | + "show tts:.ml.trainTestSplit[table;targets;.2]" |
270 | 270 | ]
|
271 | 271 | },
|
272 | 272 | {
|
|
301 | 301 | "a:{.p.import[`sklearn.ensemble][`:RandomForestClassifier]}\n",
|
302 | 302 | "\n",
|
303 | 303 | "/ scoring function which takes a function, parameters to apply to that function and data as arguments\n",
|
304 |
| - "score_func:.ml.xv.fitscore[a][`n_estimators pykw 500]" |
| 304 | + "score_func:.ml.xv.fitScore[a][`n_estimators pykw 500]" |
305 | 305 | ]
|
306 | 306 | },
|
307 | 307 | {
|
|
316 | 316 | "Average Model Scores:\n",
|
317 | 317 | "----------------------------------------------------------------------------\n",
|
318 | 318 | "Sequential split indices with basic k-fold cross validation: 0.9736264\n",
|
319 |
| - "Random split indices with basic k-fold cross validation: 0.9714286\n", |
320 |
| - "Stratified split indices with basic k-fold cross validation: 0.9736736\n" |
| 319 | + "Random split indices with basic k-fold cross validation: 0.9736264\n", |
| 320 | + "Stratified split indices with basic k-fold cross validation: 0.9758714\n" |
321 | 321 | ]
|
322 | 322 | }
|
323 | 323 | ],
|
324 | 324 | "source": [
|
325 | 325 | "/ split data into k-folds and train/validate the model\n",
|
326 |
| - "s1:.ml.xv.kfsplit[k;n;xtrain;ytrain;score_func] / sequentially split\n", |
327 |
| - "s2:.ml.xv.kfshuff[k;n;xtrain;ytrain;score_func] / randomized split\n", |
328 |
| - "s3:.ml.xv.kfstrat[k;n;xtrain;ytrain;score_func] / stratified split\n", |
| 326 | + "s1:.ml.xv.kfSplit[k;n;xtrain;ytrain;score_func] / sequentially split\n", |
| 327 | + "s2:.ml.xv.kfShuff[k;n;xtrain;ytrain;score_func] / randomized split\n", |
| 328 | + "s3:.ml.xv.kfStrat[k;n;xtrain;ytrain;score_func] / stratified split\n", |
329 | 329 | "\n",
|
330 | 330 | "-1\"Average Model Scores:\";\n",
|
331 | 331 | "-1\"----------------------------------------------------------------------------\";\n",
|
|
352 | 352 | "text": [
|
353 | 353 | "Average Model Scores:\n",
|
354 | 354 | "----------------------------------------------------------------------------\n",
|
355 |
| - "Monte-Carlo cross validation with 5 repetitions and training size of 80%: 0.9714286\n", |
356 |
| - "Repeated stratified cross validation, 5 fold, 5 repetitions: 0.9736264\n", |
357 |
| - "Repeated sequential cross validation, 5 fold, 5 repetitions: 0.9727473\n" |
| 355 | + "Monte-Carlo cross validation with 5 repetitions and training size of 80%: 0.967033\n", |
| 356 | + "Repeated stratified cross validation, 5 fold, 5 repetitions: 0.9727473\n", |
| 357 | + "Repeated sequential cross validation, 5 fold, 5 repetitions: 0.9740659\n" |
358 | 358 | ]
|
359 | 359 | }
|
360 | 360 | ],
|
361 | 361 | "source": [
|
362 | 362 | "p:.2 / percentage of data in validation set\n",
|
363 | 363 | "n: 5 / number of repetitions\n",
|
364 | 364 | "\n",
|
365 |
| - "r1:.ml.xv.mcsplit[p;n;xtrain;ytrain;score_func]\n", |
366 |
| - "r2:.ml.xv.kfshuff[k;n;xtrain;ytrain;score_func]\n", |
367 |
| - "r3:.ml.xv.kfsplit[k;n;xtrain;ytrain;score_func]\n", |
| 365 | + "r1:.ml.xv.mcSplit[p;n;xtrain;ytrain;score_func]\n", |
| 366 | + "r2:.ml.xv.kfShuff[k;n;xtrain;ytrain;score_func]\n", |
| 367 | + "r3:.ml.xv.kfSplit[k;n;xtrain;ytrain;score_func]\n", |
368 | 368 | "\n",
|
369 | 369 | "-1\"Average Model Scores:\";\n",
|
370 | 370 | "-1\"----------------------------------------------------------------------------\";\n",
|
|
407 | 407 | "outputs": [],
|
408 | 408 | "source": [
|
409 | 409 | "/ new scoring function\n",
|
410 |
| - "sf:.ml.xv.fitscore[a]\n", |
| 410 | + "sf:.ml.xv.fitScore[a]\n", |
411 | 411 | "\n",
|
412 | 412 | "/ dictionary of parameters\n",
|
413 | 413 | "gs_hp:`n_estimators`criterion`max_depth!(10 50 100 500;`gini`entropy;2 5 10 20 30)"
|
|
435 | 435 | "\n",
|
436 | 436 | "n_estimators criterion max_depth| ..\n",
|
437 | 437 | "--------------------------------| -------------------------------------------..\n",
|
438 |
| - "10 gini 2 | 0.956044 0.967033 0.956044 0.956044 0.9..\n", |
439 |
| - "10 gini 5 | 0.9230769 0.967033 0.956044 0.978022 0.9..\n", |
440 |
| - "10 gini 10 | 0.956044 0.967033 0.9230769 1 0.9..\n", |
441 |
| - "10 gini 20 | 0.967033 0.9450549 0.978022 0.956044 0.9..\n", |
442 |
| - "10 gini 30 | 0.9450549 0.956044 0.9450549 0.978022 0.9..\n", |
443 |
| - "10 entropy 2 | 0.9450549 0.9340659 0.9450549 0.9450549 0.9..\n", |
444 |
| - "10 entropy 5 | 0.956044 0.978022 0.967033 0.9230769 0.9..\n", |
445 |
| - "10 entropy 10 | 0.9450549 0.956044 0.956044 0.978022 0.9..\n", |
446 |
| - "10 entropy 20 | 0.9450549 0.9450549 0.978022 0.9450549 0.9..\n", |
447 |
| - "10 entropy 30 | 0.956044 0.967033 0.967033 0.978022 0.9..\n", |
448 |
| - "50 gini 2 | 0.9340659 0.956044 0.956044 0.967033 0.9..\n", |
449 |
| - "50 gini 5 | 0.956044 0.978022 0.956044 0.989011 0.9..\n", |
450 |
| - "50 gini 10 | 0.956044 0.978022 0.978022 0.967033 0.9..\n", |
451 |
| - "50 gini 20 | 0.956044 0.978022 0.967033 1 0.9..\n", |
452 |
| - "50 gini 30 | 0.9450549 0.967033 0.967033 0.978022 0.9..\n", |
453 |
| - "50 entropy 2 | 0.978022 0.967033 0.956044 0.9340659 0.9..\n", |
454 |
| - "50 entropy 5 | 0.9450549 0.967033 0.967033 0.989011 0.9..\n", |
455 |
| - "50 entropy 10 | 0.956044 0.978022 0.989011 0.967033 0.9..\n", |
456 |
| - "50 entropy 20 | 0.956044 0.978022 1 0.978022 0.9..\n", |
457 |
| - "50 entropy 30 | 0.956044 0.978022 0.978022 0.967033 0.9..\n", |
458 |
| - "100 gini 2 | 0.9450549 0.967033 0.967033 0.9450549 0.9..\n", |
459 |
| - "100 gini 5 | 0.956044 0.967033 0.989011 1 0.9..\n", |
| 438 | + "10 gini 2 | 0.956044 0.956044 0.956044 0.956044 0.9..\n", |
| 439 | + "10 gini 5 | 0.9450549 0.956044 0.956044 0.978022 0.9..\n", |
| 440 | + "10 gini 10 | 0.956044 0.9450549 0.978022 0.9450549 0.9..\n", |
| 441 | + "10 gini 20 | 0.956044 0.967033 0.9340659 0.978022 0.9..\n", |
| 442 | + "10 gini 30 | 0.9340659 0.978022 0.967033 0.9340659 0.9..\n", |
| 443 | + "10 entropy 2 | 0.9450549 0.9450549 0.967033 0.9450549 0.9..\n", |
| 444 | + "10 entropy 5 | 0.967033 0.978022 0.956044 0.9450549 0.9..\n", |
| 445 | + "10 entropy 10 | 0.956044 0.967033 0.967033 0.978022 0.9..\n", |
| 446 | + "10 entropy 20 | 0.9340659 0.967033 0.967033 0.978022 0.9..\n", |
| 447 | + "10 entropy 30 | 0.956044 0.978022 0.967033 0.956044 0.9..\n", |
| 448 | + "50 gini 2 | 0.956044 0.967033 0.967033 0.9340659 0.9..\n", |
| 449 | + "50 gini 5 | 0.956044 0.989011 0.967033 0.978022 0.9..\n", |
| 450 | + "50 gini 10 | 0.956044 0.967033 0.967033 1 0.9..\n", |
| 451 | + "50 gini 20 | 0.956044 0.978022 0.989011 0.967033 0.9..\n", |
| 452 | + "50 gini 30 | 0.956044 0.967033 0.956044 0.978022 0.9..\n", |
| 453 | + "50 entropy 2 | 0.967033 0.967033 0.956044 0.956044 0.9..\n", |
| 454 | + "50 entropy 5 | 0.967033 0.978022 0.967033 0.967033 0.9..\n", |
| 455 | + "50 entropy 10 | 0.978022 0.978022 0.967033 0.967033 0.9..\n", |
| 456 | + "50 entropy 20 | 0.956044 0.967033 0.956044 0.989011 0.9..\n", |
| 457 | + "50 entropy 30 | 0.978022 0.978022 0.9450549 0.989011 0.9..\n", |
| 458 | + "100 gini 2 | 0.967033 0.967033 0.967033 0.9340659 0.9..\n", |
| 459 | + "100 gini 5 | 0.967033 0.967033 0.978022 0.978022 0.9..\n", |
460 | 460 | "..\n"
|
461 | 461 | ]
|
462 | 462 | }
|
463 | 463 | ],
|
464 | 464 | "source": [
|
465 | 465 | "-1\"Grid search: hyperparameters and resulting score from each fold:\\n\";\n",
|
466 |
| - "show gr:.ml.gs.kfsplit[k;n;xtrain;ytrain;sf;gs_hp;0]" |
| 466 | + "show gr:.ml.gs.kfSplit[k;n;xtrain;ytrain;sf;gs_hp;0]" |
467 | 467 | ]
|
468 | 468 | },
|
469 | 469 | {
|
|
508 | 508 | {
|
509 | 509 | "data": {
|
510 | 510 | "text/plain": [
|
511 |
| - "`n_estimators`criterion`max_depth!(500;`entropy;5)\n", |
| 511 | + "`n_estimators`criterion`max_depth!(500;`entropy;10)\n", |
512 | 512 | "0.9824561\n"
|
513 | 513 | ]
|
514 | 514 | },
|
|
518 | 518 | }
|
519 | 519 | ],
|
520 | 520 | "source": [
|
521 |
| - "-2#.ml.gs.kfsplit[k;n;flip value flip table;targets;sf;gs_hp;.2]" |
| 521 | + "-2#.ml.gs.kfSplit[k;n;flip value flip table;targets;sf;gs_hp;.2]" |
522 | 522 | ]
|
523 | 523 | },
|
524 | 524 | {
|
|
537 | 537 | "data": {
|
538 | 538 | "text/plain": [
|
539 | 539 | "`n_estimators`criterion`max_depth!(500;`gini;10)\n",
|
540 |
| - "0.9561404\n" |
| 540 | + "0.9473684\n" |
541 | 541 | ]
|
542 | 542 | },
|
543 | 543 | "execution_count": 15,
|
|
546 | 546 | }
|
547 | 547 | ],
|
548 | 548 | "source": [
|
549 |
| - "-2#.ml.gs.kfsplit[k;n;flip value flip table;targets;sf;gs_hp;-.2]" |
| 549 | + "-2#.ml.gs.kfSplit[k;n;flip value flip table;targets;sf;gs_hp;-.2]" |
550 | 550 | ]
|
551 | 551 | },
|
552 | 552 | {
|
|
612 | 612 | {
|
613 | 613 | "data": {
|
614 | 614 | "text/plain": [
|
615 |
| - "`n_estimators`criterion`max_depth!(410;`entropy;4)\n", |
| 615 | + "`n_estimators`criterion`max_depth!(130;`entropy;20)\n", |
616 | 616 | "0.9912281\n"
|
617 | 617 | ]
|
618 | 618 | },
|
|
622 | 622 | }
|
623 | 623 | ],
|
624 | 624 | "source": [
|
625 |
| - "-2#.ml.rs.kfsplit[k;n;flip value flip table;targets;sf;rdm_hp;.2]" |
| 625 | + "-2#.ml.rs.kfSplit[k;n;flip value flip table;targets;sf;rdm_hp;.2]" |
626 | 626 | ]
|
627 | 627 | },
|
628 | 628 | {
|
|
653 | 653 | {
|
654 | 654 | "data": {
|
655 | 655 | "text/plain": [
|
656 |
| - "`n_estimators`criterion`max_depth!(378;`entropy;9)\n", |
| 656 | + "`n_estimators`criterion`max_depth!(316;`entropy;6)\n", |
657 | 657 | "0.9824561\n"
|
658 | 658 | ]
|
659 | 659 | },
|
|
663 | 663 | }
|
664 | 664 | ],
|
665 | 665 | "source": [
|
666 |
| - "-2#.ml.rs.kfsplit[k;n;flip value flip table;targets;sf;sbl_hp;.2]" |
| 666 | + "-2#.ml.rs.kfSplit[k;n;flip value flip table;targets;sf;sbl_hp;.2]" |
667 | 667 | ]
|
668 | 668 | },
|
669 | 669 | {
|
|
0 commit comments