
Commit d4b9024

Refactor of ML toolkit (#16)
* update to new ml/nlp version
* update notebooks for ml refactor
* update to new format of ml
* fit->transform
1 parent 2ca1268 commit d4b9024

14 files changed: +1053 −1158 lines changed
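In short, the refactor moves the toolkit's preprocessing utilities from single-shot lowercase functions to camelCase fit/transform interfaces. A minimal before/after sketch in q, using only calls that appear in the notebook diff below (the `table` and `targets` variables are those defined in the notebooks):

/ one-hot encoding: .ml.onehot becomes .ml.oneHot.fitTransform
show targets:exec diagnosis_M from .ml.oneHot.fitTransform[targets;cols targets]
/ min-max scaling: .ml.minmaxscaler becomes .ml.minMaxScaler.fitTransform
5#table:.ml.minMaxScaler.fitTransform table
/ train-test split: .ml.traintestsplit becomes .ml.trainTestSplit
show tts:.ml.trainTestSplit[table;targets;.2]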

notebooks/01 Decision Trees.ipynb  +11 −239  (large diff not rendered by default)

notebooks/02 Random Forests.ipynb  +37 −37  (large diff not rendered by default)

notebooks/03 Neural Networks.ipynb  +40 −64  (large diff not rendered by default)

notebooks/04 Dimensionality Reduction.ipynb  +51 −96  (large diff not rendered by default)

notebooks/05 Feature Engineering.ipynb  +10 −10  (large diff not rendered by default)

notebooks/06 Feature Extraction and Selection.ipynb  +172 −111  (large diff not rendered by default)

notebooks/07 Cross Validation.ipynb  +48 −48
@@ -148,7 +148,7 @@
 }
 ],
 "source": [
-"show targets:exec diagnosis_M from .ml.onehot[targets;cols targets]"
+"show targets:exec diagnosis_M from .ml.oneHot.fitTransform[targets;cols targets]"
 ]
 },
 {
@@ -218,7 +218,7 @@
 ],
 "source": [
 "// add second order polynomial features to the table \n",
-"5#table:table^.ml.polytab[table;2]"
+"5#table:table^.ml.polyTab[table;2]"
 ]
 },
 {
@@ -245,7 +245,7 @@
 ],
 "source": [
 "/ complete standard scaling of the dataset to avoid biases due to orders of magnitude in the data\n",
-"5#table:.ml.minmaxscaler table"
+"5#table:.ml.minMaxScaler.fitTransform table"
 ]
 },
 {
@@ -266,7 +266,7 @@
 ],
 "source": [
 "/ complete a train-test-split on the data - below 20% of data is used in the test set\n",
-"show tts:.ml.traintestsplit[table;targets;.2]"
+"show tts:.ml.trainTestSplit[table;targets;.2]"
 ]
 },
 {
@@ -301,7 +301,7 @@
 "a:{.p.import[`sklearn.ensemble][`:RandomForestClassifier]}\n",
 "\n",
 "/ scoring function which takes a function, parameters to apply to that function and data as arguments\n",
-"score_func:.ml.xv.fitscore[a][`n_estimators pykw 500]"
+"score_func:.ml.xv.fitScore[a][`n_estimators pykw 500]"
 ]
 },
 {
@@ -316,16 +316,16 @@
 "Average Model Scores:\n",
 "----------------------------------------------------------------------------\n",
 "Sequential split indices with basic k-fold cross validation: 0.9736264\n",
-"Random split indices with basic k-fold cross validation: 0.9714286\n",
-"Stratified split indices with basic k-fold cross validation: 0.9736736\n"
+"Random split indices with basic k-fold cross validation: 0.9736264\n",
+"Stratified split indices with basic k-fold cross validation: 0.9758714\n"
 ]
 }
 ],
 "source": [
 "/ split data into k-folds and train/validate the model\n",
-"s1:.ml.xv.kfsplit[k;n;xtrain;ytrain;score_func] / sequentially split\n",
-"s2:.ml.xv.kfshuff[k;n;xtrain;ytrain;score_func] / randomized split\n",
-"s3:.ml.xv.kfstrat[k;n;xtrain;ytrain;score_func] / stratified split\n",
+"s1:.ml.xv.kfSplit[k;n;xtrain;ytrain;score_func] / sequentially split\n",
+"s2:.ml.xv.kfShuff[k;n;xtrain;ytrain;score_func] / randomized split\n",
+"s3:.ml.xv.kfStrat[k;n;xtrain;ytrain;score_func] / stratified split\n",
 "\n",
 "-1\"Average Model Scores:\";\n",
 "-1\"----------------------------------------------------------------------------\";\n",
@@ -352,19 +352,19 @@
 "text": [
 "Average Model Scores:\n",
 "----------------------------------------------------------------------------\n",
-"Monte-Carlo cross validation with 5 repetitions and training size of 80%: 0.9714286\n",
-"Repeated stratified cross validation, 5 fold, 5 repetitions: 0.9736264\n",
-"Repeated sequential cross validation, 5 fold, 5 repetitions: 0.9727473\n"
+"Monte-Carlo cross validation with 5 repetitions and training size of 80%: 0.967033\n",
+"Repeated stratified cross validation, 5 fold, 5 repetitions: 0.9727473\n",
+"Repeated sequential cross validation, 5 fold, 5 repetitions: 0.9740659\n"
 ]
 }
 ],
 "source": [
 "p:.2 / percentage of data in validation set\n",
 "n: 5 / number of repetitions\n",
 "\n",
-"r1:.ml.xv.mcsplit[p;n;xtrain;ytrain;score_func]\n",
-"r2:.ml.xv.kfshuff[k;n;xtrain;ytrain;score_func]\n",
-"r3:.ml.xv.kfsplit[k;n;xtrain;ytrain;score_func]\n",
+"r1:.ml.xv.mcSplit[p;n;xtrain;ytrain;score_func]\n",
+"r2:.ml.xv.kfShuff[k;n;xtrain;ytrain;score_func]\n",
+"r3:.ml.xv.kfSplit[k;n;xtrain;ytrain;score_func]\n",
 "\n",
 "-1\"Average Model Scores:\";\n",
 "-1\"----------------------------------------------------------------------------\";\n",
@@ -407,7 +407,7 @@
 "outputs": [],
 "source": [
 "/ new scoring function\n",
-"sf:.ml.xv.fitscore[a]\n",
+"sf:.ml.xv.fitScore[a]\n",
 "\n",
 "/ dictionary of parameters\n",
 "gs_hp:`n_estimators`criterion`max_depth!(10 50 100 500;`gini`entropy;2 5 10 20 30)"
@@ -435,35 +435,35 @@
 "\n",
 "n_estimators criterion max_depth| ..\n",
 "--------------------------------| -------------------------------------------..\n",
-"10 gini 2 | 0.956044 0.967033 0.956044 0.956044 0.9..\n",
-"10 gini 5 | 0.9230769 0.967033 0.956044 0.978022 0.9..\n",
-"10 gini 10 | 0.956044 0.967033 0.9230769 1 0.9..\n",
-"10 gini 20 | 0.967033 0.9450549 0.978022 0.956044 0.9..\n",
-"10 gini 30 | 0.9450549 0.956044 0.9450549 0.978022 0.9..\n",
-"10 entropy 2 | 0.9450549 0.9340659 0.9450549 0.9450549 0.9..\n",
-"10 entropy 5 | 0.956044 0.978022 0.967033 0.9230769 0.9..\n",
-"10 entropy 10 | 0.9450549 0.956044 0.956044 0.978022 0.9..\n",
-"10 entropy 20 | 0.9450549 0.9450549 0.978022 0.9450549 0.9..\n",
-"10 entropy 30 | 0.956044 0.967033 0.967033 0.978022 0.9..\n",
-"50 gini 2 | 0.9340659 0.956044 0.956044 0.967033 0.9..\n",
-"50 gini 5 | 0.956044 0.978022 0.956044 0.989011 0.9..\n",
-"50 gini 10 | 0.956044 0.978022 0.978022 0.967033 0.9..\n",
-"50 gini 20 | 0.956044 0.978022 0.967033 1 0.9..\n",
-"50 gini 30 | 0.9450549 0.967033 0.967033 0.978022 0.9..\n",
-"50 entropy 2 | 0.978022 0.967033 0.956044 0.9340659 0.9..\n",
-"50 entropy 5 | 0.9450549 0.967033 0.967033 0.989011 0.9..\n",
-"50 entropy 10 | 0.956044 0.978022 0.989011 0.967033 0.9..\n",
-"50 entropy 20 | 0.956044 0.978022 1 0.978022 0.9..\n",
-"50 entropy 30 | 0.956044 0.978022 0.978022 0.967033 0.9..\n",
-"100 gini 2 | 0.9450549 0.967033 0.967033 0.9450549 0.9..\n",
-"100 gini 5 | 0.956044 0.967033 0.989011 1 0.9..\n",
+"10 gini 2 | 0.956044 0.956044 0.956044 0.956044 0.9..\n",
+"10 gini 5 | 0.9450549 0.956044 0.956044 0.978022 0.9..\n",
+"10 gini 10 | 0.956044 0.9450549 0.978022 0.9450549 0.9..\n",
+"10 gini 20 | 0.956044 0.967033 0.9340659 0.978022 0.9..\n",
+"10 gini 30 | 0.9340659 0.978022 0.967033 0.9340659 0.9..\n",
+"10 entropy 2 | 0.9450549 0.9450549 0.967033 0.9450549 0.9..\n",
+"10 entropy 5 | 0.967033 0.978022 0.956044 0.9450549 0.9..\n",
+"10 entropy 10 | 0.956044 0.967033 0.967033 0.978022 0.9..\n",
+"10 entropy 20 | 0.9340659 0.967033 0.967033 0.978022 0.9..\n",
+"10 entropy 30 | 0.956044 0.978022 0.967033 0.956044 0.9..\n",
+"50 gini 2 | 0.956044 0.967033 0.967033 0.9340659 0.9..\n",
+"50 gini 5 | 0.956044 0.989011 0.967033 0.978022 0.9..\n",
+"50 gini 10 | 0.956044 0.967033 0.967033 1 0.9..\n",
+"50 gini 20 | 0.956044 0.978022 0.989011 0.967033 0.9..\n",
+"50 gini 30 | 0.956044 0.967033 0.956044 0.978022 0.9..\n",
+"50 entropy 2 | 0.967033 0.967033 0.956044 0.956044 0.9..\n",
+"50 entropy 5 | 0.967033 0.978022 0.967033 0.967033 0.9..\n",
+"50 entropy 10 | 0.978022 0.978022 0.967033 0.967033 0.9..\n",
+"50 entropy 20 | 0.956044 0.967033 0.956044 0.989011 0.9..\n",
+"50 entropy 30 | 0.978022 0.978022 0.9450549 0.989011 0.9..\n",
+"100 gini 2 | 0.967033 0.967033 0.967033 0.9340659 0.9..\n",
+"100 gini 5 | 0.967033 0.967033 0.978022 0.978022 0.9..\n",
 "..\n"
 ]
 }
 ],
 "source": [
 "-1\"Grid search: hyperparameters and resulting score from each fold:\\n\";\n",
-"show gr:.ml.gs.kfsplit[k;n;xtrain;ytrain;sf;gs_hp;0]"
+"show gr:.ml.gs.kfSplit[k;n;xtrain;ytrain;sf;gs_hp;0]"
 ]
 },
 {
@@ -508,7 +508,7 @@
 {
 "data": {
 "text/plain": [
-"`n_estimators`criterion`max_depth!(500;`entropy;5)\n",
+"`n_estimators`criterion`max_depth!(500;`entropy;10)\n",
 "0.9824561\n"
 ]
 },
@@ -518,7 +518,7 @@
 }
 ],
 "source": [
-"-2#.ml.gs.kfsplit[k;n;flip value flip table;targets;sf;gs_hp;.2]"
+"-2#.ml.gs.kfSplit[k;n;flip value flip table;targets;sf;gs_hp;.2]"
 ]
 },
 {
@@ -537,7 +537,7 @@
 "data": {
 "text/plain": [
 "`n_estimators`criterion`max_depth!(500;`gini;10)\n",
-"0.9561404\n"
+"0.9473684\n"
 ]
 },
 "execution_count": 15,
@@ -546,7 +546,7 @@
 }
 ],
 "source": [
-"-2#.ml.gs.kfsplit[k;n;flip value flip table;targets;sf;gs_hp;-.2]"
+"-2#.ml.gs.kfSplit[k;n;flip value flip table;targets;sf;gs_hp;-.2]"
 ]
 },
 {
@@ -612,7 +612,7 @@
 {
 "data": {
 "text/plain": [
-"`n_estimators`criterion`max_depth!(410;`entropy;4)\n",
+"`n_estimators`criterion`max_depth!(130;`entropy;20)\n",
 "0.9912281\n"
 ]
 },
@@ -622,7 +622,7 @@
 }
 ],
 "source": [
-"-2#.ml.rs.kfsplit[k;n;flip value flip table;targets;sf;rdm_hp;.2]"
+"-2#.ml.rs.kfSplit[k;n;flip value flip table;targets;sf;rdm_hp;.2]"
 ]
 },
 {
@@ -653,7 +653,7 @@
 {
 "data": {
 "text/plain": [
-"`n_estimators`criterion`max_depth!(378;`entropy;9)\n",
+"`n_estimators`criterion`max_depth!(316;`entropy;6)\n",
 "0.9824561\n"
 ]
 },
@@ -663,7 +663,7 @@
 }
 ],
 "source": [
-"-2#.ml.rs.kfsplit[k;n;flip value flip table;targets;sf;sbl_hp;.2]"
+"-2#.ml.rs.kfSplit[k;n;flip value flip table;targets;sf;sbl_hp;.2]"
 ]
 },
 {
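Taken together, the renamed cross-validation, grid-search and random-search entry points used in this notebook read as below; a summary sketch assuming the `k`, `n`, `p`, `xtrain`, `ytrain`, `score_func`, `sf` and hyperparameter dictionaries defined earlier in the notebook:

s1:.ml.xv.kfSplit[k;n;xtrain;ytrain;score_func]  / sequential k-fold (was .ml.xv.kfsplit)
s2:.ml.xv.kfShuff[k;n;xtrain;ytrain;score_func]  / shuffled k-fold (was .ml.xv.kfshuff)
s3:.ml.xv.kfStrat[k;n;xtrain;ytrain;score_func]  / stratified k-fold (was .ml.xv.kfstrat)
r1:.ml.xv.mcSplit[p;n;xtrain;ytrain;score_func]  / Monte-Carlo split (was .ml.xv.mcsplit)
gr:.ml.gs.kfSplit[k;n;xtrain;ytrain;sf;gs_hp;0]  / grid search (was .ml.gs.kfsplit)
-2#.ml.rs.kfSplit[k;n;flip value flip table;targets;sf;rdm_hp;.2]  / random search (was .ml.rs.kfsplit)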

notebooks/08 Natural Language Processing.ipynb  +114 −112  (large diff not rendered by default)

notebooks/09 K Nearest Neighbours.ipynb  +6 −6  (large diff not rendered by default)

0 commit comments
