|
80 | 80 | " \n",
|
81 | 81 | " def __str__(self):\n",
|
82 | 82 | " return f\"\"\"\n",
|
83 |    | - "Number of featurs: {self._num_features}\n",
84 |    | - "Pheromone values: {self._pheremone}\n",
85 |    | - "Probability value: {self._probability_value}\n",
   | 83 | + " Number of featurs: {self._num_features}\n",
   | 84 | + " Pheromone values: {self._pheremone}\n",
   | 85 | + " Probability value: {self._probability_value}\n",
86 | 86 | "\"\"\""
|
87 | 87 | ]
|
88 | 88 | },
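Note on the hunk above: inside a triple-quoted f-string every character, including leading whitespace, is part of the returned value, so the spaces added to lines 83-85 appear verbatim when the Agent is printed. A minimal sketch of this behaviour (the Report class and its fields are illustrative stand-ins, not the notebook's Agent), using textwrap.dedent to keep the source indented without indenting the output:

import textwrap

class Report:
    """Illustrative stand-in for the notebook's Agent class."""
    def __init__(self, num_features, pheromone):
        self._num_features = num_features
        self._pheromone = pheromone

    def __str__(self):
        # leading whitespace inside a triple-quoted f-string is kept as-is;
        # dedent() strips the common indentation before the text is returned
        return textwrap.dedent(f"""\
            Number of features: {self._num_features}
            Pheromone values: {self._pheromone}
        """)

print(Report(4, [0.2, 0.2, 0.2, 0.2]))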
|
|
166 | 166 | " \n",
|
167 | 167 | " def __str__(self):\n",
|
168 | 168 | " return f\"\"\"\n",
|
169 |     | - "Selected features: {self._selected_features}\n",
170 |     | - "Last selected feature: {self._last_selected_feature} \n",
171 |     | - "Last selected view: {self._last_selected_view}\n",
172 |     | - "Sum of relevance values: {self._total_relevance_value}\n",
173 |     | - "Sum of correlation values: {self._total_correlation_value}\n",
174 |     | - "Total performance: {self._total_performance_value}\n",
    | 169 | + " Selected features: {self._selected_features}\n",
    | 170 | + " Last selected feature: {self._last_selected_feature} \n",
    | 171 | + " Last selected view: {self._last_selected_view}\n",
    | 172 | + " Sum of relevance values: {self._total_relevance_value}\n",
    | 173 | + " Sum of correlation values: {self._total_correlation_value}\n",
    | 174 | + " Total performance: {self._total_performance_value}\n",
175 | 175 | "\"\"\""
|
176 | 176 | ]
|
177 | 177 | },
|
|
473 | 473 | " \n",
|
474 | 474 | " def print_agents(self):\n",
|
475 | 475 | " for agent_index in range(self.total_num_agents):\n",
|
476 |     | - " print(f\"########## Agent {agent_index} ###########\")\n",
    | 476 | + " print(f\" ########## Agent {agent_index} ###########\")\n",
477 | 477 | " print(self._agents[agent_index])\n",
|
478 |     | - " print(\"#########################################\")\n",
    | 478 | + " print(\" #########################################\")\n",
479 | 479 | " \n",
|
480 | 480 | " def print_views(self):\n",
|
481 | 481 | " for view_index in range(self.num_views):\n",
|
482 |     | - " print(f\"############# View {view_index} ########\")\n",
    | 482 | + " print(f\" ############# View {view_index} ########\")\n",
483 | 483 | " print(self.views[view_index])\n",
|
484 |     | - " print(\"#########################################\")\n",
    | 484 | + " print(\" #########################################\")\n",
485 | 485 | " \n",
|
486 | 486 | " def start(self):\n",
|
487 | 487 | " for iteration_index in range(self.num_iters):\n",
|
488 |     | - " print(f\"------------------------------- iteration {iteration_index} -------------------------------\")\n",
    | 488 | + " print(f\" ------------------------------- Iteration {iteration_index + 1} -------------------------------\")\n",
489 | 489 | " self.reset_feature_counter()\n",
|
490 | 490 | " self.reset_agents()\n",
|
491 | 491 | " self.set_agents_start_nodes()\n",
|
492 | 492 | " views_probability = self.get_views_probability()\n",
|
493 | 493 | " \n",
|
494 | 494 | " for feature_index in range(self.num_selected_features - 1):\n",
|
495 |     | - " print(f\" ---------- Selected feature {feature_index} --------------------- \")\n",
    | 495 | + " print(f\" ---------- Current selected feature {feature_index + 2} --------------------- \")\n",
496 | 496 | " for agent_index in range(self.total_num_agents):\n",
|
497 | 497 | " next_view, next_feature, sum_correlation, relevance_value = self.apply_state_transition_rule(agent_index,\n",
|
498 | 498 | " views_probability)\n",
|
|
509 | 509 | " \n",
|
510 | 510 | " def __str__(self):\n",
|
511 | 511 | " return f\"\"\"\n",
|
512 |     | - "Number of iteration: {self.num_iters}\n",
513 |     | - "Number of selected features: {self.num_selected_features} \n",
514 |     | - "Alpha: {self.alpha}\n",
515 |     | - "Beta: {self.beta}\n",
516 |     | - "Q0: {self.q0}\n",
517 |     | - "Discount rate: {self.discount_rate}\n",
518 |     | - "Number of views: {self.num_views}\n",
519 |     | - "Number of agents: {self.num_agents}\n",
520 |     | - "Total number of agents: {self.total_num_agents}\n",
521 |     | - "Best selected features: {self.best_selected_features}\n",
522 |     | - "Count corr computation: {self._count_correlation_computation}\n",
    | 512 | + " Number of iteration: {self.num_iters}\n",
    | 513 | + " Number of selected features: {self.num_selected_features} \n",
    | 514 | + " Alpha: {self.alpha}\n",
    | 515 | + " Beta: {self.beta}\n",
    | 516 | + " Q0: {self.q0}\n",
    | 517 | + " Discount rate: {self.discount_rate}\n",
    | 518 | + " Number of views: {self.num_views}\n",
    | 519 | + " Number of agents: {self.num_agents}\n",
    | 520 | + " Total number of agents: {self.total_num_agents}\n",
    | 521 | + " Best selected features: {self.best_selected_features}\n",
    | 522 | + " Count corr computation: {self._count_correlation_computation}\n",
523 | 523 | "\"\"\" "
|
524 | 524 | ]
|
525 | 525 | },
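The parameters printed by this __str__ (alpha, beta, q0, discount rate, pheromone values) are the usual knobs of an Ant Colony System style search, and start() delegates the actual feature choice to apply_state_transition_rule, whose body is not part of this diff. For orientation only, a sketch of the standard ACS pseudo-random-proportional rule, under the assumption that pheromone and heuristic desirability are available as arrays; this is not the notebook's implementation.

import numpy as np

def acs_transition_rule(pheromone, heuristic, alpha, beta, q0, rng=None):
    """Standard ACS rule (sketch): with probability q0 pick the best-scoring
    candidate, otherwise sample proportionally to pheromone**alpha * heuristic**beta."""
    rng = rng or np.random.default_rng()
    scores = (pheromone ** alpha) * (heuristic ** beta)
    if rng.random() < q0:
        return int(np.argmax(scores))              # exploitation
    probabilities = scores / scores.sum()          # biased exploration
    return int(rng.choice(len(scores), p=probabilities))

# toy call with made-up values
chosen = acs_transition_rule(pheromone=np.array([0.2, 0.5, 0.3]),
                             heuristic=np.array([0.9, 0.1, 0.4]),
                             alpha=1.0, beta=1.0, q0=0.7)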
|
|
900 | 900 | "omics3_final = omics3_final.dropna(axis=1)"
|
901 | 901 | ]
|
902 | 902 | },
|
903 |     | - {
904 |     | - "cell_type": "code",
905 |     | - "execution_count": null,
906 |     | - "id": "eb7df36e",
907 |     | - "metadata": {},
908 |     | - "outputs": [],
909 |     | - "source": [
910 |     | - "def count_expected_removed_features(df):\n",
911 |     | - " count_values = 0\n",
912 |     | - " for col in df.columns.values:\n",
913 |     | - " if df[col].isna().sum() > 0:\n",
914 |     | - " count_values += 1\n",
915 |     | - " return count_values"
916 |     | - ]
917 |     | - },
918 | 903 | {
|
919 | 904 | "cell_type": "code",
|
920 | 905 | "execution_count": null,
|
|
923 | 908 | "outputs": [],
|
924 | 909 | "source": [
|
925 | 910 | "#DNA_methylation Dataset\n",
|
926 |     | - "print('#Original features = ', len(omics1_final.columns))\n",
927 |     | - "# print('#Remaind features (Without missing values) = ', len(df_dna_transposed_shrinked_removed.columns))\n",
928 |     | - "print('#Expected removed features = ', count_expected_removed_features(omics1_final))"
    | 911 | + "print('#Remained features = ', len(omics1_final.columns))"
929 | 912 | ]
|
930 | 913 | },
|
931 | 914 | {
|
|
936 | 919 | "outputs": [],
|
937 | 920 | "source": [
|
938 | 921 | "#Genelevel_copy_number_alteration_CNA Dataset\n",
|
939 |     | - "print('#Original features = ', len(omics2_final.columns))\n",
940 |     | - "# print('#Remaind features (Without missing values) = ', len(df_cna_transposed_shrinked_removed.columns))\n",
941 |     | - "print('#Expected removed features = ', count_expected_removed_features(omics2_final))"
    | 922 | + "print('#Remained features = ', len(omics2_final.columns))"
942 | 923 | ]
|
943 | 924 | },
|
944 | 925 | {
|
|
949 | 930 | "outputs": [],
|
950 | 931 | "source": [
|
951 | 932 | "#RNASeq Dataset\n",
|
952 |     | - "print('#Original features = ', len(omics3_final.columns))\n",
953 |     | - "# print('#Remaind features (Without missing values) = ', len(df_rna_transposed_shrinked_removed.columns))\n",
954 |     | - "print('#Expected removed features = ', count_expected_removed_features(omics3_final))"
    | 933 | + "print('#Remained features = ', len(omics3_final.columns))"
955 | 934 | ]
|
956 | 935 | },
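These cells drop the missing-value check because omics3_final (and presumably the other two omics dataframes, whose preprocessing sits outside this hunk) has already gone through dropna(axis=1), which removes every column containing a NaN, so the count produced by the deleted count_expected_removed_features helper is zero by construction. If the check is ever needed again, it reduces to a single vectorised pandas expression; a sketch with a generic dataframe argument:

import pandas as pd

def count_columns_with_missing(df: pd.DataFrame) -> int:
    # columns that contain at least one NaN -- same result as the removed
    # count_expected_removed_features loop, without iterating column by column
    return int(df.isna().any(axis=0).sum())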
|
957 | 936 | {
|
|
1064 | 1043 | "from sklearn.model_selection import RepeatedStratifiedKFold\n",
|
1065 | 1044 | "from sklearn.linear_model import LogisticRegression\n",
|
1066 | 1045 | "from sklearn.ensemble import RandomForestClassifier\n",
|
1067 |      | - "from sklearn.metrics import accuracy_score"
     | 1046 | + "from sklearn.metrics import accuracy_score\n",
     | 1047 | + "from sklearn.model_selection import cross_val_score"
1068 | 1048 | ]
|
1069 | 1049 | },
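The evaluation loop further down calls get_models() and evaluate_model(...), whose definitions are not part of this diff. Purely as a reading aid, a sketch of what such helpers could look like given exactly these imports; the model choices, keyword arguments and bodies are assumptions, not the notebook's code:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def get_models():
    # assumed mapping of classifier index -> estimator (matches the
    # "Classifier index (i)" wording in the prints below)
    return {0: LogisticRegression(max_iter=1000),
            1: RandomForestClassifier(n_estimators=100)}

def evaluate_model(model, X_train, y_train, X_test, y_test):
    # assumed behaviour: fit on the training split, score on the held-out split
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))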
|
1070 | 1050 | {
|
|
1105 | 1085 | },
|
1106 | 1086 | "outputs": [],
|
1107 | 1087 | "source": [
|
1108 |      | - "from sklearn.model_selection import RepeatedStratifiedKFold\n",
1109 |      | - "from sklearn.model_selection import cross_val_score\n",
1110 |      | - "from sklearn.linear_model import LogisticRegression\n",
1111 |      | - "from sklearn.metrics import accuracy_score\n",
1112 |      | - "from sklearn.metrics import mean_absolute_error\n",
1113 |      | - "\n",
1114 | 1088 | "# settings of Multi Agent algorithm\n",
|
1115 | 1089 | "init_pheromone_value = 0.2\n",
|
1116 | 1090 | "num_views = 3\n",
|
|
1126 | 1100 | "\n",
|
1127 | 1101 | "# repeat algorithm for different sizes of feature subsets\n",
|
1128 | 1102 | "for feature_size in feature_sizes:\n",
|
1129 |      | - " print(f\"\\n\\n*************feature size {feature_size}**********************\")\n",
     | 1103 | + " print(\"\\n\\n***************************************************************\")\n",
     | 1104 | + " print(f\"********************* Feature size {feature_size} *************************\")\n",
     | 1105 | + " print(\"***************************************************************\")\n",
1130 | 1106 | " model_acc = {}\n",
|
1131 | 1107 | " # configurations to repeat the k-fold cross-validation process (designed for imbalanced Classification)\n",
|
1132 |      | - " cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=1)\n",
     | 1108 | + " cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)\n",
1133 | 1109 | " for train_index, test_index in cv.split(omics1_final, y=label_final):\n",
|
1134 | 1110 | " # split datasets into training and test sets\n",
|
1135 |      | - " print(\"\\n\\n****************** Split **********************\")\n",
1136 | 1111 | " omics1_final_train, omics1_final_test = omics1_final.iloc[train_index], omics1_final.iloc[test_index]\n",
|
1137 | 1112 | " omics2_final_train, omics2_final_test = omics2_final.iloc[train_index], omics2_final.iloc[test_index]\n",
|
1138 | 1113 | " omics3_final_train, omics3_final_test = omics3_final.iloc[train_index], omics3_final.iloc[test_index]\n",
|
|
1161 | 1136 | " alg.start()\n",
|
1162 | 1137 | " final_subset = alg.best_selected_features\n",
|
1163 | 1138 | "\n",
|
1164 |      | - " print(f\"\\n\\n Final selected subset: {final_subset}\")\n",
     | 1139 | + " print(f\"\\n\\n Final selected subset: {final_subset}\")\n",
1165 | 1140 | " \n",
|
1166 | 1141 | " # create reduced datasets based on final selected features\n",
|
1167 | 1142 | " feature_indices_view1 = final_subset.get(0, [])\n",
|
|
1184 | 1159 | " # get the list of models to evaluate performance\n",
|
1185 | 1160 | " models = get_models()\n",
|
1186 | 1161 | " # evaluate each model\n",
|
     | 1162 | + " new_output_file_lines = f\"Feature size: {feature_size}\"\n",
1187 | 1163 | " for model_index in models:\n",
|
1188 | 1164 | " acc = evaluate_model(models[model_index], \n",
|
1189 | 1165 | " X_train=final_train_dataset.values,\n",
|
|
1193 | 1169 | " value = model_acc.get(model_index, [])\n",
|
1194 | 1170 | " value.append(acc)\n",
|
1195 | 1171 | " model_acc[model_index] = value\n",
|
     | 1172 | + " print(f\" Classifier index ({model_index}) --> classification accuracy: {acc}\")\n",
     | 1173 | + " new_output_file_lines += f\"\\nClassifier index ({model_index}) --> classification accuracy {model_acc[model_index]}\"\n",
     | 1174 | + "\n",
     | 1175 | + " # write the current results in the output file\n",
     | 1176 | + " output_file_lines = \"\"\n",
     | 1177 | + " try:\n",
     | 1178 | + " output_file_lines = open(\"output_multi_agent.txt\", 'r').readlines()\n",
     | 1179 | + " output_file_lines[-3:] = new_output_file_lines\n",
     | 1180 | + " except Exception:\n",
     | 1181 | + " output_file_lines = new_output_file_lines\n",
1196 | 1182 | " \n",
|
1197 |      | - " with open(\"output_multi_agent.txt\", \"a\") as f:\n",
1198 |      | - " print(f\"Feature size: {feature_size}\", file=f)\n",
1199 |      | - " print(model_acc, file=f)\n",
1200 |      | - " \n",
     | 1183 | + " open(\"output_multi_agent.txt\", 'w').writelines(output_file_lines)\n",
     | 1184 | + " \n",
     | 1185 | + " \n",
     | 1186 | + " print(\" ###########################################################################\")\n",
     | 1187 | + " print(\" ###########################################################################\")\n",
     | 1188 | + " \n",
     | 1189 | + " # prepare the output file for the next feature size\n",
     | 1190 | + " try:\n",
     | 1191 | + " output_file_lines = open(\"output_multi_agent.txt\", 'r').readlines()\n",
     | 1192 | + " output_file_lines.append(\"\\n\\n\\n\\n\\n\")\n",
     | 1193 | + " open(\"output_multi_agent.txt\", 'w').writelines(output_file_lines)\n",
     | 1194 | + " except Exception:\n",
     | 1195 | + " pass\n",
     | 1196 | + " \n",
     | 1197 | + " print(f\"Final classification accuracies for {feature_size} selected features\")\n",
1201 | 1198 | " for model_index in model_acc:\n",
|
1202 |      | - " print(f\"model index: {model_index},,, acc: {model_acc[model_index]}\")"
     | 1199 | + " print(f\" Classifier index ({model_index}) --> classification accuracy: {model_acc[model_index]}\")"
1203 | 1200 | ]
|
1204 | 1201 | }
|
1205 | 1202 | ],
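For reference, the cross-validation object configured in this cell, RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1), yields 25 stratified train/test index pairs per feature size. A self-contained sketch of that splitting pattern on synthetic data (the array shapes stand in for the omics matrices and label_final):

import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold

X = np.random.rand(100, 8)          # stand-in for an omics feature matrix
y = np.repeat([0, 1], 50)           # stand-in for label_final

cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
splits = list(cv.split(X, y))
print(len(splits))                  # 25 = 5 folds x 5 repeats

train_index, test_index = splits[0]
X_train, X_test = X[train_index], X[test_index]   # the notebook uses .iloc for its dataframes
y_train, y_test = y[train_index], y[test_index]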
|
|