-
Notifications
You must be signed in to change notification settings - Fork 94
Description
%% =========================================================================
% A MATLAB-Based Machine Learning Approach for Predicting Ultimate Bearing
% Capacity of Shallow Foundations: A Comparative Study
%
% Author: [Your Name]
% Course: MATLAB Course
% Date: [Current Date]
% Data Source: Compiled from various literature sources on shallow foundation testing.
%
% This script implements multiple machine learning algorithms to predict
% the ultimate bearing capacity (UBC) of shallow foundations, and compares their
% performance against the traditional Meyerhof equation.
% =========================================================================
clear; clc; close all;
% Suppress specific warnings common in machine learning scripts
warning('off', 'MATLAB:table:RowsAddedExistingVars');
warning('off', 'MATLAB:nearlySingularMatrix');
%% =========================================================================
% 1. DATA LOADING AND PREPROCESSING
% =========================================================================
fprintf('=== A MATLAB-Based Machine Learning Approach for Predicting ===\n');
fprintf('=== Ultimate Bearing Capacity of Shallow Foundations ===\n');
fprintf('=== Comparative Study of ML Algorithms ===\n\n');
% Load the dataset
filename = 'soil_dataset.csv';
if ~exist(filename, 'file')
error('Dataset file not found. Please ensure "soil_dataset.csv" is in the current directory.');
end
data = readtable(filename);
fprintf('Dataset loaded successfully.\n');
fprintf('Source: Compiled from various literature sources.\n');
fprintf('Number of samples: %d\n', height(data));
fprintf('Number of features: %d\n\n', width(data)-1);
% Display dataset information
fprintf('Dataset Variables:\n');
disp(data.Properties.VariableNames);
fprintf('\nFirst 5 samples:\n');
disp(head(data, 5));
% Calculate descriptive statistics manually
fprintf('\nDataset Statistics:\n');
numeric_vars = varfun(@isnumeric, data, 'OutputFormat', 'uniform');
numeric_data = data(:, numeric_vars);
stats_table = table();
stats_table.Variable = numeric_data.Properties.VariableNames';
stats_table.Min = min(numeric_data{:,:})';
stats_table.Max = max(numeric_data{:,:})';
stats_table.Mean = mean(numeric_data{:,:})';
stats_table.Median = median(numeric_data{:,:})';
stats_table.Std = std(numeric_data{:,:})';
disp(stats_table);
%% 1.1 Preprocessing - Convert categorical to numerical
% Convert soil_type to categorical and then to dummy variables
soil_types = unique(data.soil_type);
fprintf('\nSoil Types in dataset: ');
fprintf('%s ', soil_types{:});
fprintf('\n');
% Create dummy variables for soil_type
data.soil_type = categorical(data.soil_type);
dummy_soil = dummyvar(data.soil_type);
% Keep one less column to avoid dummy variable trap (reference category)
dummy_soil = dummy_soil(:, 1:end-1);
% Combine all features
numerical_features = {'foundation_width_B', 'foundation_depth_D', ...
'aspect_ratio_LB', 'unit_weight_gamma', ...
'cohesion_c', 'friction_angle_phi', ...
'porosity_n', 'moisture_content_w'};
X_numerical = table2array(data(:, numerical_features));
X = [X_numerical, dummy_soil];
% Target variable
y = data.ultimate_bearing_capacity;
% Feature names including soil types (for interpretability)
feature_names = [numerical_features, string(soil_types(1:end-1))'];
fprintf('\n=== Feature Engineering ===\n');
fprintf('Total features after encoding: %d\n', size(X, 2));
%% 1.2 Feature Correlation Analysis
figure('Position', [100, 100, 1200, 500]);
% Correlation matrix including target
all_data = [X, y];
all_names = [feature_names, "ultimate_bearing_capacity"];
subplot(1,2,1);
R = corrcoef(all_data);
imagesc(R);
colorbar;
caxis([-1 1]);
title('Correlation Matrix', 'FontSize', 12, 'FontWeight', 'bold');
set(gca, 'XTick', 1:length(all_names), 'XTickLabel', all_names, ...
'XTickLabelRotation', 45, 'YTick', 1:length(all_names), ...
'YTickLabel', all_names);
% Correlation with target
subplot(1,2,2);
corr_with_target = R(1:end -1, end);
barh(corr_with_target);
xlabel('Correlation Coefficient');
title('Correlation with Ultimate Bearing Capacity', 'FontSize', 12, 'FontWeight', 'bold');
set(gca, 'YTick', 1:length(feature_names), 'YTickLabel', feature_names);
grid on;
xlim([-1 1]);
%% 1.3 Data Splitting (80% train, 20% test)
rng(42); % For reproducibility
cv = cvpartition(size(X,1), 'HoldOut', 0.2);
idxTrain = training(cv);
idxTest = test(cv);
X_train = X(idxTrain, :);
y_train = y(idxTrain);
X_test = X(idxTest, :);
y_test = y(idxTest);
fprintf('\n=== Data Splitting ===\n');
fprintf('Training samples: %d (%.1f%%)\n', sum(idxTrain), 100sum(idxTrain)/length(idxTrain));
fprintf('Testing samples: %d (%.1f%%)\n\n', sum(idxTest), 100sum(idxTest)/length(idxTest));
%% 1.4 Feature Scaling (Standardization)
% Scale features X
mu = mean(X_train);
sigma = std(X_train);
X_train_scaled = (X_train - mu) ./ sigma;
X_test_scaled = (X_test - mu) ./ sigma;
% Scale target y only for ANN (to improve training convergence)
y_mu = mean(y_train);
y_sigma = std(y_train);
y_train_scaled = (y_train - y_mu) ./ y_sigma;
% Note: Linear Regression, Random Forest, and Gradient Boosting are trained
% on the original (unscaled) target y_train for better interpretability of output.
%% =========================================================================
% 2. TRADITIONAL METHODS FOR COMPARISON
% =========================================================================
fprintf('=== Traditional Bearing Capacity Methods ===\n');
% Requires the separate function 'meyerhof_calc.m' to be in the MATLAB path.
fprintf('\nCalculating bearing capacity using Meyerhof''s equation...\n');
% Preallocate predictions
y_meyerhof = zeros(size(y_test));
for i = 1:size(X_test, 1)
% Extract parameters from test set (unscaled)
B = X_test(i, 1);
D = X_test(i, 2);
L_over_B = X_test(i, 3);
gamma = X_test(i, 4);
c = X_test(i, 5);
phi_deg = X_test(i, 6);
% Calculate bearing capacity using the dedicated function
y_meyerhof(i) = meyerhof_calc(B, D, L_over_B, gamma, c, phi_deg);
end
% Calculate performance metrics for Meyerhof's method
mse_meyerhof = mean((y_meyerhof - y_test).^2);
rmse_meyerhof = sqrt(mse_meyerhof);
mae_meyerhof = mean(abs(y_meyerhof - y_test));
r2_meyerhof = 1 - sum((y_test - y_meyerhof).^2) / sum((y_test - mean(y_test)).^2);
% Calculate MAPE for Meyerhof
mape_meyerhof = mean(abs((y_test - y_meyerhof) ./ y_test)) * 100;
fprintf('Meyerhof''s Equation Performance:\n');
fprintf(' RMSE: %.2f kPa\n', rmse_meyerhof);
fprintf(' MAE: %.2f kPa\n', mae_meyerhof);
fprintf(' R²: %.4f\n\n', r2_meyerhof);
%% =========================================================================
% 3. MACHINE LEARNING MODELS IMPLEMENTATION
% =========================================================================
fprintf('=== Machine Learning Models Implementation ===\n');
% Initialize results structure
results = struct();
% Define Hyperparameters (to be justified in manuscript via tuning)
hiddenLayerSize = [10, 10];
rf_num_trees = 100;
gbm_cycles = 150;
%% 3.1 Multiple Linear Regression
fprintf('\n1. Training Multiple Linear Regression...\n');
% Add column of ones for intercept
X_train_lr = [ones(size(X_train_scaled, 1), 1), X_train_scaled];
X_test_lr = [ones(size(X_test_scaled, 1), 1), X_test_scaled];
% Solve using normal equation
beta = (X_train_lr' * X_train_lr) \ (X_train_lr' * y_train);
y_pred_lr = X_test_lr * beta;
% Store results
results.LinearRegression.y_pred = y_pred_lr;
results.LinearRegression.model = beta;
results.LinearRegression.model_name = 'Multiple Linear Regression';
%% 3.2 Artificial Neural Network (ANN)
fprintf('\n2. Training Artificial Neural Network (ANN)...\n');
% Create and train neural network
net = fitrnet(X_train_scaled, y_train_scaled, ...
'LayerSizes', hiddenLayerSize, ...
'Activations', 'relu', ...
'Standardize', false, ...
'Lambda', 0.01, ...
'IterationLimit', 1000);
% Predict (scale back)
y_pred_ann_scaled = predict(net, X_test_scaled);
y_pred_ann = y_pred_ann_scaled * y_sigma + y_mu;
% Store results
results.ANN.y_pred = y_pred_ann;
results.ANN.model = net;
results.ANN.model_name = 'Artificial Neural Network';
%% 3.3 Random Forest (Ensemble of Decision Trees)
fprintf('\n3. Training Random Forest (Ensemble Method)...\n');
% Train Random Forest model (CRITICAL FIX: Added 'OOBPredictorImportance', 'on')
rf_model = TreeBagger(rf_num_trees, X_train_scaled, y_train, ...
'Method', 'regression', ...
'OOBPrediction', 'on', ...
'OOBPredictorImportance', 'on', ... % <--- FIX TO ENABLE FEATURE IMPORTANCE
'MinLeafSize', 5, ...
'NumPredictorsToSample', 'all');
% Predict and convert from cell to array
y_pred_rf_cell = predict(rf_model, X_test_scaled);
% Robustly convert cell array output to double array
if iscell(y_pred_rf_cell)
y_pred_rf = cellfun(@str2double, y_pred_rf_cell);
else
y_pred_rf = y_pred_rf_cell;
end
% Check for NaN values and replace with mean prediction
if any(isnan(y_pred_rf))
fprintf(' Warning: Random Forest produced NaN predictions. Replacing with mean.\n');
nan_indices = isnan(y_pred_rf);
y_pred_rf(nan_indices) = mean(y_pred_rf(~nan_indices));
end
% Store results
results.RandomForest.y_pred = y_pred_rf;
results.RandomForest.model = rf_model;
results.RandomForest.model_name = 'Random Forest';
%% 3.4 Gradient Boosting Machine
fprintf('\n4. Training Gradient Boosting Machine...\n');
% Train Gradient Boosting model
try
gbm_model = fitrensemble(X_train_scaled, y_train, ...
'Method', 'LSBoost', ...
'NumLearningCycles', gbm_cycles, ...
'LearnRate', 0.1, ...
'Learners', templateTree('MaxNumSplits', 20));
% Predict
y_pred_gbm = predict(gbm_model, X_test_scaled);
% Store results
results.GBM.y_pred = y_pred_gbm;
results.GBM.model = gbm_model;
results.GBM.model_name = 'Gradient Boosting';
catch ME
% Fallback only if fitrensemble is not available
fprintf(' Gradient Boosting failed or toolbox not available. Using simple mean fallback.\n');
y_pred_gbm = mean(y_train) * ones(size(y_test));
results.GBM.y_pred = y_pred_gbm;
results.GBM.model = [];
results.GBM.model_name = 'Ensemble Average (Fallback)';
end
%% =========================================================================
% 4. MODEL EVALUATION AND COMPARISON
% =========================================================================
fprintf('\n=== Model Evaluation and Comparison ===\n');
% Calculate performance metrics for each model
model_fields = fieldnames(results);
performance_metrics = struct();
for i = 1:length(model_fields)
model_name = model_fields{i};
y_pred = results.(model_name).y_pred;
% Remove NaN values for metric calculation
valid_indices = ~isnan(y_pred) & ~isnan(y_test);
y_pred_valid = y_pred(valid_indices);
y_test_valid = y_test(valid_indices);
% Calculate metrics
mse = mean((y_pred_valid - y_test_valid).^2);
rmse = sqrt(mse);
mae = mean(abs(y_pred_valid - y_test_valid));
r2 = 1 - sum((y_test_valid - y_pred_valid).^2) / sum((y_test_valid - mean(y_test_valid)).^2);
% Calculate MAPE (Mean Absolute Percentage Error)
mape = mean(abs((y_test_valid - y_pred_valid) ./ y_test_valid)) * 100;
% Store metrics
performance_metrics.(model_name).RMSE = rmse;
performance_metrics.(model_name).MAE = mae;
performance_metrics.(model_name).R2 = r2;
performance_metrics.(model_name).MAPE = mape;
performance_metrics.(model_name).MSE = mse;
fprintf('\n%s:\n', results.(model_name).model_name);
fprintf(' RMSE: %.2f kPa\n', rmse);
fprintf(' MAE: %.2f kPa\n', mae);
fprintf(' R²: %.4f\n', r2);
fprintf(' MAPE: %.2f%%\n', mape);
end
% Add Meyerhof to performance metrics
performance_metrics.Meyerhof.RMSE = rmse_meyerhof;
performance_metrics.Meyerhof.MAE = mae_meyerhof;
performance_metrics.Meyerhof.R2 = r2_meyerhof;
performance_metrics.Meyerhof.MAPE = mape_meyerhof;
performance_metrics.Meyerhof.MSE = mse_meyerhof;
%% 4.1 Comparative Visualization
figure('Position', [50, 50, 1400, 800]);
all_models = [model_fields; {'Meyerhof'}];
r2_values = zeros(length(all_models), 1);
for i = 1:length(all_models)
if i <= length(model_fields)
r2_values(i) = performance_metrics.(all_models{i}).R2;
else
r2_values(i) = performance_metrics.Meyerhof.R2;
end
end
% Determine the best performing ML model (excluding Meyerhof)
[~, best_idx_ml] = max(r2_values(1:end-1));
best_model = all_models{best_idx_ml};
y_pred_best = results.(best_model).y_pred;
% Subplot 1: RMSE Comparison
subplot(2,3,1);
rmse_values = [r2_values*0]; % Preallocate with dummy values
for i = 1:length(all_models)
if i <= length(model_fields)
rmse_values(i) = performance_metrics.(all_models{i}).RMSE;
else
rmse_values(i) = performance_metrics.Meyerhof.RMSE;
end
end
bar(rmse_values);
ylabel('RMSE (kPa)');
title('Root Mean Square Error Comparison', 'FontWeight', 'bold');
set(gca, 'XTickLabel', all_models, 'XTickLabelRotation', 45);
grid on;
% Subplot 2: R² Comparison
subplot(2,3,2);
bar(r2_values);
ylabel('R² Score');
title('Coefficient of Determination (R²)', 'FontWeight', 'bold');
set(gca, 'XTickLabel', all_models, 'XTickLabelRotation', 45);
grid on;
ylim([-0.1 1]);
% Subplot 3: MAE Comparison
subplot(2,3,3);
mae_values = [r2_values*0]; % Preallocate with dummy values
for i = 1:length(all_models)
if i <= length(model_fields)
mae_values(i) = performance_metrics.(all_models{i}).MAE;
else
mae_values(i) = performance_metrics.Meyerhof.MAE;
end
end
bar(mae_values);
ylabel('MAE (kPa)');
title('Mean Absolute Error Comparison', 'FontWeight', 'bold');
set(gca, 'XTickLabel', all_models, 'XTickLabelRotation', 45);
grid on;
% Subplot 4: Predicted vs Actual for best model
subplot(2,3,4);
scatter(y_test, y_pred_best, 50, 'filled', 'MarkerFaceAlpha', 0.6);
hold on;
plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--', 'LineWidth', 2);
xlabel('Actual Bearing Capacity (kPa)');
ylabel('Predicted Bearing Capacity (kPa)');
title(sprintf('Predicted vs Actual (%s)', results.(best_model).model_name), 'FontWeight', 'bold');
grid on;
axis equal;
xlim([min(y_test), max(y_test)]);
ylim([min(y_test), max(y_test)]);
% Add R² value to plot
text(0.05, 0.95, sprintf('R² = %.4f', r2_values(best_idx_ml)), ...
'Units', 'normalized', 'FontSize', 11, 'FontWeight', 'bold', ...
'BackgroundColor', 'white', 'EdgeColor', 'black');
% Subplot 5: Residual Analysis for best model
subplot(2,3,5);
residuals = y_test - y_pred_best;
residuals = residuals(isfinite(y_pred_best)); % Only use finite predictions
y_pred_best_valid = y_pred_best(isfinite(y_pred_best));
scatter(y_pred_best_valid, residuals, 50, 'filled', 'MarkerFaceAlpha', 0.6);
hold on;
plot([min(y_pred_best_valid), max(y_pred_best_valid)], [0, 0], 'r--', 'LineWidth', 2);
xlabel('Predicted Bearing Capacity (kPa)');
ylabel('Residuals (Actual - Predicted)');
title(sprintf('Residual Plot (%s)', results.(best_model).model_name), 'FontWeight', 'bold');
grid on;
% Subplot 6: Error Distribution Comparison (KSDENSITY for errors)
subplot(2,3,6);
hold on;
colors = lines(length(model_fields));
legend_entries = cell(length(all_models), 1);
% Plot error distributions for ML models
for i = 1:length(model_fields)
errors = y_test - results.(model_fields{i}).y_pred;
valid_errors = errors(isfinite(errors));
legend_entries{i} = results.(model_fields{i}).model_name;
if ~isempty(valid_errors) && length(unique(valid_errors)) > 1
[f, xi] = ksdensity(valid_errors);
plot(xi, f, 'LineWidth', 2, 'Color', colors(i,:));
end
end
% Add Meyerhof errors
errors_meyerhof = y_test - y_meyerhof;
valid_errors_meyerhof = errors_meyerhof(isfinite(errors_meyerhof));
legend_entries{end} = 'Meyerhof Equation';
if ~isempty(valid_errors_meyerhof) && length(unique(valid_errors_meyerhof)) > 1
[f, xi] = ksdensity(valid_errors_meyerhof);
plot(xi, f, 'LineWidth', 2, 'Color', 'k', 'LineStyle', '--');
end
xlabel('Prediction Error (kPa)');
ylabel('Density');
title('Error Distribution Comparison', 'FontWeight', 'bold');
grid on;
legend(legend_entries, 'Location', 'best');
sgtitle('Comparative Analysis of ML Models for Bearing Capacity Prediction', ...
'FontSize', 14, 'FontWeight', 'bold');
%% 4.2 Performance Metrics Table
fprintf('\n=== Performance Metrics Summary ===\n');
fprintf('%-30s %-10s %-10s %-10s %-10s\n', ...
'Model', 'RMSE', 'MAE', 'R²', 'MAPE(%)');
fprintf('%s\n', repmat('-', 1, 70));
% Display metrics for each model
for i = 1:length(all_models)
model_key = all_models{i};
metrics_struct = performance_metrics.(model_key);
if i <= length(model_fields)
model_display = results.(model_key).model_name;
else
model_display = 'Meyerhof Equation';
end
fprintf('%-30s %-10.2f %-10.2f %-10.4f %-10.2f\n', ...
model_display, metrics_struct.RMSE, metrics_struct.MAE, metrics_struct.R2, metrics_struct.MAPE);
end
%% =========================================================================
% 5. FEATURE IMPORTANCE ANALYSIS
% =========================================================================
fprintf('\n=== Feature Importance Analysis ===\n');
% Calculate feature importance using Random Forest (interpretable ensemble method)
if isfield(results, 'RandomForest')
fprintf('\nFeature Importance (Random Forest):\n');
try
% FIX: Access the feature importance property directly from the TreeBagger model
% This property is now guaranteed to exist due to the 'OOBPredictorImportance', 'on' flag
imp = results.RandomForest.model.OOBPermutedPredictorDeltaError;
if isempty(imp)
fprintf(' Warning: OOBPermutedPredictorDeltaError property is empty. Feature importance requires sufficient data variance.\n');
else
% Sort importance
[imp_sorted, idx] = sort(imp, 'descend');
% Plot feature importance
figure('Position', [100, 100, 800, 500]);
barh(imp_sorted);
set(gca, 'YTick', 1:length(feature_names), 'YTickLabel', feature_names(idx));
xlabel('Importance (Increase in OOB MSE)');
title('Feature Importance - Random Forest', 'FontSize', 12, 'FontWeight', 'bold');
grid on;
% Display top features
fprintf('\nTop 5 Most Important Features:\n');
for i = 1:min(5, length(feature_names))
fprintf('%d. %s (Importance: %.4f)\n', i, feature_names{idx(i)}, imp_sorted(i));
end
end
catch ME
% Catch block handles unexpected errors during property access
fprintf(' Error accessing feature importance: %s\n', ME.message);
fprintf(' Ensure the Statistics and Machine Learning Toolbox is properly installed.\n');
end
end
%% =========================================================================
% 6. CROSS-VALIDATION FOR ROBUSTNESS CHECK
% =========================================================================
fprintf('\n=== Cross-Validation Analysis ===\n');
% Perform 5-fold cross-validation on ALL ML models for stability check.
fprintf('\nPerforming 5-fold cross-validation on all ML models...\n');
% Prepare data
X_full_scaled = (X - mu) ./ sigma;
% Create cross-validation partition
cv_partition = cvpartition(length(y), 'KFold', 5);
% Initialize arrays for CV results
cv_results = struct();
cv_r2 = zeros(5, length(model_fields));
cv_rmse = zeros(5, length(model_fields));
fprintf('Fold\t\t');
for i = 1:length(model_fields)
fprintf('%s R²\t\t', model_fields{i});
end
fprintf('\n%s\n', repmat('-', 1, 10 + 10 * length(model_fields)));
for fold = 1:5
fprintf('%d\t\t', fold);
% Get training and validation indices
trainIdx = training(cv_partition, fold);
valIdx = test(cv_partition, fold);
% Split data
X_train_cv = X_full_scaled(trainIdx, :);
y_train_cv = y(trainIdx);
X_val_cv = X_full_scaled(valIdx, :);
y_val_cv = y(valIdx);
for i = 1:length(model_fields)
model_key = model_fields{i};
y_pred_cv = []; % Initialize prediction
switch model_key
case 'LinearRegression'
X_train_lr_cv = [ones(size(X_train_cv, 1), 1), X_train_cv];
X_val_lr_cv = [ones(size(X_val_cv, 1), 1), X_val_cv];
beta_cv = (X_train_lr_cv' * X_train_lr_cv) \ (X_train_lr_cv' * y_train_cv);
y_pred_cv = X_val_lr_cv * beta_cv;
case 'ANN'
y_mu_cv = mean(y_train_cv);
y_sigma_cv = std(y_train_cv);
y_train_scaled_cv = (y_train_cv - y_mu_cv) ./ y_sigma_cv;
net_cv = fitrnet(X_train_cv, y_train_scaled_cv, ...
'LayerSizes', hiddenLayerSize, 'Standardize', false);
y_pred_scaled_cv = predict(net_cv, X_val_cv);
y_pred_cv = y_pred_scaled_cv * y_sigma_cv + y_mu_cv;
case 'RandomForest'
% Note: OOBPredictorImportance is NOT needed here as we only need predictions
rf_model_cv = TreeBagger(50, X_train_cv, y_train_cv, 'Method', 'regression', 'MinLeafSize', 5);
y_pred_cv_cell = predict(rf_model_cv, X_val_cv);
if iscell(y_pred_cv_cell)
y_pred_cv = cellfun(@str2double, y_pred_cv_cell);
else
y_pred_cv = y_pred_cv_cell;
end
case 'GBM'
% Ensure a new model is trained for each fold
gbm_model_cv = fitrensemble(X_train_cv, y_train_cv, ...
'Method', 'LSBoost', 'NumLearningCycles', 50);
y_pred_cv = predict(gbm_model_cv, X_val_cv);
end
% Calculate metrics
valid_idx = ~isnan(y_pred_cv) & ~isnan(y_val_cv);
y_pred_cv_valid = y_pred_cv(valid_idx);
y_val_cv_valid = y_val_cv(valid_idx);
if isempty(y_val_cv_valid) || std(y_val_cv_valid) == 0
cv_r2(fold, i) = NaN;
cv_rmse(fold, i) = NaN;
else
cv_r2(fold, i) = 1 - sum((y_val_cv_valid - y_pred_cv_valid).^2) / sum((y_val_cv_valid - mean(y_val_cv_valid)).^2);
cv_rmse(fold, i) = sqrt(mean((y_val_cv_valid - y_pred_cv_valid).^2));
end
fprintf('%.4f\t', cv_r2(fold, i));
end
fprintf('\n');
end
% Summarize CV Results
fprintf('\nCross-Validation Results Summary:\n');
cv_metrics_table = table();
for i = 1:length(model_fields)
model_name = results.(model_fields{i}).model_name;
R2_mean = nanmean(cv_r2(:, i));
R2_std = nanstd(cv_r2(:, i));
RMSE_mean = nanmean(cv_rmse(:, i));
RMSE_std = nanstd(cv_rmse(:, i));
fprintf('%-30s\tAvg R²: %.4f ± %.4f\tAvg RMSE: %.2f ± %.2f kPa\n', ...
model_name, R2_mean, R2_std, RMSE_mean, RMSE_std);
% Store in table for easy saving
cv_metrics_table.Model{i} = model_name;
cv_metrics_table.AvgR2(i) = R2_mean;
cv_metrics_table.StdR2(i) = R2_std;
cv_metrics_table.AvgRMSE(i) = RMSE_mean;
cv_metrics_table.StdRMSE(i) = RMSE_std;
end
%% =========================================================================
% 7. PREDICTION ON NEW DATA (DEMONSTRATION)
% =========================================================================
fprintf('\n=== Prediction Demonstration ===\n');
% Determine the correct number of features
num_numerical_features = 8;
num_soil_types = length(soil_types) - 1;
total_features = num_numerical_features + num_soil_types;
fprintf('Number of numerical features: %d\n', num_numerical_features);
fprintf('Number of soil type dummy variables: %d\n', num_soil_types);
fprintf('Total features expected: %d\n', total_features);
% Define sample data parameters
sample_B = 1.50;
sample_D = 1.20;
sample_LB = 1.5;
sample_gamma = 18.5;
sample_c = 75.0;
sample_phi = 30.0;
sample_n = 0.45;
sample_w = 20.0;
sample_soil = 'Alluvial Deposit';
% Find which soil type is the sample soil
alluvial_index = find(strcmp(soil_types, sample_soil));
fprintf('Sample Soil Type: %s (#%d of %d)\n', sample_soil, alluvial_index, length(soil_types));
% Create new sample data
new_sample_numerical = [sample_B, sample_D, sample_LB, sample_gamma, sample_c, sample_phi, sample_n, sample_w];
% Create soil type encoding
soil_encoding = zeros(1, num_soil_types);
if alluvial_index <= num_soil_types
soil_encoding(alluvial_index) = 1;
end % If it's the reference category, all dummies are 0
new_sample = [new_sample_numerical, soil_encoding];
% Verify dimensions
fprintf('\nNew sample dimensions: %d x %d\n', size(new_sample, 1), size(new_sample, 2));
if length(new_sample) ~= total_features
error('New sample has incorrect number of features. Expected %d, got %d.', total_features, length(new_sample));
end
% Scale the new sample
new_sample_scaled = (new_sample - mu) ./ sigma;
% Predict using all models
fprintf('\nPredicting bearing capacity for new sample:\n');
fprintf('Foundation width: %.2f m\n', sample_B);
fprintf('Foundation depth: %.2f m\n', sample_D);
fprintf('Soil type: %s\n', sample_soil);
fprintf('Cohesion: %.1f kPa\n', sample_c);
fprintf('Friction angle: %.1f°\n\n', sample_phi);
pred_lr = ([1, new_sample_scaled] * beta);
fprintf('Linear Regression: %.2f kPa\n', pred_lr);
pred_ann_scaled = predict(net, new_sample_scaled);
pred_ann = pred_ann_scaled * y_sigma + y_mu;
fprintf('ANN: %.2f kPa\n', pred_ann);
pred_rf_cell = predict(rf_model, new_sample_scaled);
if iscell(pred_rf_cell)
pred_rf = str2double(pred_rf_cell{1});
else
pred_rf = pred_rf_cell;
end
fprintf('Random Forest: %.2f kPa\n', pred_rf);
if ~isempty(results.GBM.model)
pred_gbm = predict(results.GBM.model, new_sample_scaled);
else
pred_gbm = mean([pred_lr, pred_ann, pred_rf]); % Fallback
end
fprintf('Gradient Boosting: %.2f kPa\n', pred_gbm);
% Meyerhof Equation
pred_meyerhof = meyerhof_calc(sample_B, sample_D, sample_LB, sample_gamma, sample_c, sample_phi);
fprintf('Meyerhof Equation: %.2f kPa\n', pred_meyerhof);
% Calculate average prediction
ml_predictions = [pred_lr, pred_ann, pred_rf, pred_gbm];
valid_ml_preds = ml_predictions(isfinite(ml_predictions));
avg_prediction = mean(valid_ml_preds);
fprintf('\nAverage ML Prediction: %.2f kPa\n', avg_prediction);
fprintf('Range: %.2f to %.2f kPa\n', min(valid_ml_preds), max(valid_ml_preds));
%% =========================================================================
% 8. SAVE RESULTS AND MODELS
% =========================================================================
fprintf('\n=== Saving Results ===\n');
% Save workspace variables
save('bearing_capacity_analysis_results.mat', ...
'results', 'performance_metrics', 'cv_metrics_table', 'feature_names', ...
'X_train', 'X_test', 'y_train', 'y_test', ...
'mu', 'sigma', 'y_mu', 'y_sigma');
% Save performance metrics to CSV
model_names_display = cell(length(all_models), 1);
for i = 1:length(model_fields)
model_names_display{i} = results.(model_fields{i}).model_name;
end
model_names_display{end+1} = 'Meyerhof Equation';
metrics_table = table();
metrics_table.Model = model_names_display;
metrics_table.RMSE = zeros(length(model_names_display), 1);
metrics_table.MAE = zeros(length(model_names_display), 1);
metrics_table.R2 = zeros(length(model_names_display), 1);
metrics_table.MAPE = zeros(length(model_names_display), 1);
% Fill the table (FIXED INDEXING)
for i = 1:length(model_names_display)
if i <= length(model_fields)
model_key = model_fields{i};
else
model_key = 'Meyerhof';
end
metrics_table.RMSE(i) = performance_metrics.(model_key).RMSE;
metrics_table.MAE(i) = performance_metrics.(model_key).MAE;
metrics_table.R2(i) = performance_metrics.(model_key).R2;
metrics_table.MAPE(i) = performance_metrics.(model_key).MAPE;
end
writetable(metrics_table, 'model_performance_metrics.csv');
fprintf('Performance metrics saved to: model_performance_metrics.csv\n');
writetable(cv_metrics_table, 'cv_robustness_metrics.csv');
fprintf('Cross-validation metrics saved to: cv_robustness_metrics.csv\n');
% Save predictions
predictions_table = table();
predictions_table.Actual = y_test;
for i = 1:length(model_fields)
model = model_fields{i};
predictions_table.(results.(model).model_name) = results.(model).y_pred;
end
predictions_table.Meyerhof = y_meyerhof;
writetable(predictions_table, 'model_predictions.csv');
fprintf('Predictions saved to: model_predictions.csv\n');
%% =========================================================================
% 9. SUMMARY AND CONCLUSIONS
% =========================================================================
fprintf('\n=== Summary and Conclusions ===\n');
fprintf('\nProject Title: A MATLAB-Based Machine Learning Approach for Predicting\n');
fprintf(' Ultimate Bearing Capacity of Shallow Foundations\n');
fprintf(' A Comparative Study\n\n');
fprintf('Key Findings:\n');
fprintf('1. Dataset: %d samples with %d features (Source: Literature Review)\n', height(data), width(data)-1);
fprintf('2. Best Performing ML Model (Test Set): %s (R² = %.4f)\n', ...
results.(best_model).model_name, performance_metrics.(best_model).R2);
fprintf('3. Traditional Method (Meyerhof): R² = %.4f\n', performance_metrics.Meyerhof.R2);
if performance_metrics.(best_model).R2 > performance_metrics.Meyerhof.R2
improvement = ((performance_metrics.(best_model).R2 - performance_metrics.Meyerhof.R2) / ...
performance_metrics.Meyerhof.R2) * 100;
fprintf('4. ML Improvement over Meyerhof: +%.1f%% in R²\n', improvement);
end
fprintf('\nRecommendations:\n');
fprintf('1. For high accuracy and complex non-linear mapping: Use %s model\n', results.(best_model).model_name);
fprintf('2. For model interpretability and quick analysis: Use Linear Regression or Random Forest (for feature importance)\n');
fprintf('3. Traditional methods remain valuable for preliminary estimates and sanity checks\n');
fprintf('\n=== Project Completed Successfully ===\n');
fprintf('All results have been saved to files.\n');
fprintf('Visualizations are displayed in figures.\n');
fprintf('\n=== END OF SCRIPT ===\n');