Skip to content

A MATLAB-Based Machine Learning Approach for Predicting Ultimate Bearing Capacity of Shallow Foundations: A Comparative Study #14

@yilachew2020-beep

Description

@yilachew2020-beep

%% =========================================================================
% A MATLAB-Based Machine Learning Approach for Predicting Ultimate Bearing
% Capacity of Shallow Foundations: A Comparative Study
%
% Author: [Your Name]
% Course: MATLAB Course
% Date: [Current Date]
% Data Source: Compiled from various literature sources on shallow foundation testing.
%
% This script implements multiple machine learning algorithms to predict
% the ultimate bearing capacity (UBC) of shallow foundations, and compares their
% performance against the traditional Meyerhof equation.
% =========================================================================
clear; clc; close all;
% Suppress specific warnings common in machine learning scripts
warning('off', 'MATLAB:table:RowsAddedExistingVars');
warning('off', 'MATLAB:nearlySingularMatrix');
%% =========================================================================
% 1. DATA LOADING AND PREPROCESSING
% =========================================================================
% Loads soil_dataset.csv into a table, reports its size, and prints a
% manually assembled descriptive-statistics table for the numeric columns.
fprintf('=== A MATLAB-Based Machine Learning Approach for Predicting ===\n');
fprintf('=== Ultimate Bearing Capacity of Shallow Foundations ===\n');
fprintf('=== Comparative Study of ML Algorithms ===\n\n');
% Load the dataset
filename = 'soil_dataset.csv';
if ~exist(filename, 'file')
error('Dataset file not found. Please ensure "soil_dataset.csv" is in the current directory.');
end
data = readtable(filename);
fprintf('Dataset loaded successfully.\n');
fprintf('Source: Compiled from various literature sources.\n');
fprintf('Number of samples: %d\n', height(data));
% width(data)-1 assumes exactly one target column in the table.
fprintf('Number of features: %d\n\n', width(data)-1);
% Display dataset information
fprintf('Dataset Variables:\n');
disp(data.Properties.VariableNames);
fprintf('\nFirst 5 samples:\n');
disp(head(data, 5));
% Calculate descriptive statistics manually
fprintf('\nDataset Statistics:\n');
% varfun with 'uniform' output yields a logical mask over table variables.
numeric_vars = varfun(@isnumeric, data, 'OutputFormat', 'uniform');
numeric_data = data(:, numeric_vars);
stats_table = table();
stats_table.Variable = numeric_data.Properties.VariableNames';
% Column-wise statistics, transposed so each row describes one variable.
stats_table.Min = min(numeric_data{:,:})';
stats_table.Max = max(numeric_data{:,:})';
stats_table.Mean = mean(numeric_data{:,:})';
stats_table.Median = median(numeric_data{:,:})';
stats_table.Std = std(numeric_data{:,:})';
disp(stats_table);

%% 1.1 Preprocessing - Convert categorical to numerical
% One-hot encodes soil_type (dropping the LAST category as the reference)
% and assembles the numeric feature matrix X and the target vector y.
% Convert soil_type to categorical and then to dummy variables
soil_types = unique(data.soil_type);
fprintf('\nSoil Types in dataset: ');
fprintf('%s ', soil_types{:});
fprintf('\n');

% Create dummy variables for soil_type
data.soil_type = categorical(data.soil_type);
dummy_soil = dummyvar(data.soil_type);
% Keep one less column to avoid dummy variable trap (reference category)
dummy_soil = dummy_soil(:, 1:end-1);

% Combine all features
% Column order here fixes the column order of X; the Meyerhof loop in
% section 2 indexes X_test positionally against this list.
numerical_features = {'foundation_width_B', 'foundation_depth_D', ...
'aspect_ratio_LB', 'unit_weight_gamma', ...
'cohesion_c', 'friction_angle_phi', ...
'porosity_n', 'moisture_content_w'};
X_numerical = table2array(data(:, numerical_features));
X = [X_numerical, dummy_soil];
% Target variable
y = data.ultimate_bearing_capacity;
% Feature names including soil types (for interpretability)
% NOTE(review): concatenating a cellstr with a string array produces a
% string array, so feature_names is a string array from here on — confirm
% downstream uses expect that.
feature_names = [numerical_features, string(soil_types(1:end-1))'];
fprintf('\n=== Feature Engineering ===\n');
fprintf('Total features after encoding: %d\n', size(X, 2));

%% 1.2 Feature Correlation Analysis
% Visualizes the feature/target correlation structure: a heatmap of the
% full correlation matrix and a bar chart of each feature vs. the target.
figure('Position', [100, 100, 1200, 500]);
% Correlation matrix including target
all_data = [X, y];
all_names = [feature_names, "ultimate_bearing_capacity"];
subplot(1,2,1);
R = corrcoef(all_data);
imagesc(R);
colorbar;
caxis([-1 1]);
title('Correlation Matrix', 'FontSize', 12, 'FontWeight', 'bold');
set(gca, 'XTick', 1:length(all_names), 'XTickLabel', all_names, ...
'XTickLabelRotation', 45, 'YTick', 1:length(all_names), ...
'YTickLabel', all_names);
% Correlation with target
subplot(1,2,2);
% Last column of R holds the correlation of every feature with the target.
corr_with_target = R(1:end -1, end);
barh(corr_with_target);
xlabel('Correlation Coefficient');
title('Correlation with Ultimate Bearing Capacity', 'FontSize', 12, 'FontWeight', 'bold');
set(gca, 'YTick', 1:length(feature_names), 'YTickLabel', feature_names);
grid on;
xlim([-1 1]);
%% 1.3 Data Splitting (80% train, 20% test)
% Hold-out split: 80% train / 20% test, seeded for reproducibility.
rng(42); % For reproducibility
cv = cvpartition(size(X,1), 'HoldOut', 0.2);
idxTrain = training(cv);
idxTest = test(cv);
X_train = X(idxTrain, :);
y_train = y(idxTrain);
X_test = X(idxTest, :);
y_test = y(idxTest);
fprintf('\n=== Data Splitting ===\n');
% FIX: restored the '*' operators that were lost in the original source
% ("100sum(idxTrain)/..." and a mid-expression line break without '...'),
% both of which were MATLAB syntax errors. '%%' prints a literal percent.
fprintf('Training samples: %d (%.1f%%)\n', sum(idxTrain), 100*sum(idxTrain)/length(idxTrain));
fprintf('Testing samples: %d (%.1f%%)\n\n', sum(idxTest), 100*sum(idxTest)/length(idxTest));

%% 1.4 Feature Scaling (Standardization)
% Standardize X using statistics computed from the training split only,
% so no information from the held-out test rows leaks into the transform.
mu = mean(X_train, 1);
sigma = std(X_train, 0, 1);
X_train_scaled = (X_train - mu) ./ sigma;
X_test_scaled = (X_test - mu) ./ sigma;

% The target is standardized solely for the ANN, where a zero-mean,
% unit-variance response improves training convergence.
y_mu = mean(y_train);
y_sigma = std(y_train);
y_train_scaled = (y_train - y_mu) ./ y_sigma;
% Note: Linear Regression, Random Forest, and Gradient Boosting are trained
% on the original (unscaled) target y_train for better interpretability of output.

%% =========================================================================
% 2. TRADITIONAL METHODS FOR COMPARISON
% =========================================================================
% Baseline: Meyerhof's classical bearing-capacity equation evaluated on the
% UNSCALED test rows, scored with the same metrics used for the ML models.
fprintf('=== Traditional Bearing Capacity Methods ===\n');
% Requires the separate function 'meyerhof_calc.m' to be in the MATLAB path.
fprintf('\nCalculating bearing capacity using Meyerhof''s equation...\n');

% Preallocate predictions
y_meyerhof = zeros(size(y_test));
for i = 1:size(X_test, 1)
% Extract parameters from test set (unscaled)
% Column positions follow numerical_features defined in section 1.1.
B = X_test(i, 1);
D = X_test(i, 2);
L_over_B = X_test(i, 3);
gamma = X_test(i, 4);
c = X_test(i, 5);
phi_deg = X_test(i, 6);

% Calculate bearing capacity using the dedicated function
y_meyerhof(i) = meyerhof_calc(B, D, L_over_B, gamma, c, phi_deg);

end

% Calculate performance metrics for Meyerhof's method
mse_meyerhof = mean((y_meyerhof - y_test).^2);
rmse_meyerhof = sqrt(mse_meyerhof);
mae_meyerhof = mean(abs(y_meyerhof - y_test));
r2_meyerhof = 1 - sum((y_test - y_meyerhof).^2) / sum((y_test - mean(y_test)).^2);
% Calculate MAPE for Meyerhof
% NOTE(review): MAPE divides by y_test; assumes no zero capacities — confirm.
mape_meyerhof = mean(abs((y_test - y_meyerhof) ./ y_test)) * 100;

fprintf('Meyerhof''s Equation Performance:\n');
fprintf(' RMSE: %.2f kPa\n', rmse_meyerhof);
fprintf(' MAE: %.2f kPa\n', mae_meyerhof);
fprintf(' R²: %.4f\n\n', r2_meyerhof);

%% =========================================================================
% 3. MACHINE LEARNING MODELS IMPLEMENTATION
% =========================================================================
fprintf('=== Machine Learning Models Implementation ===\n');
% Initialize results structure
results = struct();
% Define Hyperparameters (to be justified in manuscript via tuning)
hiddenLayerSize = [10, 10];
rf_num_trees = 100;
gbm_cycles = 150;

%% 3.1 Multiple Linear Regression
fprintf('\n1. Training Multiple Linear Regression...\n');
% Add column of ones for intercept
X_train_lr = [ones(size(X_train_scaled, 1), 1), X_train_scaled];
X_test_lr = [ones(size(X_test_scaled, 1), 1), X_test_scaled];
% FIX: solve the least-squares problem with mldivide on the design matrix
% (QR-based) instead of forming the normal equations (X'X)\(X'y). Forming
% X'X squares the condition number — the likely source of the
% 'nearlySingularMatrix' warning this script suppresses in its preamble.
beta = X_train_lr \ y_train;
y_pred_lr = X_test_lr * beta;
% Store results
results.LinearRegression.y_pred = y_pred_lr;
results.LinearRegression.model = beta;
results.LinearRegression.model_name = 'Multiple Linear Regression';

%% 3.2 Artificial Neural Network (ANN)
% Trains a two-hidden-layer feed-forward regression network (fitrnet,
% Statistics and Machine Learning Toolbox) on the scaled features and the
% standardized target, then un-standardizes its predictions.
fprintf('\n2. Training Artificial Neural Network (ANN)...\n');
% Create and train neural network
% 'Standardize' is false because X and y were already standardized in 1.4.
net = fitrnet(X_train_scaled, y_train_scaled, ...
'LayerSizes', hiddenLayerSize, ...
'Activations', 'relu', ...
'Standardize', false, ...
'Lambda', 0.01, ...
'IterationLimit', 1000);
% Predict (scale back)
y_pred_ann_scaled = predict(net, X_test_scaled);
y_pred_ann = y_pred_ann_scaled * y_sigma + y_mu;
% Store results
results.ANN.y_pred = y_pred_ann;
results.ANN.model = net;
results.ANN.model_name = 'Artificial Neural Network';

%% 3.3 Random Forest (Ensemble of Decision Trees)
% Trains a bagged-tree ensemble on scaled features and the UNSCALED target.
% OOBPredictorImportance must be 'on' here for section 5 to read
% OOBPermutedPredictorDeltaError later.
fprintf('\n3. Training Random Forest (Ensemble Method)...\n');
% Train Random Forest model (CRITICAL FIX: Added 'OOBPredictorImportance', 'on')
rf_model = TreeBagger(rf_num_trees, X_train_scaled, y_train, ...
'Method', 'regression', ...
'OOBPrediction', 'on', ...
'OOBPredictorImportance', 'on', ... % <--- FIX TO ENABLE FEATURE IMPORTANCE
'MinLeafSize', 5, ...
'NumPredictorsToSample', 'all');
% Predict and convert from cell to array
% NOTE(review): TreeBagger regression predict normally returns a numeric
% vector; the cell branch appears to be defensive — confirm it is reachable.
y_pred_rf_cell = predict(rf_model, X_test_scaled);
% Robustly convert cell array output to double array
if iscell(y_pred_rf_cell)
y_pred_rf = cellfun(@str2double, y_pred_rf_cell);
else
y_pred_rf = y_pred_rf_cell;
end
% Check for NaN values and replace with mean prediction
if any(isnan(y_pred_rf))
fprintf(' Warning: Random Forest produced NaN predictions. Replacing with mean.\n');
nan_indices = isnan(y_pred_rf);
y_pred_rf(nan_indices) = mean(y_pred_rf(~nan_indices));
end
% Store results
results.RandomForest.y_pred = y_pred_rf;
results.RandomForest.model = rf_model;
results.RandomForest.model_name = 'Random Forest';

%% 3.4 Gradient Boosting Machine
% Trains an LSBoost tree ensemble; falls back to a constant mean predictor
% if fitrensemble is unavailable or fails, so the comparison can continue.
fprintf('\n4. Training Gradient Boosting Machine...\n');
% Train Gradient Boosting model
try
gbm_model = fitrensemble(X_train_scaled, y_train, ...
'Method', 'LSBoost', ...
'NumLearningCycles', gbm_cycles, ...
'LearnRate', 0.1, ...
'Learners', templateTree('MaxNumSplits', 20));

% Predict
y_pred_gbm = predict(gbm_model, X_test_scaled);

% Store results
results.GBM.y_pred = y_pred_gbm;
results.GBM.model = gbm_model;
results.GBM.model_name = 'Gradient Boosting';

catch ME
% Fallback only if fitrensemble is not available
% The empty model field signals the fallback to section 7's demo code.
fprintf(' Gradient Boosting failed or toolbox not available. Using simple mean fallback.\n');
y_pred_gbm = mean(y_train) * ones(size(y_test));
results.GBM.y_pred = y_pred_gbm;
results.GBM.model = [];
results.GBM.model_name = 'Ensemble Average (Fallback)';
end

%% =========================================================================
% 4. MODEL EVALUATION AND COMPARISON
% =========================================================================
% Scores every trained ML model on the test set (RMSE, MAE, R², MAPE, MSE),
% then appends the Meyerhof baseline metrics computed in section 2.
fprintf('\n=== Model Evaluation and Comparison ===\n');
% Calculate performance metrics for each model
model_fields = fieldnames(results);
performance_metrics = struct();
for i = 1:length(model_fields)
model_name = model_fields{i};
y_pred = results.(model_name).y_pred;

% Remove NaN values for metric calculation
valid_indices = ~isnan(y_pred) & ~isnan(y_test);
y_pred_valid = y_pred(valid_indices);
y_test_valid = y_test(valid_indices);

% Calculate metrics
mse = mean((y_pred_valid - y_test_valid).^2);
rmse = sqrt(mse);
mae = mean(abs(y_pred_valid - y_test_valid));
r2 = 1 - sum((y_test_valid - y_pred_valid).^2) / sum((y_test_valid - mean(y_test_valid)).^2);

% Calculate MAPE (Mean Absolute Percentage Error)
% NOTE(review): division by y_test_valid assumes nonzero targets — confirm.
mape = mean(abs((y_test_valid - y_pred_valid) ./ y_test_valid)) * 100;

% Store metrics
performance_metrics.(model_name).RMSE = rmse;
performance_metrics.(model_name).MAE = mae;
performance_metrics.(model_name).R2 = r2;
performance_metrics.(model_name).MAPE = mape;
performance_metrics.(model_name).MSE = mse;

fprintf('\n%s:\n', results.(model_name).model_name);
fprintf('  RMSE: %.2f kPa\n', rmse);
fprintf('  MAE:  %.2f kPa\n', mae);
fprintf('  R²:   %.4f\n', r2);
fprintf('  MAPE: %.2f%%\n', mape);

end
% Add Meyerhof to performance metrics
performance_metrics.Meyerhof.RMSE = rmse_meyerhof;
performance_metrics.Meyerhof.MAE = mae_meyerhof;
performance_metrics.Meyerhof.R2 = r2_meyerhof;
performance_metrics.Meyerhof.MAPE = mape_meyerhof;
performance_metrics.Meyerhof.MSE = mse_meyerhof;

%% 4.1 Comparative Visualization
% Six-panel figure: RMSE/R²/MAE bar charts over all models (ML + Meyerhof),
% predicted-vs-actual and residual plots for the best ML model, and kernel
% density estimates of each model's prediction errors.
figure('Position', [50, 50, 1400, 800]);
all_models = [model_fields; {'Meyerhof'}];
r2_values = zeros(length(all_models), 1);
for i = 1:length(all_models)
if i <= length(model_fields)
r2_values(i) = performance_metrics.(all_models{i}).R2;
else
r2_values(i) = performance_metrics.Meyerhof.R2;
end
end
% Determine the best performing ML model (excluding Meyerhof)
[~, best_idx_ml] = max(r2_values(1:end-1));
best_model = all_models{best_idx_ml};
y_pred_best = results.(best_model).y_pred;

% Subplot 1: RMSE Comparison
subplot(2,3,1);
rmse_values = [r2_values*0]; % Preallocate with dummy values
for i = 1:length(all_models)
if i <= length(model_fields)
rmse_values(i) = performance_metrics.(all_models{i}).RMSE;
else
rmse_values(i) = performance_metrics.Meyerhof.RMSE;
end
end
bar(rmse_values);
ylabel('RMSE (kPa)');
title('Root Mean Square Error Comparison', 'FontWeight', 'bold');
set(gca, 'XTickLabel', all_models, 'XTickLabelRotation', 45);
grid on;

% Subplot 2: R² Comparison
subplot(2,3,2);
bar(r2_values);
ylabel('R² Score');
title('Coefficient of Determination (R²)', 'FontWeight', 'bold');
set(gca, 'XTickLabel', all_models, 'XTickLabelRotation', 45);
grid on;
% Lower bound below zero so models worse than the mean remain visible.
ylim([-0.1 1]);

% Subplot 3: MAE Comparison
subplot(2,3,3);
mae_values = [r2_values*0]; % Preallocate with dummy values
for i = 1:length(all_models)
if i <= length(model_fields)
mae_values(i) = performance_metrics.(all_models{i}).MAE;
else
mae_values(i) = performance_metrics.Meyerhof.MAE;
end
end
bar(mae_values);
ylabel('MAE (kPa)');
title('Mean Absolute Error Comparison', 'FontWeight', 'bold');
set(gca, 'XTickLabel', all_models, 'XTickLabelRotation', 45);
grid on;

% Subplot 4: Predicted vs Actual for best model
subplot(2,3,4);
scatter(y_test, y_pred_best, 50, 'filled', 'MarkerFaceAlpha', 0.6);
hold on;
% Dashed 1:1 reference line; points on it are perfect predictions.
plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--', 'LineWidth', 2);
xlabel('Actual Bearing Capacity (kPa)');
ylabel('Predicted Bearing Capacity (kPa)');
title(sprintf('Predicted vs Actual (%s)', results.(best_model).model_name), 'FontWeight', 'bold');
grid on;
axis equal;
xlim([min(y_test), max(y_test)]);
ylim([min(y_test), max(y_test)]);
% Add R² value to plot
text(0.05, 0.95, sprintf('R² = %.4f', r2_values(best_idx_ml)), ...
'Units', 'normalized', 'FontSize', 11, 'FontWeight', 'bold', ...
'BackgroundColor', 'white', 'EdgeColor', 'black');

% Subplot 5: Residual Analysis for best model
subplot(2,3,5);
residuals = y_test - y_pred_best;
residuals = residuals(isfinite(y_pred_best)); % Only use finite predictions
y_pred_best_valid = y_pred_best(isfinite(y_pred_best));
scatter(y_pred_best_valid, residuals, 50, 'filled', 'MarkerFaceAlpha', 0.6);
hold on;
plot([min(y_pred_best_valid), max(y_pred_best_valid)], [0, 0], 'r--', 'LineWidth', 2);
xlabel('Predicted Bearing Capacity (kPa)');
ylabel('Residuals (Actual - Predicted)');
title(sprintf('Residual Plot (%s)', results.(best_model).model_name), 'FontWeight', 'bold');
grid on;

% Subplot 6: Error Distribution Comparison (KSDENSITY for errors)
subplot(2,3,6);
hold on;
colors = lines(length(model_fields));
legend_entries = cell(length(all_models), 1);
% Plot error distributions for ML models
for i = 1:length(model_fields)
errors = y_test - results.(model_fields{i}).y_pred;
valid_errors = errors(isfinite(errors));
legend_entries{i} = results.(model_fields{i}).model_name;
% ksdensity needs at least two distinct values to estimate a density.
if ~isempty(valid_errors) && length(unique(valid_errors)) > 1
[f, xi] = ksdensity(valid_errors);
plot(xi, f, 'LineWidth', 2, 'Color', colors(i,:));
end
end
% Add Meyerhof errors
errors_meyerhof = y_test - y_meyerhof;
valid_errors_meyerhof = errors_meyerhof(isfinite(errors_meyerhof));
legend_entries{end} = 'Meyerhof Equation';
if ~isempty(valid_errors_meyerhof) && length(unique(valid_errors_meyerhof)) > 1
[f, xi] = ksdensity(valid_errors_meyerhof);
plot(xi, f, 'LineWidth', 2, 'Color', 'k', 'LineStyle', '--');
end

xlabel('Prediction Error (kPa)');
ylabel('Density');
title('Error Distribution Comparison', 'FontWeight', 'bold');
grid on;
legend(legend_entries, 'Location', 'best');
sgtitle('Comparative Analysis of ML Models for Bearing Capacity Prediction', ...
'FontSize', 14, 'FontWeight', 'bold');

%% 4.2 Performance Metrics Table
% Prints a fixed-width console summary of RMSE/MAE/R²/MAPE for every model
% (ML models first, Meyerhof last, matching the order of all_models).
fprintf('\n=== Performance Metrics Summary ===\n');
fprintf('%-30s %-10s %-10s %-10s %-10s\n', ...
'Model', 'RMSE', 'MAE', 'R²', 'MAPE(%)');
fprintf('%s\n', repmat('-', 1, 70));
% Display metrics for each model
for i = 1:length(all_models)
model_key = all_models{i};
metrics_struct = performance_metrics.(model_key);

% Entries beyond model_fields are the Meyerhof baseline, which has no
% entry in the results struct, hence the special display name.
if i <= length(model_fields)
    model_display = results.(model_key).model_name;
else
    model_display = 'Meyerhof Equation';
end

fprintf('%-30s %-10.2f %-10.2f %-10.4f %-10.2f\n', ...
    model_display, metrics_struct.RMSE, metrics_struct.MAE, metrics_struct.R2, metrics_struct.MAPE);

end

%% =========================================================================
% 5. FEATURE IMPORTANCE ANALYSIS
% =========================================================================
% Reads permutation-based importance from the Random Forest (enabled by
% 'OOBPredictorImportance','on' in section 3.3), plots it sorted, and
% prints the top five features.
fprintf('\n=== Feature Importance Analysis ===\n');
% Calculate feature importance using Random Forest (interpretable ensemble method)
if isfield(results, 'RandomForest')
fprintf('\nFeature Importance (Random Forest):\n');

try
    % FIX: Access the feature importance property directly from the TreeBagger model
    % This property is now guaranteed to exist due to the 'OOBPredictorImportance', 'on' flag
    imp = results.RandomForest.model.OOBPermutedPredictorDeltaError;

    if isempty(imp)
         fprintf('  Warning: OOBPermutedPredictorDeltaError property is empty. Feature importance requires sufficient data variance.\n');
    else
        % Sort importance
        [imp_sorted, idx] = sort(imp, 'descend');

        % Plot feature importance
        figure('Position', [100, 100, 800, 500]);
        barh(imp_sorted);
        % Labels reordered with idx so each bar matches its feature name.
        set(gca, 'YTick', 1:length(feature_names), 'YTickLabel', feature_names(idx));
        xlabel('Importance (Increase in OOB MSE)');
        title('Feature Importance - Random Forest', 'FontSize', 12, 'FontWeight', 'bold');
        grid on;

        % Display top features
        fprintf('\nTop 5 Most Important Features:\n');
        for i = 1:min(5, length(feature_names))
            fprintf('%d. %s (Importance: %.4f)\n', i, feature_names{idx(i)}, imp_sorted(i));
        end
    end
catch ME
    % Catch block handles unexpected errors during property access
    fprintf('  Error accessing feature importance: %s\n', ME.message);
    fprintf('  Ensure the Statistics and Machine Learning Toolbox is properly installed.\n');
end

end

%% =========================================================================
% 6. CROSS-VALIDATION FOR ROBUSTNESS CHECK
% =========================================================================
% Re-trains each ML model from scratch on every fold of a 5-fold partition
% of the FULL dataset (scaled with the section-1.4 training statistics) and
% records per-fold R² and RMSE for the stability summary that follows.
fprintf('\n=== Cross-Validation Analysis ===\n');
% Perform 5-fold cross-validation on ALL ML models for stability check.
fprintf('\nPerforming 5-fold cross-validation on all ML models...\n');

% Prepare data
% NOTE(review): mu/sigma come from the 80% training split, not from each
% CV fold — a deliberate simplification worth confirming for the paper.
X_full_scaled = (X - mu) ./ sigma;
% Create cross-validation partition
cv_partition = cvpartition(length(y), 'KFold', 5);
% Initialize arrays for CV results
cv_results = struct();
cv_r2 = zeros(5, length(model_fields));
cv_rmse = zeros(5, length(model_fields));

fprintf('Fold\t\t');
for i = 1:length(model_fields)
fprintf('%s R²\t\t', model_fields{i});
end
fprintf('\n%s\n', repmat('-', 1, 10 + 10 * length(model_fields)));

for fold = 1:5
fprintf('%d\t\t', fold);
% Get training and validation indices
trainIdx = training(cv_partition, fold);
valIdx = test(cv_partition, fold);

% Split data
X_train_cv = X_full_scaled(trainIdx, :);
y_train_cv = y(trainIdx);
X_val_cv = X_full_scaled(valIdx, :);
y_val_cv = y(valIdx);

for i = 1:length(model_fields)
    model_key = model_fields{i};
    y_pred_cv = []; % Initialize prediction

    % Each case mirrors the section-3 training recipe with lighter
    % hyperparameters (fewer trees/cycles) to keep CV fast.
    switch model_key
        case 'LinearRegression'
            X_train_lr_cv = [ones(size(X_train_cv, 1), 1), X_train_cv];
            X_val_lr_cv = [ones(size(X_val_cv, 1), 1), X_val_cv];
            beta_cv = (X_train_lr_cv' * X_train_lr_cv) \ (X_train_lr_cv' * y_train_cv);
            y_pred_cv = X_val_lr_cv * beta_cv;

        case 'ANN'
            % Target re-standardized per fold, then predictions unscaled.
            y_mu_cv = mean(y_train_cv);
            y_sigma_cv = std(y_train_cv);
            y_train_scaled_cv = (y_train_cv - y_mu_cv) ./ y_sigma_cv;
            net_cv = fitrnet(X_train_cv, y_train_scaled_cv, ...
                'LayerSizes', hiddenLayerSize, 'Standardize', false);
            y_pred_scaled_cv = predict(net_cv, X_val_cv);
            y_pred_cv = y_pred_scaled_cv * y_sigma_cv + y_mu_cv;

        case 'RandomForest'
            % Note: OOBPredictorImportance is NOT needed here as we only need predictions
            rf_model_cv = TreeBagger(50, X_train_cv, y_train_cv, 'Method', 'regression', 'MinLeafSize', 5);
            y_pred_cv_cell = predict(rf_model_cv, X_val_cv);
            if iscell(y_pred_cv_cell)
                y_pred_cv = cellfun(@str2double, y_pred_cv_cell);
            else
                y_pred_cv = y_pred_cv_cell;
            end

        case 'GBM'
            % Ensure a new model is trained for each fold
            gbm_model_cv = fitrensemble(X_train_cv, y_train_cv, ...
                'Method', 'LSBoost', 'NumLearningCycles', 50);
            y_pred_cv = predict(gbm_model_cv, X_val_cv);
    end

    % Calculate metrics
    valid_idx = ~isnan(y_pred_cv) & ~isnan(y_val_cv);
    y_pred_cv_valid = y_pred_cv(valid_idx);
    y_val_cv_valid = y_val_cv(valid_idx);
    % Guard: R² is undefined for an empty or constant validation fold.
    if isempty(y_val_cv_valid) || std(y_val_cv_valid) == 0
        cv_r2(fold, i) = NaN;
        cv_rmse(fold, i) = NaN;
    else
        cv_r2(fold, i) = 1 - sum((y_val_cv_valid - y_pred_cv_valid).^2) / sum((y_val_cv_valid - mean(y_val_cv_valid)).^2);
        cv_rmse(fold, i) = sqrt(mean((y_val_cv_valid - y_pred_cv_valid).^2));
    end

    fprintf('%.4f\t', cv_r2(fold, i));
end
fprintf('\n');

end

% Summarize CV Results
% Aggregates per-fold R²/RMSE into mean ± std per model and stores the
% summary in cv_metrics_table for CSV export in section 8.
fprintf('\nCross-Validation Results Summary:\n');
cv_metrics_table = table();
for i = 1:length(model_fields)
model_name = results.(model_fields{i}).model_name;
% FIX: use base-MATLAB 'omitnan' instead of nanmean/nanstd, which are
% deprecated and require the Statistics and Machine Learning Toolbox.
% NaN folds (empty/constant validation sets) are still skipped.
R2_mean = mean(cv_r2(:, i), 'omitnan');
R2_std = std(cv_r2(:, i), 'omitnan');
RMSE_mean = mean(cv_rmse(:, i), 'omitnan');
RMSE_std = std(cv_rmse(:, i), 'omitnan');

fprintf('%-30s\tAvg R²: %.4f ± %.4f\tAvg RMSE: %.2f ± %.2f kPa\n', ...
    model_name, R2_mean, R2_std, RMSE_mean, RMSE_std);

% Store in table for easy saving
cv_metrics_table.Model{i} = model_name;
cv_metrics_table.AvgR2(i) = R2_mean;
cv_metrics_table.StdR2(i) = R2_std;
cv_metrics_table.AvgRMSE(i) = RMSE_mean;
cv_metrics_table.StdRMSE(i) = RMSE_std;

end

%% =========================================================================
% 7. PREDICTION ON NEW DATA (DEMONSTRATION)
% =========================================================================
fprintf('\n=== Prediction Demonstration ===\n');
% Determine the correct number of features
% FIX: derive the counts from the data defined in section 1.1 instead of
% hard-coding 8, so this section stays correct if the feature list or the
% set of soil categories ever changes.
num_numerical_features = numel(numerical_features);
num_soil_types = length(soil_types) - 1; % one category dropped as reference
total_features = num_numerical_features + num_soil_types;
fprintf('Number of numerical features: %d\n', num_numerical_features);
fprintf('Number of soil type dummy variables: %d\n', num_soil_types);
fprintf('Total features expected: %d\n', total_features);

% Define sample data parameters
% A single hand-crafted foundation case used to demonstrate end-to-end
% prediction with every trained model plus the Meyerhof baseline.
sample_B = 1.50;
sample_D = 1.20;
sample_LB = 1.5;
sample_gamma = 18.5;
sample_c = 75.0;
sample_phi = 30.0;
sample_n = 0.45;
sample_w = 20.0;
sample_soil = 'Alluvial Deposit';

% Find which soil type is the sample soil
% NOTE(review): assumes 'Alluvial Deposit' exists in the dataset; find()
% returns empty otherwise and the fprintf below would error — confirm.
alluvial_index = find(strcmp(soil_types, sample_soil));
fprintf('Sample Soil Type: %s (#%d of %d)\n', sample_soil, alluvial_index, length(soil_types));

% Create new sample data
% Order must match numerical_features from section 1.1.
new_sample_numerical = [sample_B, sample_D, sample_LB, sample_gamma, sample_c, sample_phi, sample_n, sample_w];

% Create soil type encoding
soil_encoding = zeros(1, num_soil_types);
if alluvial_index <= num_soil_types
soil_encoding(alluvial_index) = 1;
end % If it's the reference category, all dummies are 0

new_sample = [new_sample_numerical, soil_encoding];

% Verify dimensions
fprintf('\nNew sample dimensions: %d x %d\n', size(new_sample, 1), size(new_sample, 2));
if length(new_sample) ~= total_features
error('New sample has incorrect number of features. Expected %d, got %d.', total_features, length(new_sample));
end

% Scale the new sample
% Uses the training-set mu/sigma from section 1.4, as the models expect.
new_sample_scaled = (new_sample - mu) ./ sigma;

% Predict using all models
fprintf('\nPredicting bearing capacity for new sample:\n');
fprintf('Foundation width: %.2f m\n', sample_B);
fprintf('Foundation depth: %.2f m\n', sample_D);
fprintf('Soil type: %s\n', sample_soil);
fprintf('Cohesion: %.1f kPa\n', sample_c);
fprintf('Friction angle: %.1f°\n\n', sample_phi);

% Linear regression: prepend the intercept column before applying beta.
pred_lr = ([1, new_sample_scaled] * beta);
fprintf('Linear Regression: %.2f kPa\n', pred_lr);

% ANN predicts the standardized target; un-standardize its output.
pred_ann_scaled = predict(net, new_sample_scaled);
pred_ann = pred_ann_scaled * y_sigma + y_mu;
fprintf('ANN: %.2f kPa\n', pred_ann);

pred_rf_cell = predict(rf_model, new_sample_scaled);
if iscell(pred_rf_cell)
pred_rf = str2double(pred_rf_cell{1});
else
pred_rf = pred_rf_cell;
end
fprintf('Random Forest: %.2f kPa\n', pred_rf);

% Empty model field means the section-3.4 fallback was used.
if ~isempty(results.GBM.model)
pred_gbm = predict(results.GBM.model, new_sample_scaled);
else
pred_gbm = mean([pred_lr, pred_ann, pred_rf]); % Fallback
end
fprintf('Gradient Boosting: %.2f kPa\n', pred_gbm);

% Meyerhof Equation
pred_meyerhof = meyerhof_calc(sample_B, sample_D, sample_LB, sample_gamma, sample_c, sample_phi);
fprintf('Meyerhof Equation: %.2f kPa\n', pred_meyerhof);

% Calculate average prediction
ml_predictions = [pred_lr, pred_ann, pred_rf, pred_gbm];
valid_ml_preds = ml_predictions(isfinite(ml_predictions));
avg_prediction = mean(valid_ml_preds);
fprintf('\nAverage ML Prediction: %.2f kPa\n', avg_prediction);
fprintf('Range: %.2f to %.2f kPa\n', min(valid_ml_preds), max(valid_ml_preds));

%% =========================================================================
% 8. SAVE RESULTS AND MODELS
% =========================================================================
% Persists the analysis: a .mat workspace snapshot, plus CSV exports of the
% test-set metrics, the CV robustness summary, and per-sample predictions.
fprintf('\n=== Saving Results ===\n');
% Save workspace variables
save('bearing_capacity_analysis_results.mat', ...
'results', 'performance_metrics', 'cv_metrics_table', 'feature_names', ...
'X_train', 'X_test', 'y_train', 'y_test', ...
'mu', 'sigma', 'y_mu', 'y_sigma');

% Save performance metrics to CSV
model_names_display = cell(length(all_models), 1);
for i = 1:length(model_fields)
model_names_display{i} = results.(model_fields{i}).model_name;
end
model_names_display{end+1} = 'Meyerhof Equation';

metrics_table = table();
metrics_table.Model = model_names_display;
metrics_table.RMSE = zeros(length(model_names_display), 1);
metrics_table.MAE = zeros(length(model_names_display), 1);
metrics_table.R2 = zeros(length(model_names_display), 1);
metrics_table.MAPE = zeros(length(model_names_display), 1);

% Fill the table (FIXED INDEXING)
for i = 1:length(model_names_display)
% Rows past model_fields correspond to the Meyerhof baseline entry.
if i <= length(model_fields)
model_key = model_fields{i};
else
model_key = 'Meyerhof';
end

metrics_table.RMSE(i) = performance_metrics.(model_key).RMSE;
metrics_table.MAE(i) = performance_metrics.(model_key).MAE;
metrics_table.R2(i) = performance_metrics.(model_key).R2;
metrics_table.MAPE(i) = performance_metrics.(model_key).MAPE;

end
writetable(metrics_table, 'model_performance_metrics.csv');
fprintf('Performance metrics saved to: model_performance_metrics.csv\n');
writetable(cv_metrics_table, 'cv_robustness_metrics.csv');
fprintf('Cross-validation metrics saved to: cv_robustness_metrics.csv\n');

% Save predictions
% One column per model, keyed by display name; rows align with y_test.
predictions_table = table();
predictions_table.Actual = y_test;
for i = 1:length(model_fields)
model = model_fields{i};
predictions_table.(results.(model).model_name) = results.(model).y_pred;
end
predictions_table.Meyerhof = y_meyerhof;
writetable(predictions_table, 'model_predictions.csv');
fprintf('Predictions saved to: model_predictions.csv\n');

%% =========================================================================
% 9. SUMMARY AND CONCLUSIONS
% =========================================================================
% Console recap: dataset size, best ML model (chosen in section 4.1 by
% test-set R²), the Meyerhof baseline, and the relative R² improvement.
fprintf('\n=== Summary and Conclusions ===\n');
fprintf('\nProject Title: A MATLAB-Based Machine Learning Approach for Predicting\n');
fprintf(' Ultimate Bearing Capacity of Shallow Foundations\n');
fprintf(' A Comparative Study\n\n');
fprintf('Key Findings:\n');
fprintf('1. Dataset: %d samples with %d features (Source: Literature Review)\n', height(data), width(data)-1);
fprintf('2. Best Performing ML Model (Test Set): %s (R² = %.4f)\n', ...
results.(best_model).model_name, performance_metrics.(best_model).R2);
fprintf('3. Traditional Method (Meyerhof): R² = %.4f\n', performance_metrics.Meyerhof.R2);
% Only report an improvement line when the best ML model actually beats
% the baseline (avoids a negative "% improvement").
if performance_metrics.(best_model).R2 > performance_metrics.Meyerhof.R2
improvement = ((performance_metrics.(best_model).R2 - performance_metrics.Meyerhof.R2) / ...
performance_metrics.Meyerhof.R2) * 100;
fprintf('4. ML Improvement over Meyerhof: +%.1f%% in R²\n', improvement);
end
fprintf('\nRecommendations:\n');
fprintf('1. For high accuracy and complex non-linear mapping: Use %s model\n', results.(best_model).model_name);
fprintf('2. For model interpretability and quick analysis: Use Linear Regression or Random Forest (for feature importance)\n');
fprintf('3. Traditional methods remain valuable for preliminary estimates and sanity checks\n');
fprintf('\n=== Project Completed Successfully ===\n');
fprintf('All results have been saved to files.\n');
fprintf('Visualizations are displayed in figures.\n');
fprintf('\n=== END OF SCRIPT ===\n');

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions