IE 7275 Final Project - Predictive Maintenance Exploration - Charles Schatmeyer¶
1. Importing¶
In [ ]:
# Import Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import precision_recall_curve, auc, average_precision_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import cross_val_score
import seaborn as sns
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.metrics import make_scorer, f1_score, recall_score
In [ ]:
%%capture
# Install ucimlrepo Package
! pip install ucimlrepo;
In [ ]:
# Import Database
from ucimlrepo import fetch_ucirepo;
# fetch dataset
ai4i_2020_predictive_maintenance_dataset = fetch_ucirepo(id=601);
# data (as pandas dataframes)
X = ai4i_2020_predictive_maintenance_dataset.data.features
y = ai4i_2020_predictive_maintenance_dataset.data.targets
# metadata
# print(ai4i_2020_predictive_maintenance_dataset.metadata)
In [ ]:
# print variable information
ai4i_2020_predictive_maintenance_dataset.variables
Out[ ]:
 | name | role | type | demographic | description | units | missing_values |
---|---|---|---|---|---|---|---|
0 | UID | ID | Integer | None | None | None | no |
1 | Product ID | ID | Categorical | None | None | None | no |
2 | Type | Feature | Categorical | None | None | None | no |
3 | Air temperature | Feature | Continuous | None | None | K | no |
4 | Process temperature | Feature | Continuous | None | None | K | no |
5 | Rotational speed | Feature | Integer | None | None | rpm | no |
6 | Torque | Feature | Continuous | None | None | Nm | no |
7 | Tool wear | Feature | Integer | None | None | min | no |
8 | Machine failure | Target | Integer | None | None | None | no |
9 | TWF | Target | Integer | None | None | None | no |
10 | HDF | Target | Integer | None | None | None | no |
11 | PWF | Target | Integer | None | None | None | no |
12 | OSF | Target | Integer | None | None | None | no |
13 | RNF | Target | Integer | None | None | None | no |
In [ ]:
%%capture
# Add RNF Failures to Machine Failures
# 0.1% of rows are flagged as RNF (random failures) independent of the process parameters
# OR-ing these into Machine failure adds random noise, which reduces an overfitting problem and makes the data more realistic
y['Machine failure'] = y['Machine failure'] | y['RNF']
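As a quick sanity check on the comment above, the sketch below (an illustrative addition, not part of the original analysis) counts the rows where RNF is the only failure mode set, i.e. the rows the OR can newly mark as failures; it assumes y as loaded above.
In [ ]:
# Sanity check (illustrative): how many rows does the RNF OR actually add?
# A row can only be newly flagged if RNF == 1 and no other failure mode is set.
other_modes = y[['TWF', 'HDF', 'PWF', 'OSF']].any(axis=1)
rnf_only = (y['RNF'] == 1) & ~other_modes
print(f"Rows flagged RNF: {int((y['RNF'] == 1).sum())}")
print(f"Rows where RNF is the only failure mode: {int(rnf_only.sum())}")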
In [ ]:
# Display features, targets
X
Out[ ]:
 | Type | Air temperature | Process temperature | Rotational speed | Torque | Tool wear |
---|---|---|---|---|---|---|
0 | M | 298.1 | 308.6 | 1551 | 42.8 | 0 |
1 | L | 298.2 | 308.7 | 1408 | 46.3 | 3 |
2 | L | 298.1 | 308.5 | 1498 | 49.4 | 5 |
3 | L | 298.2 | 308.6 | 1433 | 39.5 | 7 |
4 | L | 298.2 | 308.7 | 1408 | 40.0 | 9 |
... | ... | ... | ... | ... | ... | ... |
9995 | M | 298.8 | 308.4 | 1604 | 29.5 | 14 |
9996 | H | 298.9 | 308.4 | 1632 | 31.8 | 17 |
9997 | M | 299.0 | 308.6 | 1645 | 33.4 | 22 |
9998 | H | 299.0 | 308.7 | 1408 | 48.5 | 25 |
9999 | M | 299.0 | 308.7 | 1500 | 40.2 | 30 |
10000 rows × 6 columns
In [ ]:
y
Out[ ]:
 | Machine failure | TWF | HDF | PWF | OSF | RNF |
---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... |
9995 | 0 | 0 | 0 | 0 | 0 | 0 |
9996 | 0 | 0 | 0 | 0 | 0 | 0 |
9997 | 0 | 0 | 0 | 0 | 0 | 0 |
9998 | 0 | 0 | 0 | 0 | 0 | 0 |
9999 | 0 | 0 | 0 | 0 | 0 | 0 |
10000 rows × 6 columns
In [ ]:
# Display basic information
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")
failure_count = y['Machine failure'].value_counts()[1]
print(f"Machine Failure Count: {failure_count}")
print(f"Machine Failure Percentage: {failure_count * 100 / y.shape[0]}%")
Shape of X: (10000, 6)
Shape of y: (10000, 6)
Machine Failure Count: 357
Machine Failure Percentage: 3.57%
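The variable table above reports no missing values; a quick programmatic check of dtypes, missingness, and duplicates is still cheap insurance before preprocessing (a minimal sketch, assuming X and y from the cells above).
In [ ]:
# Basic data-quality checks before preprocessing
print(X.dtypes)
print("Missing values per feature:\n", X.isna().sum())
print("Missing values per target:\n", y.isna().sum())
print("Duplicate feature rows:", X.duplicated().sum())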
In [ ]:
# Display histograms for the distribution of the features
# See general distribution for each
fig, axs = plt.subplots(2, 3, figsize=(10, 5))
axs = axs.flatten()
# Type
sns.histplot(X['Type'], bins=3, ax=axs[0])
axs[0].set_title('Type')
# Air temperature
X['Air temperature'].hist(ax=axs[1], bins=10)
axs[1].set_title('Air temperature')
# Process temperature
X['Process temperature'].hist(ax=axs[2], bins=10)
axs[2].set_title('Process temperature')
# Rotational speed
X['Rotational speed'].hist(ax=axs[3], bins=10)
axs[3].set_title('Rotational speed')
# Torque
X['Torque'].hist(ax=axs[4], bins=10)
axs[4].set_title('Torque')
# Tool wear
X['Tool wear'].hist(ax=axs[5], bins=10)
axs[5].set_title('Tool wear')
plt.tight_layout()
plt.show()
In [ ]:
# Display Box and Whisker plots for Distribution in Failure and Normal Operation
# See distribution in Failure vs No Failure
fig, ax = plt.subplots(2, 3, figsize=(14, 8))
for i, col in enumerate(X.columns):
sns.boxplot(x=y['Machine failure'], y=col, data=X, ax=ax[i//3][i%3])
2. Preprocessing¶
In [ ]:
# Convert "Type" Feature to Numeric with one hot encoding
# This eliminates categorical features, and adding features is not detrimental
# as there are a small number of features currently
X_numeric = pd.get_dummies(X, columns=['Type'])
X_numeric.head()
Out[ ]:
 | Air temperature | Process temperature | Rotational speed | Torque | Tool wear | Type_H | Type_L | Type_M |
---|---|---|---|---|---|---|---|---|
0 | 298.1 | 308.6 | 1551 | 42.8 | 0 | False | False | True |
1 | 298.2 | 308.7 | 1408 | 46.3 | 3 | False | True | False |
2 | 298.1 | 308.5 | 1498 | 49.4 | 5 | False | True | False |
3 | 298.2 | 308.6 | 1433 | 39.5 | 7 | False | True | False |
4 | 298.2 | 308.7 | 1408 | 40.0 | 9 | False | True | False |
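Not used below, but worth noting: dropping one dummy level keeps the three Type columns from being perfectly collinear (they always sum to 1), which matters if linear models are ever added; the tree-based models used here are unaffected. A hedged alternative sketch:
In [ ]:
# Alternative encoding (illustrative, not used in the rest of the notebook):
# drop the first dummy level so the Type columns are not perfectly collinear
X_numeric_alt = pd.get_dummies(X, columns=['Type'], drop_first=True)
X_numeric_alt.head()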
In [ ]:
# Partition into 70% Training, 30% Testing due to limited size of failure data points
X_train, X_test, y_train, y_test = train_test_split(X_numeric, y, test_size=0.3, random_state=42)
X_train.head()
Out[ ]:
 | Air temperature | Process temperature | Rotational speed | Torque | Tool wear | Type_H | Type_L | Type_M |
---|---|---|---|---|---|---|---|---|
9069 | 297.2 | 308.2 | 1678 | 28.1 | 133 | False | False | True |
2603 | 299.3 | 309.2 | 1334 | 46.3 | 31 | False | False | True |
7738 | 300.5 | 312.0 | 1263 | 60.8 | 146 | False | False | True |
1579 | 298.3 | 308.3 | 1444 | 43.8 | 176 | False | True | False |
5058 | 303.9 | 312.9 | 1526 | 42.5 | 194 | False | True | False |
In [ ]:
# Temporarily remove dummy variables
train_dummy = X_train[['Type_H', 'Type_L', 'Type_M']]
test_dummy = X_test[['Type_H', 'Type_L', 'Type_M']]
train_numeric = X_train.drop(['Type_H', 'Type_L', 'Type_M'], axis=1)
test_numeric = X_test.drop(['Type_H', 'Type_L', 'Type_M'], axis=1)
train_numeric.head()
Out[ ]:
 | Air temperature | Process temperature | Rotational speed | Torque | Tool wear |
---|---|---|---|---|---|
9069 | 297.2 | 308.2 | 1678 | 28.1 | 133 |
2603 | 299.3 | 309.2 | 1334 | 46.3 | 31 |
7738 | 300.5 | 312.0 | 1263 | 60.8 | 146 |
1579 | 298.3 | 308.3 | 1444 | 43.8 | 176 |
5058 | 303.9 | 312.9 | 1526 | 42.5 | 194 |
In [ ]:
# Rescale the numeric data on the training data
# MinMaxScaler used as the data is not known to be normally distributed
scaler = MinMaxScaler()
scaler.fit(train_numeric)
train_scaled = scaler.transform(train_numeric)
test_scaled = scaler.transform(test_numeric)
train_scaled[0]
Out[ ]:
array([0.20652174, 0.30864198, 0.2914956 , 0.3437058 , 0.5256917 ])
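The scaled values can be checked by hand against the min-max formula x' = (x - min) / (max - min), using the training minima and maxima stored on the fitted scaler (a minimal sketch, assuming scaler, train_numeric, and the train_scaled array from the cell above).
In [ ]:
# Verify the first scaled row against MinMaxScaler's formula
manual = (train_numeric.iloc[0].values - scaler.data_min_) / (scaler.data_max_ - scaler.data_min_)
print(manual)
print("Matches scaler output:", np.allclose(manual, train_scaled[0]))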
In [ ]:
# Put back into dataframe
train_scaled = pd.DataFrame(train_scaled, columns=train_numeric.columns)
test_scaled = pd.DataFrame(test_scaled, columns=test_numeric.columns)
train_scaled.head()
Out[ ]:
 | Air temperature | Process temperature | Rotational speed | Torque | Tool wear |
---|---|---|---|---|---|
0 | 0.206522 | 0.308642 | 0.291496 | 0.343706 | 0.525692 |
1 | 0.434783 | 0.432099 | 0.089736 | 0.601132 | 0.122530 |
2 | 0.565217 | 0.777778 | 0.048094 | 0.806223 | 0.577075 |
3 | 0.326087 | 0.320988 | 0.154252 | 0.565771 | 0.695652 |
4 | 0.934783 | 0.888889 | 0.202346 | 0.547383 | 0.766798 |
In [ ]:
# Reset indices of both dataframes
train_dummy = train_dummy.reset_index(drop=True)
test_dummy = test_dummy.reset_index(drop=True)
train_scaled = train_scaled.reset_index(drop=True)
test_scaled = test_scaled.reset_index(drop=True)
# Add dummy variable columns
train_scaled = pd.concat([train_scaled, train_dummy], axis=1)
test_scaled = pd.concat([test_scaled, test_dummy], axis=1)
train_scaled.head()
Out[ ]:
 | Air temperature | Process temperature | Rotational speed | Torque | Tool wear | Type_H | Type_L | Type_M |
---|---|---|---|---|---|---|---|---|
0 | 0.206522 | 0.308642 | 0.291496 | 0.343706 | 0.525692 | False | False | True |
1 | 0.434783 | 0.432099 | 0.089736 | 0.601132 | 0.122530 | False | False | True |
2 | 0.565217 | 0.777778 | 0.048094 | 0.806223 | 0.577075 | False | False | True |
3 | 0.326087 | 0.320988 | 0.154252 | 0.565771 | 0.695652 | False | True | False |
4 | 0.934783 | 0.888889 | 0.202346 | 0.547383 | 0.766798 | False | True | False |
In [ ]:
# Apply SMOTE Oversampling to the training set
# Gives the data a more balanced set for training
smote = SMOTE(sampling_strategy={1:2000}, random_state=42)
X_train_resampled_mf, y_train_resampled_mf = smote.fit_resample(train_scaled, y_train['Machine failure'])
# Display oversampled data
failure_count = y_train_resampled_mf.value_counts()[1]
print(f"Machine Failure Count: {failure_count}")
print(f"Machine Failure Percentage: {failure_count * 100 / len(y_train_resampled_mf)}%")
Machine Failure Count: 2000
Machine Failure Percentage: 22.875443211712227%
3. Exploratory Data Analysis¶
In [ ]:
# Pairplot of features to visualize relationships
sns.pairplot(pd.concat([X, y['Machine failure']], axis=1))
plt.show()
In [ ]:
# Correlation matrix to check for multicollinearity
# Done on all targets here as space allows in this instance
corr_matrix = pd.concat([X_numeric, y], axis=1).corr()
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title("Correlation Matrix")
plt.show()
In [ ]:
# There is visible multicollinearity,
# though due to the limited number of features available,
# no features are removed
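To put a number on the multicollinearity noted above, variance inflation factors can be read off the diagonal of the inverse correlation matrix (VIF_i = [R^-1]_ii for standardized features). The sketch below is an illustrative addition, computed only on the five sensor features since the one-hot Type columns are perfectly collinear by construction; it assumes X_numeric from the preprocessing section.
In [ ]:
# Variance inflation factors for the sensor features (illustrative check)
sensor_cols = ['Air temperature', 'Process temperature', 'Rotational speed', 'Torque', 'Tool wear']
corr = X_numeric[sensor_cols].corr().values
vif = np.diag(np.linalg.inv(corr))
for col, v in zip(sensor_cols, vif):
    print(f"{col}: VIF = {v:.2f}")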
4. Model Building¶
In [ ]:
# Running 4 Different Models on Overall Failure with and without oversampling using SMOTE
# This enables comparative analysis and gives potentially useful insight
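Since the same fit-and-score pattern repeats for every model in sections 4 and 5, a small helper along these lines could keep the comparisons consistent (an illustrative sketch only; the notebook spells each evaluation out cell by cell, and fit_and_report is a hypothetical name, not a library function).
In [ ]:
# Illustrative helper (not used below): fit a classifier and report the metrics
# the evaluation section relies on - accuracy plus F1 for the failure class
def fit_and_report(model, X_tr, y_tr, X_te, y_te):
    model.fit(X_tr, y_tr)
    pred = model.predict(X_te)
    return {
        'train_acc': accuracy_score(y_tr, model.predict(X_tr)),
        'test_acc': accuracy_score(y_te, pred),
        'test_f1_failure': f1_score(y_te, pred, pos_label=1),
    }
# Example usage:
# fit_and_report(DecisionTreeClassifier(random_state=42),
#                X_train_resampled_mf, y_train_resampled_mf,
#                test_scaled, y_test['Machine failure'])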
a. General Machine Failure with Oversampling¶
In [ ]:
# Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_resampled_mf, y_train_resampled_mf)
Out[ ]:
DecisionTreeClassifier(random_state=42)
In [ ]:
# Plot Decision Tree
plt.figure(figsize=(100, 40))
plot_tree(dt, filled=True, feature_names=train_scaled.columns, class_names=['No Failure', 'Failure'], fontsize=12)
plt.show()
In [ ]:
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_resampled_mf, y_train_resampled_mf)
Out[ ]:
RandomForestClassifier(random_state=42)
In [ ]:
# Hyperparameter Tuning for AdaBoost Classifier
# Tuning for f1 score, as it is more relevant
param_grid = {
'n_estimators': [50, 100, 150],
'learning_rate': [0.01, 0.1, 1.0]
}
grid_search = GridSearchCV(AdaBoostClassifier(), param_grid, cv=5, scoring='f1')
grid_search.fit(X_train_resampled_mf, y_train_resampled_mf)
print("Best AdaBoost Parameters after Hyperparameter Tuning:", grid_search.best_params_)
print("Best Cross-Validation Score after Hyperparameter Tuning:", grid_search.best_score_)
ab = grid_search.best_estimator_
Best AdaBoost Parameters after Hyperparameter Tuning: {'learning_rate': 1.0, 'n_estimators': 150}
Best Cross-Validation Score after Hyperparameter Tuning: 0.8157951741030682
In [ ]:
# Determine the effective alphas and the corresponding total leaf impurities
pruning_path = dt.cost_complexity_pruning_path(X_train_resampled_mf, y_train_resampled_mf)
alphas, impurities = pruning_path.ccp_alphas, pruning_path.impurities
# Train a decision tree for each alpha value
trees = [DecisionTreeClassifier(ccp_alpha=alpha).fit(X_train_resampled_mf, y_train_resampled_mf) for alpha in alphas]
# Calculate the training and validation scores for each alpha
train_accuracies = [f1_score(y_train_resampled_mf, tree.predict(X_train_resampled_mf), pos_label=1) for tree in trees]
test_accuracies = [f1_score(y_test['Machine failure'], tree.predict(test_scaled), pos_label=1) for tree in trees]
# Plot the training and validation scores
plt.figure(figsize=(10, 6))
plt.plot(alphas, train_accuracies, marker='o', label='Training F1 Score')
plt.plot(alphas, test_accuracies, marker='o', label='Testing F1 Score')
plt.xlabel('Alpha')
plt.ylabel('F1 Score (Class 1)')
plt.title('F1 Score (Class 1) vs Alpha for Training and Testing Sets')
plt.legend()
plt.grid(True)
plt.show()
# Find the best alpha value
best_alpha_index = test_accuracies.index(max(test_accuracies))
best_alpha = alphas[best_alpha_index]
print(f"The best ccp_alpha value is: {best_alpha}")
The best ccp_alpha value is: 0.0011784451428729706
In [ ]:
#Train the Pruned Decision Tree Using the Best Alpha Value
dt_pruned = DecisionTreeClassifier(random_state=42, ccp_alpha=best_alpha)
dt_pruned.fit(X_train_resampled_mf, y_train_resampled_mf)
Out[ ]:
DecisionTreeClassifier(ccp_alpha=0.0011784451428729706, random_state=42)
In [ ]:
#plot the pruned tree
plt.figure(figsize=(100, 40))
plot_tree(dt_pruned, filled=True, feature_names=train_scaled.columns, class_names=['No Failure', 'Failure'], fontsize=12)
plt.show()
b. General Machine Failure without Oversampling¶
In [ ]:
# Decision Tree
dt_b = DecisionTreeClassifier(random_state=42)
dt_b.fit(train_scaled, y_train['Machine failure'])
Out[ ]:
DecisionTreeClassifier(random_state=42)
In [ ]:
# Plot Decision Tree
plt.figure(figsize=(100, 40))
plot_tree(dt_b, filled=True, feature_names=train_scaled.columns, class_names=['No Failure', 'Failure'], fontsize=12)
plt.show()
In [ ]:
# Random Forest
rf_b = RandomForestClassifier(random_state=42)
rf_b.fit(train_scaled, y_train['Machine failure'])
Out[ ]:
RandomForestClassifier(random_state=42)
In [ ]:
# Hyperparameter Tuning for AdaBoost
# Tuning for f1 score, as it is more relevant
param_grid = {
'n_estimators': [50, 100, 150],
'learning_rate': [0.01, 0.1, 1.0]
}
grid_search = GridSearchCV(AdaBoostClassifier(), param_grid, cv=5, scoring='f1')
grid_search.fit(train_scaled, y_train['Machine failure'])
print("Best AdaBoost Parameters after Hyperparameter Tuning:", grid_search.best_params_)
print("Best Cross-Validation Score after Hyperparameter Tuning:", grid_search.best_score_)
ab_b = grid_search.best_estimator_
Best AdaBoost Parameters after Hyperparameter Tuning: {'learning_rate': 1.0, 'n_estimators': 100}
Best Cross-Validation Score after Hyperparameter Tuning: 0.5087057510438777
In [ ]:
# Determine the effective alphas and the corresponding total leaf impurities
pruning_path = dt_b.cost_complexity_pruning_path(train_scaled, y_train['Machine failure'])
alphas, impurities = pruning_path.ccp_alphas, pruning_path.impurities
# Train a decision tree for each alpha value
trees = [DecisionTreeClassifier(ccp_alpha=alpha).fit(train_scaled, y_train['Machine failure']) for alpha in alphas]
# Calculate the training and validation scores for each alpha
train_accuracies = [f1_score(y_train['Machine failure'], tree.predict(train_scaled), pos_label=1) for tree in trees]
test_accuracies = [f1_score(y_test['Machine failure'], tree.predict(test_scaled), pos_label=1) for tree in trees]
# Plot the training and validation scores
plt.figure(figsize=(10, 6))
plt.plot(alphas, train_accuracies, marker='o', label='Training F1 Score')
plt.plot(alphas, test_accuracies, marker='o', label='Testing F1 Score')
plt.xlabel('Alpha')
plt.ylabel('F1 Score (Class 1)')
plt.title('F1 Score (Class 1) vs Alpha for Training and Testing Sets')
plt.legend()
plt.grid(True)
plt.show()
# Find the best alpha value
best_alpha_index = test_accuracies.index(max(test_accuracies))
best_alpha_b = alphas[best_alpha_index]
print(f"The best ccp_alpha value is: {best_alpha_b}")
The best ccp_alpha value is: 0.00017142857142857143
In [ ]:
#Train the Pruned Decision Tree Using the Best Alpha Value
dt_pruned_b = DecisionTreeClassifier(random_state=42, ccp_alpha=best_alpha_b)
dt_pruned_b.fit(train_scaled, y_train['Machine failure'])
Out[ ]:
DecisionTreeClassifier(ccp_alpha=0.00017142857142857143, random_state=42)
In [ ]:
#plot the pruned tree
plt.figure(figsize=(100, 40))
plot_tree(dt_pruned_b, filled=True, feature_names=train_scaled.columns, class_names=['No Failure', 'Failure'], fontsize=12)
plt.show()
5. Evaluating the Models¶
a. General Machine Failure with Oversampling¶
In [ ]:
# Decision Tree (No Pruning)
print("Decision Tree Classifier - Training Accuracy:", accuracy_score(y_train_resampled_mf, dt.predict(X_train_resampled_mf)))
print("Decision Tree Classifier - Testing Accuracy:", accuracy_score(y_test['Machine failure'], dt.predict(test_scaled)))
print("Decision Tree Classifier - Classification Report (Test):\n", classification_report(y_test['Machine failure'], dt.predict(test_scaled)))
Decision Tree Classifier - Training Accuracy: 1.0
Decision Tree Classifier - Testing Accuracy: 0.957
Decision Tree Classifier - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      0.97      0.98      2900
           1       0.41      0.64      0.50       100
    accuracy                           0.96      3000
   macro avg       0.70      0.80      0.74      3000
weighted avg       0.97      0.96      0.96      3000
In [ ]:
# Pruned Decision Tree
print("Pruned Decision Tree Classifier - Training Accuracy:", accuracy_score(y_train_resampled_mf, dt_pruned.predict(X_train_resampled_mf)))
print("Pruned Decision Tree Classifier - Testing Accuracy:", accuracy_score(y_test['Machine failure'], dt_pruned.predict(test_scaled)))
print("Pruned Decision Tree Classifier - Classification Report (Test):\n", classification_report(y_test['Machine failure'], dt_pruned.predict(test_scaled)))
Pruned Decision Tree Classifier - Training Accuracy: 0.9384650577604942
Pruned Decision Tree Classifier - Testing Accuracy: 0.975
Pruned Decision Tree Classifier - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      0.99      0.99      2900
           1       0.61      0.68      0.64       100
    accuracy                           0.97      3000
   macro avg       0.80      0.83      0.82      3000
weighted avg       0.98      0.97      0.98      3000
In [ ]:
# Random Forest
print("Random Forest - Training Accuracy:", accuracy_score(y_train_resampled_mf, rf.predict(X_train_resampled_mf)))
print("Random Forest - Testing Accuracy:", accuracy_score(y_test['Machine failure'], rf.predict(test_scaled)))
print("Random Forest - Classification Report (Test):\n", classification_report(y_test['Machine failure'], rf.predict(test_scaled)))
Random Forest - Training Accuracy: 1.0
Random Forest - Testing Accuracy: 0.9743333333333334
Random Forest - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      0.99      0.99      2900
           1       0.61      0.63      0.62       100
    accuracy                           0.97      3000
   macro avg       0.80      0.81      0.80      3000
weighted avg       0.97      0.97      0.97      3000
In [ ]:
# Adaboost Classifier
print("Adaboost Classifier - Training Accuracy:", accuracy_score(y_train_resampled_mf, ab.predict(X_train_resampled_mf)))
print("Adaboost Classifier - Testing Accuracy:", accuracy_score(y_test['Machine failure'], ab.predict(test_scaled)))
print("Adaboost Classifier - Classification Report (Test):\n", classification_report(y_test['Machine failure'], ab.predict(test_scaled)))
Adaboost Classifier - Training Accuracy: 0.9320599336612146
Adaboost Classifier - Testing Accuracy: 0.9493333333333334
Adaboost Classifier - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      0.96      0.97      2900
           1       0.35      0.62      0.45       100
    accuracy                           0.95      3000
   macro avg       0.67      0.79      0.71      3000
weighted avg       0.97      0.95      0.96      3000
b. General Machine Failure without Oversampling¶
In [ ]:
# Decision Tree (No Pruning)
print("Decision Tree Classifier - Training Accuracy:", accuracy_score(y_train['Machine failure'], dt_b.predict(train_scaled)))
print("Decision Tree Classifier - Testing Accuracy:", accuracy_score(y_test['Machine failure'], dt_b.predict(test_scaled)))
print("Decision Tree Classifier - Classification Report (Test):\n", classification_report(y_test['Machine failure'], dt_b.predict(test_scaled)))
Decision Tree Classifier - Training Accuracy: 1.0
Decision Tree Classifier - Testing Accuracy: 0.9756666666666667
Decision Tree Classifier - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      0.99      0.99      2900
           1       0.64      0.62      0.63       100
    accuracy                           0.98      3000
   macro avg       0.81      0.80      0.81      3000
weighted avg       0.98      0.98      0.98      3000
In [ ]:
# Pruned Decision Tree
print("Pruned Decision Tree Classifier - Training Accuracy:", accuracy_score(y_train['Machine failure'], dt_pruned_b.predict(train_scaled)))
print("Pruned Decision Tree Classifier - Testing Accuracy:", accuracy_score(y_test['Machine failure'], dt_pruned_b.predict(test_scaled)))
print("Pruned Decision Tree Classifier - Classification Report (Test):\n", classification_report(y_test['Machine failure'], dt_pruned_b.predict(test_scaled)))
Pruned Decision Tree Classifier - Training Accuracy: 0.9935714285714285
Pruned Decision Tree Classifier - Testing Accuracy: 0.9816666666666667
Pruned Decision Tree Classifier - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      0.99      0.99      2900
           1       0.78      0.62      0.69       100
    accuracy                           0.98      3000
   macro avg       0.89      0.81      0.84      3000
weighted avg       0.98      0.98      0.98      3000
In [ ]:
# Random Forest
print("Random Forest - Training Accuracy:", accuracy_score(y_train['Machine failure'], rf_b.predict(train_scaled)))
print("Random Forest - Testing Accuracy:", accuracy_score(y_test['Machine failure'], rf_b.predict(test_scaled)))
print("Random Forest - Classification Report (Test):\n", classification_report(y_test['Machine failure'], rf_b.predict(test_scaled)))
Random Forest - Training Accuracy: 1.0
Random Forest - Testing Accuracy: 0.982
Random Forest - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.98      1.00      0.99      2900
           1       0.90      0.52      0.66       100
    accuracy                           0.98      3000
   macro avg       0.94      0.76      0.82      3000
weighted avg       0.98      0.98      0.98      3000
In [ ]:
# Adaboost Classifier
print("Adaboost Classifier - Training Accuracy:", accuracy_score(y_train['Machine failure'], ab_b.predict(train_scaled)))
print("Adaboost Classifier - Testing Accuracy:", accuracy_score(y_test['Machine failure'], ab_b.predict(test_scaled)))
print("Adaboost Classifier - Classification Report (Test):\n", classification_report(y_test['Machine failure'], ab_b.predict(test_scaled)))
Adaboost Classifier - Training Accuracy: 0.9738571428571429
Adaboost Classifier - Testing Accuracy: 0.9723333333333334
Adaboost Classifier - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.98      0.99      0.99      2900
           1       0.64      0.39      0.48       100
    accuracy                           0.97      3000
   macro avg       0.81      0.69      0.74      3000
weighted avg       0.97      0.97      0.97      3000
c. Model Comparing¶
In [ ]:
# Make Histograms to compare accuracy, F1 Scores
# This lets us compare models on general accuracy and on predicting machine failure directly
# Accuracy gives a general view at model performance
# F1 Score (for Machine Failure) gives a view of how the model predicts Machine Failure, which is more relevant
# Get accuracy, f1 scores
# A
accuracy_dt = accuracy_score(y_test['Machine failure'], dt.predict(test_scaled))
f1_dt = f1_score(y_test['Machine failure'], dt.predict(test_scaled), pos_label=1)
accuracy_dt_pruned = accuracy_score(y_test['Machine failure'], dt_pruned.predict(test_scaled))
f1_dt_pruned = f1_score(y_test['Machine failure'], dt_pruned.predict(test_scaled), pos_label=1)
accuracy_rf = accuracy_score(y_test['Machine failure'], rf.predict(test_scaled))
f1_rf = f1_score(y_test['Machine failure'], rf.predict(test_scaled), pos_label=1)
accuracy_ab = accuracy_score(y_test['Machine failure'], ab.predict(test_scaled))
f1_ab = f1_score(y_test['Machine failure'], ab.predict(test_scaled), pos_label=1)
#B
accuracy_dt_b = accuracy_score(y_test['Machine failure'], dt_b.predict(test_scaled))
f1_dt_b = f1_score(y_test['Machine failure'], dt_b.predict(test_scaled), pos_label=1)
accuracy_dt_pruned_b = accuracy_score(y_test['Machine failure'], dt_pruned_b.predict(test_scaled))
f1_dt_pruned_b = f1_score(y_test['Machine failure'], dt_pruned_b.predict(test_scaled), pos_label=1)
accuracy_rf_b = accuracy_score(y_test['Machine failure'], rf_b.predict(test_scaled))
f1_rf_b = f1_score(y_test['Machine failure'], rf_b.predict(test_scaled), pos_label=1)
accuracy_ab_b = accuracy_score(y_test['Machine failure'], ab_b.predict(test_scaled))
f1_ab_b = f1_score(y_test['Machine failure'], ab_b.predict(test_scaled), pos_label=1)
In [ ]:
# Model names
models = ['Decision Tree', 'Pruned DT', 'Random Forest', 'Adaboost']
# Accuracy scores
accuracy_scores_a = [accuracy_dt, accuracy_dt_pruned, accuracy_rf, accuracy_ab]
accuracy_scores_b = [accuracy_dt_b, accuracy_dt_pruned_b, accuracy_rf_b, accuracy_ab_b]
# F1 scores
f1_scores_a = [f1_dt, f1_dt_pruned, f1_rf, f1_ab]
f1_scores_b = [f1_dt_b, f1_dt_pruned_b, f1_rf_b, f1_ab_b]
In [ ]:
# Make F1 Score Histogram
n_models = len(models)
x = np.arange(n_models)
width = 0.35
fig, ax = plt.subplots()
bars_a = ax.bar(x - width/2, f1_scores_a, width, label='Oversampled')
bars_b = ax.bar(x + width/2, f1_scores_b, width, label='Without')
# Add some text for labels, title, and custom x-axis tick labels, etc.
ax.set_ylabel('F1 Score')
ax.set_title('F1 Score (Machine Failure) for each Model')
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.legend()
plt.show()
In [ ]:
# Make Accuracy Score Histogram
n_models = len(models)
x = np.arange(n_models)
width = 0.35
fig, ax = plt.subplots()
bars_a = ax.bar(x - width/2, accuracy_scores_a, width, label='Oversampled')
bars_b = ax.bar(x + width/2, accuracy_scores_b, width, label='Without')
# Add some text for labels, title, and custom x-axis tick labels, etc.
ax.set_ylabel('Accuracy Score')
ax.set_title('Accuracy Scores for each Model')
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.legend()
plt.show()
In [ ]:
# Interestingly, oversampling only slightly hurts performance here
# Oversampling will not be used from here on out
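The size of that effect can be tabulated directly from the scores computed above (a minimal sketch, assuming models, f1_scores_a, and f1_scores_b from section 5c).
In [ ]:
# F1 difference per model: without oversampling minus with oversampling
# (positive values mean the un-resampled model did better on the failure class)
for name, f1_a, f1_b in zip(models, f1_scores_a, f1_scores_b):
    print(f"{name}: {f1_b - f1_a:+.3f}")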
In [ ]:
# Create precision-recall curves to evaluate model performance
# The precision-recall curve is relevant on unbalanced data
# Generate prediction probabilities for each model
y_scores_dt = dt_b.predict_proba(test_scaled)[:, 1]
y_scores_dt_pruned = dt_pruned_b.predict_proba(test_scaled)[:, 1]
y_scores_rf = rf_b.predict_proba(test_scaled)[:, 1]
y_scores_ab = ab_b.predict_proba(test_scaled)[:, 1]
# calculate precision-recall pairs for different thresholds
precision_dt, recall_dt, thresholds_dt = precision_recall_curve(y_test['Machine failure'], y_scores_dt, pos_label=1)
precision_dt_pruned, recall_dt_pruned, thresholds_dt_pruned = precision_recall_curve(y_test['Machine failure'], y_scores_dt_pruned, pos_label=1)
precision_rf, recall_rf, thresholds_rf = precision_recall_curve(y_test['Machine failure'], y_scores_rf, pos_label=1)
precision_ab, recall_ab, thresholds_ab = precision_recall_curve(y_test['Machine failure'], y_scores_ab, pos_label=1)
# Calculate AUC for each model
auc_dt = auc(recall_dt, precision_dt)
auc_dt_pruned = auc(recall_dt_pruned, precision_dt_pruned)
auc_rf = auc(recall_rf, precision_rf)
auc_ab = auc(recall_ab, precision_ab)
In [ ]:
# Plot the precision-recall curves
plt.figure(figsize=(10, 8))
plt.plot(recall_dt, precision_dt, label=f'Decision Tree (AP = {average_precision_score(y_test["Machine failure"], y_scores_dt):.2f}, AUC = {auc_dt:.2f})')
plt.plot(recall_dt_pruned, precision_dt_pruned, label=f'Pruned Decision Tree (AP = {average_precision_score(y_test["Machine failure"], y_scores_dt_pruned):.2f}, AUC = {auc_dt_pruned:.2f})')
plt.plot(recall_rf, precision_rf, label=f'Random Forest (AP = {average_precision_score(y_test["Machine failure"], y_scores_rf):.2f}, AUC = {auc_rf:.2f})')
plt.plot(recall_ab, precision_ab, label=f'AdaBoost (AP = {average_precision_score(y_test["Machine failure"], y_scores_ab):.2f}, AUC = {auc_ab:.2f})')
# Add the y = 1 - x reference line for comparison
x = np.linspace(0, 1, 100)
plt.plot(x, 1 - x, 'r--', label='y = 1 - x')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve, With Average Precision and Area under the Curve')
plt.legend(loc='lower left')
plt.grid(True)
plt.show()
In [ ]:
# The random forest model performs best, having the greatest area under the curve and getting closest to the top-right
# However, fine-tuning is still required
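As an optional robustness check on that ranking (not required for the analysis that follows), the already-imported cross_val_score can estimate F1 on the training split so the random forest's lead is not judged from a single train/test split alone; a hedged sketch, assuming rf_b, dt_pruned_b, and ab_b from section 4b.
In [ ]:
# Cross-validated F1 on the training data for the non-oversampled models (illustrative)
for name, model in [('Random Forest', rf_b), ('Pruned DT', dt_pruned_b), ('AdaBoost', ab_b)]:
    scores = cross_val_score(model, train_scaled, y_train['Machine failure'], cv=5, scoring='f1')
    print(f"{name}: mean F1 = {scores.mean():.3f} (+/- {scores.std():.3f})")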
In [ ]:
# Make ROC curves, Calculate AUC
# Gives another metric to evaluate performance of predicting Machine Failure
# Though due to the imbalanced data set, these curves are not strongly considered
# Calculate ROC curve and AUC for each model
fpr_dt, tpr_dt, thresholds_dt = roc_curve(y_test['Machine failure'], y_scores_dt, pos_label=1)
fpr_dt_pruned, tpr_dt_pruned, thresholds_dt_pruned = roc_curve(y_test['Machine failure'], y_scores_dt_pruned, pos_label=1)
fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test['Machine failure'], y_scores_rf, pos_label=1)
fpr_ab, tpr_ab, thresholds_ab = roc_curve(y_test['Machine failure'], y_scores_ab, pos_label=1)
auc_dt = roc_auc_score(y_test['Machine failure'], y_scores_dt)
auc_dt_pruned = roc_auc_score(y_test['Machine failure'], y_scores_dt_pruned)
auc_rf = roc_auc_score(y_test['Machine failure'], y_scores_rf)
auc_ab = roc_auc_score(y_test['Machine failure'], y_scores_ab)
# Plot ROC curve
plt.figure(figsize=(10, 8))
plt.plot(fpr_dt, tpr_dt, label=f'Decision Tree (AUC = {auc_dt:.2f})')
plt.plot(fpr_dt_pruned, tpr_dt_pruned, label=f'Pruned Decision Tree (AUC = {auc_dt_pruned:.2f})')
plt.plot(fpr_rf, tpr_rf, label=f'Random Forest (AUC = {auc_rf:.2f})')
plt.plot(fpr_ab, tpr_ab, label=f'AdaBoost (AUC = {auc_ab:.2f})')
# Plot the diagonal line for random guessing
plt.plot([0, 1], [0, 1], 'r--', label='Random Guessing')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
6. Model Tuning¶
In [ ]:
# Perform relevant tuning on the 2 best performing models (regarding F1 Score),
# Pruned DT and Random Forest
In [ ]:
# Tune the Random Forest Model with Different Thresholds
# Looking to optimize for F1 Score on Machine Failure
# Random Forest
# Generate an array of thresholds from 0 to 1
thresholds = np.arange(0.0, 1.01, 0.01)
# Lists to store the F1 and accuracy scores for each threshold
f1_scores = []
accuracy_scores = []
# Calculate F1 and accuracy for each threshold
for threshold in thresholds:
y_pred_new = (y_scores_rf >= threshold).astype(int)
f1 = f1_score(y_test['Machine failure'], y_pred_new, pos_label=1)
accuracy = accuracy_score(y_test['Machine failure'], y_pred_new)
f1_scores.append(f1)
accuracy_scores.append(accuracy)
# Plot the F1 score and accuracy vs. threshold
plt.figure(figsize=(10, 6))
plt.plot(thresholds, f1_scores, label='F1 Score (Machine Failure)', marker='o')
plt.plot(thresholds, accuracy_scores, label='Accuracy', marker='o')
plt.title('Random Forest - Threshold vs. F1 Score (Machine Failure) and Accuracy')
plt.xlabel('Threshold')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.show()
best_threshold = thresholds[f1_scores.index(max(f1_scores))]
print(f"Maximum F1 Score of {max(f1_scores)} reached at threshold {best_threshold}")
Maximum F1 Score of 0.7608695652173915 reached at threshold 0.37
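Once a threshold is chosen this way, applying it at prediction time is just a comparison against predict_proba. The sketch below wraps that in a helper (thresholded_predict is an illustrative name, not a scikit-learn function) and reproduces the F1 score found above.
In [ ]:
# Illustrative helper: apply the tuned probability cutoff instead of the default 0.5
def thresholded_predict(model, X_new, threshold):
    return (model.predict_proba(X_new)[:, 1] >= threshold).astype(int)

y_pred_tuned = thresholded_predict(rf_b, test_scaled, best_threshold)
print("F1 at tuned threshold:", f1_score(y_test['Machine failure'], y_pred_tuned, pos_label=1))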
In [ ]:
# Hyperparameter tuning on the Pruned Decision Tree
param_grid = {
'max_depth': [None, 5, 7, 10, 15],
'min_samples_split': [3, 5, 8],
'min_samples_leaf': [1, 2, 3],
}
grid_search = GridSearchCV(
DecisionTreeClassifier(random_state=42, ccp_alpha=best_alpha_b),
param_grid,
cv=5,
scoring='f1',
n_jobs=-1,
verbose=1
)
grid_search.fit(train_scaled, y_train['Machine failure'])
print("Parameters found: ", grid_search.best_params_)
# Evaluate the best model
best_pruned_dt = grid_search.best_estimator_
y_pred_best = best_pruned_dt.predict(test_scaled)
accuracy_best = accuracy_score(y_test['Machine failure'], y_pred_best)
f1_best = f1_score(y_test['Machine failure'], y_pred_best, pos_label=1)
print("Testing Accuracy of the best model: ", accuracy_best)
print("F1 Score of the best model for class 1: ", f1_best)
Fitting 5 folds for each of 45 candidates, totalling 225 fits
Parameters found:  {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5}
Testing Accuracy of the best model:  0.983
F1 Score of the best model for class 1:  0.7085714285714284
In [ ]:
# Minor improvements gained from these tunings
7. Post Tuning Evaluation¶
In [ ]:
# Histogram of F1 Score and Accuracy for the 3 Models
# Non-pruned DT omitted to avoid redundancy
# Get accuracy, f1 scores
accuracy_dt_pruned = accuracy_score(y_test['Machine failure'], best_pruned_dt.predict(test_scaled))
f1_dt_pruned = f1_score(y_test['Machine failure'], best_pruned_dt.predict(test_scaled), pos_label=1)
threshold = best_threshold
y_threshold = (y_scores_rf >= threshold).astype(int)
accuracy_rf = accuracy_score(y_test['Machine failure'], y_threshold)
f1_rf = f1_score(y_test['Machine failure'], y_threshold, pos_label=1)
accuracy_ab = accuracy_score(y_test['Machine failure'], ab_b.predict(test_scaled))
f1_ab = f1_score(y_test['Machine failure'], ab_b.predict(test_scaled), pos_label=1)
models = ['Random Forest', 'Pruned DT', 'Adaboost']
accuracy_scores = [accuracy_rf, accuracy_dt_pruned, accuracy_ab]
f1_scores = [f1_rf, f1_dt_pruned, f1_ab]
print("F1 Scores:")
print(f1_scores)
# Make histogram
bar_width = 0.35
r1 = np.arange(len(models))
r2 = [x + bar_width for x in r1]
plt.figure(figsize=(10, 6))
plt.bar(r1, accuracy_scores, color='b', width=bar_width, edgecolor='grey', label='Accuracy')
plt.bar(r2, f1_scores, color='r', width=bar_width, edgecolor='grey', label='F1 Score (Machine Failure)')
plt.xlabel('Models')
plt.xticks([r + bar_width/2 for r in range(len(models))], models)
plt.title('Accuracy and F1 Scores for Different Models')
plt.legend()
plt.show()
F1 Scores:
[0.7608695652173915, 0.7085714285714284, 0.48447204968944096]
In [ ]:
# Make histogram to compare performance before and after tuning
models = ['Random Forest', 'Pruned DT']
f1_scores_pretuning = [f1_rf_b, f1_dt_pruned_b]
f1_scores_posttuning = [f1_rf, f1_dt_pruned]
# Make histogram
bar_width = 0.35
r1 = np.arange(len(models))
r2 = [x + bar_width for x in r1]
plt.figure(figsize=(10, 6))
plt.bar(r1, f1_scores_pretuning, color='r', width=bar_width, edgecolor='grey', label='Pre-Tuning')
plt.bar(r2, f1_scores_posttuning, color='b', width=bar_width, edgecolor='grey', label='Post-Tuning')
plt.ylabel('F1 Scores (Machine Failure)')
plt.xlabel('Models')
plt.xticks([r + bar_width/2 for r in range(len(models))], models)
plt.title('F1 Scores for Different Models Pre/Post Tuning')
plt.legend()
plt.show()
In [ ]:
# Make Precision-Recall Curve for Before and After
# Create precision-recall curves to evaluate model performance
# The precision-recall curve is more relevant on unbalanced data
# Generate prediction probabilities for each model
y_scores_dt_pruned_tuned = best_pruned_dt.predict_proba(test_scaled)[:, 1]
y_scores_rf_tuned = rf_b.predict_proba(test_scaled)[:, 1]
y_pred_rf_tuned_threshold = (y_scores_rf_tuned >= best_threshold).astype(int)
# calculate precision-recall pairs for different thresholds
precision_dt_pruned_tuned, recall_dt_pruned_tuned, thresholds_dt_pruned_tuned = precision_recall_curve(y_test['Machine failure'], y_scores_dt_pruned_tuned, pos_label=1)
precision_rf_tuned_threshold, recall_rf_tuned_threshold, _ = precision_recall_curve(y_test['Machine failure'], y_pred_rf_tuned_threshold, pos_label=1)
# Calculate AUC for each model
auc_dt_pruned_tuned = auc(recall_dt_pruned_tuned, precision_dt_pruned_tuned)
auc_rf_tuned_threshold = auc(recall_rf_tuned_threshold, precision_rf_tuned_threshold)
auc_rf = auc(recall_rf, precision_rf)
auc_dt_pruned = auc(recall_dt_pruned, precision_dt_pruned)
auc_ab = auc(recall_ab, precision_ab)
# Plot the before and after curves
plt.figure(figsize=(10, 8))
plt.plot(recall_rf_tuned_threshold, precision_rf_tuned_threshold, label=f'Random Forest Tuned (Threshold = {best_threshold:.2f}, AP = {average_precision_score(y_test["Machine failure"], y_pred_rf_tuned_threshold):.2f}, AUC = {auc_rf_tuned_threshold:.2f})')
plt.plot(recall_rf, precision_rf, label=f'Random Forest (AP = {average_precision_score(y_test["Machine failure"], y_scores_rf):.2f}, AUC = {auc_rf:.2f})')
plt.plot(recall_dt_pruned_tuned, precision_dt_pruned_tuned, label=f'Pruned Decision Tree Tuned (AP = {average_precision_score(y_test["Machine failure"], y_scores_dt_pruned_tuned):.2f}, AUC = {auc_dt_pruned_tuned:.2f})')
plt.plot(recall_dt_pruned, precision_dt_pruned, label=f'Pruned Decision Tree (AP = {average_precision_score(y_test["Machine failure"], y_scores_dt_pruned):.2f}, AUC = {auc_dt_pruned:.2f})')
plt.plot(recall_ab, precision_ab, label=f'AdaBoost (AP = {average_precision_score(y_test["Machine failure"], y_scores_ab):.2f}, AUC = {auc_ab:.2f})')
# Add the y = 1 - x reference line for comparison
x = np.linspace(0, 1, 100)
plt.plot(x, 1 - x, 'r--', label='y = 1 - x')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve, with Average Precision Score and Area under the Curve')
plt.legend(loc='lower left')
plt.grid(True)
plt.show()
In [ ]:
# Make ROC Curve, Calculate AUC for Before and After
# Gives another metric to evaluate performance of predicting Machine Failure
# Though due to the imbalanced data set, these curves are not strongly considered
fpr_dt_pruned_tuned, tpr_dt_pruned_tuned, thresholds_dt_pruned_tuned = roc_curve(y_test['Machine failure'], y_scores_dt_pruned_tuned, pos_label=1)
fpr_rf_tuned_threshold, tpr_rf_tuned_threshold, _ = roc_curve(y_test['Machine failure'], y_pred_rf_tuned_threshold, pos_label=1)
fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test['Machine failure'], y_scores_rf, pos_label=1)
fpr_dt_pruned, tpr_dt_pruned, thresholds_dt_pruned = roc_curve(y_test['Machine failure'], y_scores_dt_pruned, pos_label=1)
fpr_ab, tpr_ab, thresholds_ab = roc_curve(y_test['Machine failure'], y_scores_ab, pos_label=1)
auc_dt_pruned_tuned = roc_auc_score(y_test['Machine failure'], y_scores_dt_pruned_tuned)
auc_rf_tuned_threshold = roc_auc_score(y_test['Machine failure'], y_pred_rf_tuned_threshold)
auc_rf = roc_auc_score(y_test['Machine failure'], y_scores_rf)
auc_dt_pruned = roc_auc_score(y_test['Machine failure'], y_scores_dt_pruned)
auc_ab = roc_auc_score(y_test['Machine failure'], y_scores_ab)
# Plot the before and after ROC curves
plt.figure(figsize=(10, 8))
plt.plot(fpr_rf_tuned_threshold, tpr_rf_tuned_threshold, label=f'Random Forest Tuned (Threshold = {best_threshold:.2f}, AUC = {auc_rf_tuned_threshold:.2f})')
plt.plot(fpr_rf, tpr_rf, label=f'Random Forest (AUC = {auc_rf:.2f})')
plt.plot(fpr_dt_pruned_tuned, tpr_dt_pruned_tuned, label=f'Pruned Decision Tree Tuned (AUC = {auc_dt_pruned_tuned:.2f})')
plt.plot(fpr_dt_pruned, tpr_dt_pruned, label=f'Pruned Decision Tree (AUC = {auc_dt_pruned:.2f})')
plt.plot(fpr_ab, tpr_ab, label=f'AdaBoost (AUC = {auc_ab:.2f})')
# Plot the diagonal line for random guessing
plt.plot([0, 1], [0, 1], 'r--', label='Random Guessing')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
In [ ]:
# Confusion Matrices for Final Overall Look
fig, axs = plt.subplots(1, 4, figsize=(18, 6))
# Random Forest
y_threshold = (y_scores_rf >= best_threshold).astype(int)
ConfusionMatrixDisplay.from_predictions(y_test['Machine failure'], y_threshold, ax=axs[0], cmap='YlGnBu', colorbar=0)
axs[0].set_title("Random Forest")
# Pruned Decision Tree
ConfusionMatrixDisplay.from_predictions(y_test['Machine failure'], best_pruned_dt.predict(test_scaled), ax=axs[1], cmap='YlGnBu', colorbar=0)
axs[1].set_title("Pruned Decision Tree")
# Adaboost Classifier
ConfusionMatrixDisplay.from_predictions(y_test['Machine failure'], ab_b.predict(test_scaled), ax=axs[2], cmap='YlGnBu')
axs[2].set_title("Adaboost Classifier")
# Random Forest
ConfusionMatrixDisplay.from_predictions(y_test['Machine failure'], rf_b.predict(test_scaled), ax=axs[3], cmap='YlGnBu')
axs[3].set_title("Random Forest No Tuning")
plt.tight_layout()
plt.show()
8. Specific Failure Modes - Model Building¶
In [ ]:
# Now examining building models to predict specific failure modes
a. No Oversampling¶
In [ ]:
y.head()
Out[ ]:
 | Machine failure | TWF | HDF | PWF | OSF | RNF |
---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 |
In [ ]:
# Get failure count for each failure mode
TWF_count = y['TWF'].value_counts()[1]
HDF_count = y['HDF'].value_counts()[1]
PWF_count = y['PWF'].value_counts()[1]
OSF_count = y['OSF'].value_counts()[1]
RNF_count = y['RNF'].value_counts()[1] # Note that RNF failures are triggered at random, independent of the process parameters
print(f"TWF Count: {TWF_count}")
print(f"TWF Percentage: {TWF_count * 100 / y.shape[0]}%")
print(f"HDF Count: {HDF_count}")
print(f"HDF Percentage: {HDF_count * 100 / y.shape[0]}%")
print(f"PWF Count: {PWF_count}")
print(f"PWF Percentage: {PWF_count * 100 / y.shape[0]}%")
print(f"OSF Count: {OSF_count}")
print(f"OSF Percentage: {OSF_count * 100 / y.shape[0]}%")
print(f"RNF Count: {RNF_count}")
print(f"RNF Percentage: {RNF_count * 100 / y.shape[0]}%")
TWF Count: 46
TWF Percentage: 0.46%
HDF Count: 115
HDF Percentage: 1.15%
PWF Count: 95
PWF Percentage: 0.95%
OSF Count: 98
OSF Percentage: 0.98%
RNF Count: 19
RNF Percentage: 0.19%
In [ ]:
# Drop general Machine failure
y_specific = y.drop('Machine failure', axis=1)
y_specific.head()
Out[ ]:
 | TWF | HDF | PWF | OSF | RNF |
---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 |
In [ ]:
# Building Random Forest and Tuned Decision Tree Model for each
In [ ]:
# Hyperparameter tuning for Pruned Decision Trees
param_grid = {
'max_depth': [None, 10, 20, 30],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4],
'ccp_alpha': [0.0, 0.001, 0.01, 0.1]
}
grid_search = GridSearchCV(
DecisionTreeClassifier(random_state=42),
param_grid,
cv=5,
scoring='f1',
n_jobs=-1,
verbose=1
)
best_dts = []
for column in y_specific.columns:
grid_search.fit(train_scaled, y_train[column])
print(f"{column} Parameters found: ", grid_search.best_params_)
best_dts.append(grid_search.best_estimator_)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
TWF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 10}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
HDF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
PWF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
OSF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
RNF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
In [ ]:
# Evaluate the best models
for dt, column in zip(best_dts, y_specific.columns):
print(f"{column} - Training Accuracy:", accuracy_score(y_train[column], dt.predict(train_scaled)))
print(f"{column} - Testing Accuracy:", accuracy_score(y_test[column], dt.predict(test_scaled)))
print(f"{column} - Classification Report (Test):\n", classification_report(y_test[column], dt.predict(test_scaled)))
TWF - Training Accuracy: 0.996
TWF - Testing Accuracy: 0.9926666666666667
TWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2986
           1       0.10      0.07      0.08        14
    accuracy                           0.99      3000
   macro avg       0.55      0.53      0.54      3000
weighted avg       0.99      0.99      0.99      3000

HDF - Training Accuracy: 0.9988571428571429
HDF - Testing Accuracy: 0.999
HDF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2975
           1       1.00      0.88      0.94        25
    accuracy                           1.00      3000
   macro avg       1.00      0.94      0.97      3000
weighted avg       1.00      1.00      1.00      3000

PWF - Training Accuracy: 1.0
PWF - Testing Accuracy: 0.9953333333333333
PWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2969
           1       0.79      0.74      0.77        31
    accuracy                           1.00      3000
   macro avg       0.90      0.87      0.88      3000
weighted avg       1.00      1.00      1.00      3000

OSF - Training Accuracy: 0.9995714285714286
OSF - Testing Accuracy: 0.9966666666666667
OSF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2972
           1       0.95      0.68      0.79        28
    accuracy                           1.00      3000
   macro avg       0.97      0.84      0.89      3000
weighted avg       1.00      1.00      1.00      3000

RNF - Training Accuracy: 1.0
RNF - Testing Accuracy: 0.9966666666666667
RNF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2993
           1       0.00      0.00      0.00         7
    accuracy                           1.00      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       1.00      1.00      1.00      3000
In [ ]:
# Make Bar chart of Accuracy, F1 Score for Failure
specific_acc_dt = []
specific_f1_dt = []
for dt, column in zip(best_dts, y_specific.columns):
specific_acc_dt.append(accuracy_score(y_test[column], dt.predict(test_scaled)))
specific_f1_dt.append(f1_score(y_test[column], dt.predict(test_scaled), pos_label=1))
# Make histogram
bar_width = 0.35
r1 = np.arange(len(y_specific.columns))
r2 = [x + bar_width for x in r1]
plt.figure(figsize=(10, 6))
plt.bar(r1, specific_acc_dt, label='Accuracy', color='b', width=bar_width)
plt.bar(r2, specific_f1_dt, label='F1 Score (Machine Failure)', color='r', width=bar_width)
plt.xlabel('Failure Modes')
plt.xticks([r + bar_width/2 for r in range(len(y_specific.columns))], y_specific.columns)
plt.ylabel('Score')
plt.title('Decision Tree - Accuracy and F1 Scores for Different Failure Modes')
plt.legend()
plt.show()
In [ ]:
# Random Forest Model
specific_rfs = []
for column in y_specific.columns:
specific_rf = RandomForestClassifier(random_state=42)
specific_rf.fit(train_scaled, y_train[column])
specific_rfs.append(specific_rf)
In [ ]:
# Evaluate the best models
for rf, column in zip(specific_rfs, y_specific.columns):
print(f"{column} - Training Accuracy:", accuracy_score(y_train[column], rf.predict(train_scaled)))
print(f"{column} - Testing Accuracy:", accuracy_score(y_test[column], rf.predict(test_scaled)))
print(f"{column} - Classification Report (Test):\n", classification_report(y_test[column], rf.predict(test_scaled), zero_division=0))
TWF - Training Accuracy: 1.0
TWF - Testing Accuracy: 0.9953333333333333
TWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2986
           1       0.00      0.00      0.00        14
    accuracy                           1.00      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       0.99      1.00      0.99      3000

HDF - Training Accuracy: 1.0
HDF - Testing Accuracy: 0.9973333333333333
HDF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2975
           1       1.00      0.68      0.81        25
    accuracy                           1.00      3000
   macro avg       1.00      0.84      0.90      3000
weighted avg       1.00      1.00      1.00      3000

PWF - Training Accuracy: 1.0
PWF - Testing Accuracy: 0.9956666666666667
PWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2969
           1       0.95      0.61      0.75        31
    accuracy                           1.00      3000
   macro avg       0.97      0.81      0.87      3000
weighted avg       1.00      1.00      1.00      3000

OSF - Training Accuracy: 1.0
OSF - Testing Accuracy: 0.9943333333333333
OSF - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      1.00      1.00      2972
           1       1.00      0.39      0.56        28
    accuracy                           0.99      3000
   macro avg       1.00      0.70      0.78      3000
weighted avg       0.99      0.99      0.99      3000

RNF - Training Accuracy: 1.0
RNF - Testing Accuracy: 0.9976666666666667
RNF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2993
           1       0.00      0.00      0.00         7
    accuracy                           1.00      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       1.00      1.00      1.00      3000
In [ ]:
# Make Bar chart of Accuracy, F1 Score for Failure
specific_acc_rf = []
specific_f1_rf = []
for rf, column in zip(specific_rfs, y_specific.columns):
specific_acc_rf.append(accuracy_score(y_test[column], rf.predict(test_scaled)))
specific_f1_rf.append(f1_score(y_test[column], rf.predict(test_scaled), pos_label=1))
# Make histogram
bar_width = 0.35
r1 = np.arange(len(y_specific.columns))
r2 = [x + bar_width for x in r1]
plt.figure(figsize=(10, 6))
plt.bar(r1, specific_acc_rf, label='Accuracy', color='b', width=bar_width)
plt.bar(r2, specific_f1_rf, label='F1 Score (Machine Failure)', color='r', width=bar_width)
plt.xlabel('Failure Modes')
plt.xticks([r + bar_width/2 for r in range(len(y_specific.columns))], y_specific.columns)
plt.ylabel('Score')
plt.title('Random Forest - Accuracy and F1 Scores for Different Failure Modes')
plt.legend()
plt.show()
In [ ]:
# Accuracy is largely uninformative due to the extremely high proportion of non-failures, and the F1 scores are poor
# Oversampling is tried again in this instance as a remedy
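To make the first point concrete: a trivial classifier that always predicts "no failure" already reaches these accuracy levels, so accuracy alone says little here (a minimal sketch, assuming y_test from the preprocessing section).
In [ ]:
# Baseline accuracy of always predicting the majority class (no failure)
for column in ['TWF', 'HDF', 'PWF', 'OSF', 'RNF']:
    baseline = (y_test[column] == 0).mean()
    print(f"{column}: always-negative baseline accuracy = {baseline:.4f}")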
b. With Oversampling¶
In [ ]:
# Oversample for all specific targets
# SMOTE (which synthesizes new points) struggles with so few failure points, so RandomOverSampler is used instead
# The sampling strategy is kept low to limit extreme overfitting
# Multiple values were tested; 0.2 gave the best results
oversampler = RandomOverSampler(random_state=42, sampling_strategy=.2)
X_os = []
y_os = []
for column in y_specific.columns:
X_resampled, y_resampled = oversampler.fit_resample(train_scaled, y_train[column])
X_os.append(X_resampled)
y_os.append(y_resampled)
In [ ]:
# Get new failure count for each failure mode
for i, column in enumerate(y_specific.columns):
print(f"{column} Count: {y_os[i].value_counts()[1]}")
print(f"{column} Percentage: {y_os[i].value_counts()[1] * 100 / y_os[i].shape[0]}%")
TWF Count: 1393
TWF Percentage: 16.660686520751106%
HDF Count: 1382
HDF Percentage: 16.666666666666668%
PWF Count: 1387
PWF Percentage: 16.66466418358765%
OSF Count: 1386
OSF Percentage: 16.666666666666668%
RNF Count: 1397
RNF Percentage: 16.660703637447824%
In [ ]:
# Building Random Forest and Tuned Decision Tree Model for each
In [ ]:
# Hyperparameter tuning for Pruned Decision Trees
param_grid = {
'max_depth': [None, 10, 20, 30],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4],
'ccp_alpha': [0.0, 0.001, 0.01, 0.1]
}
grid_search = GridSearchCV(
DecisionTreeClassifier(random_state=42),
param_grid,
cv=5,
scoring='f1',
n_jobs=-1,
verbose=1
)
best_dts_os = []
for x, yy, column in zip(X_os, y_os, y_specific.columns):
grid_search.fit(x, yy)
print(f"{column} Parameters found: ", grid_search.best_params_)
best_dts_os.append(grid_search.best_estimator_)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
TWF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
HDF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
PWF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
OSF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Fitting 5 folds for each of 144 candidates, totalling 720 fits
RNF Parameters found:  {'ccp_alpha': 0.0, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
In [ ]:
# Evaluate the best models
for dt, x, yy, column in zip(best_dts_os, X_os, y_os, y_specific.columns):
print(f"{column} - Training Accuracy:", accuracy_score(yy, dt.predict(x)))
print(f"{column} - Testing Accuracy:", accuracy_score(y_test[column], dt.predict(test_scaled)))
print(f"{column} - Classification Report (Test):\n", classification_report(y_test[column], dt.predict(test_scaled)))
TWF - Training Accuracy: 1.0
TWF - Testing Accuracy: 0.9933333333333333
TWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2986
           1       0.12      0.07      0.09        14
    accuracy                           0.99      3000
   macro avg       0.56      0.53      0.54      3000
weighted avg       0.99      0.99      0.99      3000

HDF - Training Accuracy: 1.0
HDF - Testing Accuracy: 0.998
HDF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2975
           1       0.91      0.84      0.87        25
    accuracy                           1.00      3000
   macro avg       0.96      0.92      0.94      3000
weighted avg       1.00      1.00      1.00      3000

PWF - Training Accuracy: 1.0
PWF - Testing Accuracy: 0.9933333333333333
PWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2969
           1       0.74      0.55      0.63        31
    accuracy                           0.99      3000
   macro avg       0.87      0.77      0.81      3000
weighted avg       0.99      0.99      0.99      3000

OSF - Training Accuracy: 1.0
OSF - Testing Accuracy: 0.9963333333333333
OSF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2972
           1       0.90      0.68      0.78        28
    accuracy                           1.00      3000
   macro avg       0.95      0.84      0.89      3000
weighted avg       1.00      1.00      1.00      3000

RNF - Training Accuracy: 1.0
RNF - Testing Accuracy: 0.996
RNF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2993
           1       0.00      0.00      0.00         7
    accuracy                           1.00      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       1.00      1.00      1.00      3000
In [ ]:
# Make Bar chart of Accuracy, F1 Score for Failure
specific_acc_dt_os = []
specific_f1_dt_os = []
for dt, column in zip(best_dts_os, y_specific.columns):
specific_acc_dt_os.append(accuracy_score(y_test[column], dt.predict(test_scaled)))
specific_f1_dt_os.append(f1_score(y_test[column], dt.predict(test_scaled), pos_label=1))
# Make histogram
bar_width = 0.35
r1 = np.arange(len(y_specific.columns))
r2 = [x + bar_width for x in r1]
plt.figure(figsize=(10, 6))
plt.bar(r1, specific_acc_dt_os, label='Accuracy', color='b', width=bar_width)
plt.bar(r2, specific_f1_dt_os, label='F1 Score (Machine Failure)', color='r', width=bar_width)
plt.xlabel('Failure Modes')
plt.xticks([r + bar_width/2 for r in range(len(y_specific.columns))], y_specific.columns)
plt.ylabel('Score')
plt.title('Oversampling - Decision Tree - Accuracy and F1 Scores for Different Failure Modes')
plt.legend()
plt.show()
In [ ]:
# Random Forest Model
specific_rfs_os = []
for column, x, yy in zip(y_specific.columns, X_os, y_os):
specific_rf = RandomForestClassifier(random_state=42)
specific_rf.fit(x, yy)
specific_rfs_os.append(specific_rf)
In [ ]:
# Evaluate the best models
for rf, column, x, yy in zip(specific_rfs_os, y_specific.columns, X_os, y_os):
print(f"{column} - Training Accuracy:", accuracy_score(yy, rf.predict(x)))
print(f"{column} - Testing Accuracy:", accuracy_score(y_test[column], rf.predict(test_scaled)))
print(f"{column} - Classification Report (Test):\n", classification_report(y_test[column], rf.predict(test_scaled), zero_division=0))
TWF - Training Accuracy: 1.0
TWF - Testing Accuracy: 0.9953333333333333
TWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2986
           1       0.50      0.07      0.12        14
    accuracy                           1.00      3000
   macro avg       0.75      0.54      0.56      3000
weighted avg       0.99      1.00      0.99      3000

HDF - Training Accuracy: 1.0
HDF - Testing Accuracy: 0.997
HDF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2975
           1       0.86      0.76      0.81        25
    accuracy                           1.00      3000
   macro avg       0.93      0.88      0.90      3000
weighted avg       1.00      1.00      1.00      3000

PWF - Training Accuracy: 1.0
PWF - Testing Accuracy: 0.9936666666666667
PWF - Classification Report (Test):
               precision    recall  f1-score   support
           0       0.99      1.00      1.00      2969
           1       0.83      0.48      0.61        31
    accuracy                           0.99      3000
   macro avg       0.91      0.74      0.80      3000
weighted avg       0.99      0.99      0.99      3000

OSF - Training Accuracy: 1.0
OSF - Testing Accuracy: 0.995
OSF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2972
           1       0.93      0.50      0.65        28
    accuracy                           0.99      3000
   macro avg       0.96      0.75      0.82      3000
weighted avg       0.99      0.99      0.99      3000

RNF - Training Accuracy: 1.0
RNF - Testing Accuracy: 0.9976666666666667
RNF - Classification Report (Test):
               precision    recall  f1-score   support
           0       1.00      1.00      1.00      2993
           1       0.00      0.00      0.00         7
    accuracy                           1.00      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       1.00      1.00      1.00      3000
In [ ]:
# Make Bar chart of Accuracy, F1 Score for Failure
specific_acc_rf_os = []
specific_f1_rf_os = []
for rf, column in zip(specific_rfs_os, y_specific.columns):
specific_acc_rf_os.append(accuracy_score(y_test[column], rf.predict(test_scaled)))
specific_f1_rf_os.append(f1_score(y_test[column], rf.predict(test_scaled), pos_label=1))
# Make histogram
bar_width = 0.35
r1 = np.arange(len(y_specific.columns))
r2 = [x + bar_width for x in r1]
plt.figure(figsize=(10, 6))
plt.bar(r1, specific_acc_rf_os, label='Accuracy', color='b', width=bar_width)
plt.bar(r2, specific_f1_rf_os, label='F1 Score (Machine Failure)', color='r', width=bar_width)
plt.xlabel('Failure Modes')
plt.xticks([r + bar_width/2 for r in range(len(y_specific.columns))], y_specific.columns)
plt.ylabel('Score')
plt.title('Oversampling Random Forest - Accuracy and F1 Scores for Different Failure Modes')
plt.legend()
plt.show()
In [ ]:
# Comparing F1 scores (class 1) between oversampled and as-is models
# Make histogram
bar_width = 0.2
r1 = np.arange(len(y_specific.columns))
r2 = [x + bar_width for x in r1]
r3 = [x + bar_width for x in r2]
r4 = [x + bar_width for x in r3]
plt.figure(figsize=(10, 6))
plt.bar(r1, specific_f1_dt_os, label='Decision Tree - Oversampled', color='b', width=bar_width)
plt.bar(r2, specific_f1_rf_os, label='Random Forest - Oversampled', color='r', width=bar_width)
plt.bar(r3, specific_f1_dt, label='Decision Tree', color='g', width=bar_width)
plt.bar(r4, specific_f1_rf, label='Random Forest', color='y', width=bar_width)
plt.xlabel('Failure Modes')
plt.xticks([r + bar_width*1.5 for r in range(len(y_specific.columns))], y_specific.columns)
plt.ylabel('F1 Score (Machine Failure)')
plt.title('F1 Score (Machine Failure) for Different Failure Modes in 4 Different Models')
plt.legend()
plt.show()
9. Comparing Specific Failure Modes to Overall¶
In [ ]:
# Comparing the performance of the Decision Tree trained without Oversampling on Specific Failure Modes
# to the performance of the Tuned Random Forest Model on Overall Machine Failure
# (RNF will be omitted at this point, as further comparisons to it serve no greater insight)
In [ ]:
# Bar graph of F1
targets = ['Machine Failure (RF)', 'TWF (DT)', 'HDF (DT)', 'PWF (DT)', 'OSF (DT)']
f1_scores_full = [f1_rf, specific_f1_dt[0], specific_f1_dt[1], specific_f1_dt[2], specific_f1_dt[3]]
plt.figure(figsize=(10, 6))
plt.bar(targets, f1_scores_full, color='b')
plt.xlabel('Failure Modes')
plt.ylabel('F1 Score (Machine Failure)')
plt.title('F1 Score (Machine Failure) for General and Specific Failure Modes')
#show grid but only in the y axis
plt.grid(axis='y')
plt.show()
In [ ]:
# Confusion Matrices
fig, axs = plt.subplots(1, 5, figsize=(25, 5))
# Machine Failure (RF)
y_threshold = (y_scores_rf >= best_threshold).astype(int)
ConfusionMatrixDisplay.from_predictions(y_test['Machine failure'], y_threshold, ax=axs[0], cmap='YlGnBu', colorbar=0)
axs[0].set_title("Machine Failure (RF)")
# TWF (DT)
ConfusionMatrixDisplay.from_predictions(y_test['TWF'], best_dts[0].predict(test_scaled), ax=axs[1], cmap='YlGnBu', colorbar=0)
axs[1].set_title("TWF (DT)")
# HDF (DT)
ConfusionMatrixDisplay.from_predictions(y_test['HDF'], best_dts[1].predict(test_scaled), ax=axs[2], cmap='YlGnBu', colorbar=0)
axs[2].set_title("HDF (DT)")
# PWF (DT)
ConfusionMatrixDisplay.from_predictions(y_test['PWF'], best_dts[2].predict(test_scaled), ax=axs[3], cmap='YlGnBu', colorbar=0)
axs[3].set_title("PWF (DT)")
# OSF (DT)
ConfusionMatrixDisplay.from_predictions(y_test['OSF'], best_dts[3].predict(test_scaled), ax=axs[4], cmap='YlGnBu')
axs[4].set_title("OSF (DT)")
plt.tight_layout()
plt.show()
In [ ]:
# Precision-Recall Curves
# Get predicted probabilities from each specific-mode decision tree
y_scores_twf = best_dts[0].predict_proba(test_scaled)[:, 1]
y_scores_hdf = best_dts[1].predict_proba(test_scaled)[:, 1]
y_scores_pwf = best_dts[2].predict_proba(test_scaled)[:, 1]
y_scores_osf = best_dts[3].predict_proba(test_scaled)[:, 1]
# Calculate precision-recall pairs for different thresholds
precision_twf, recall_twf, thresholds_twf = precision_recall_curve(y_test['TWF'], y_scores_twf, pos_label=1)
precision_hdf, recall_hdf, thresholds_hdf = precision_recall_curve(y_test['HDF'], y_scores_hdf, pos_label=1)
precision_pwf, recall_pwf, thresholds_pwf = precision_recall_curve(y_test['PWF'], y_scores_pwf, pos_label=1)
precision_osf, recall_osf, thresholds_osf = precision_recall_curve(y_test['OSF'], y_scores_osf, pos_label=1)
# Calculate AUC for each curve
auc_twf = auc(recall_twf, precision_twf)
auc_hdf = auc(recall_hdf, precision_hdf)
auc_pwf = auc(recall_pwf, precision_pwf)
auc_osf = auc(recall_osf, precision_osf)
# Plot the precision-recall curves
plt.figure(figsize=(12, 8))
plt.plot(recall_rf_tuned_threshold, precision_rf_tuned_threshold, label=f'Random Forest Tuned, AP = {average_precision_score(y_test["Machine failure"], y_pred_rf_tuned_threshold):.2f}, AUC = {auc_rf_tuned_threshold:.2f}')
plt.plot(recall_twf, precision_twf, label=f'TWF (DT) AP = {average_precision_score(y_test["TWF"], y_scores_twf):.2f}, AUC = {auc_twf:.2f}')
plt.plot(recall_hdf, precision_hdf, label=f'HDF (DT) AP = {average_precision_score(y_test["HDF"], y_scores_hdf):.2f}, AUC = {auc_hdf:.2f}')
plt.plot(recall_pwf, precision_pwf, label=f'PWF (DT) AP = {average_precision_score(y_test["PWF"], y_scores_pwf):.2f}, AUC = {auc_pwf:.2f}')
plt.plot(recall_osf, precision_osf, label=f'OSF (DT) AP = {average_precision_score(y_test["OSF"], y_scores_osf):.2f}, AUC = {auc_osf:.2f}')
# Add the y = 1 - x reference line for comparison
x = np.linspace(0, 1, 100)
plt.plot(x, 1 - x, 'r--', label='y = 1 - x')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curves for Machine Failure and Other Targets')
plt.legend(loc='lower left')
plt.grid(True)
plt.show()
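A natural way to close the loop on this comparison (not done above) is to OR the four mode-specific predictions into a single "any failure" prediction and score it against the overall Machine failure label, alongside the tuned random forest; a hedged sketch, assuming best_dts, y_pred_rf_tuned_threshold, and the test objects defined earlier. Note that RNF-only failures in the test labels cannot be captured this way, consistent with RNF being omitted above.
In [ ]:
# Combine the TWF/HDF/PWF/OSF decision trees into one "any failure" prediction (illustrative)
any_failure_pred = np.zeros(len(test_scaled), dtype=int)
for dt_mode in best_dts[:4]:  # TWF, HDF, PWF, OSF models; RNF omitted
    any_failure_pred |= dt_mode.predict(test_scaled).astype(int)

print("Combined specific-mode trees - F1 (Machine failure):",
      f1_score(y_test['Machine failure'], any_failure_pred, pos_label=1))
print("Tuned random forest - F1 (Machine failure):",
      f1_score(y_test['Machine failure'], y_pred_rf_tuned_threshold, pos_label=1))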