Commit 2631dc18 authored by Konstantin Gerd Eyhorn

whats happening..

parent d5c7ae90
@@ -39,23 +39,26 @@ T_features_filter = [
 B_features_filter = ["mean_correlation", "nb_measurements"]
 
 PICKLE_PATH = "dataset_pandas/temperature.pkl"
 RANDOM_SEED = 123456789
 
 ##### HYPERPARAMETERS #####
-EPOCHS = 300
-BATCH_SIZE = 32
+EPOCHS = 250
+BATCH_SIZE = 16
 CRITERION = nn.BCELoss()
 OPTIMIZER = torch.optim.Adam
 LEARNING_RATE = 0.01
 GROWTH_RATE = 16
 DROP_RATE = 0.5
 SCHEDULER_PATIENCE = 10
-SCHEDULER_FACTOR = 0.5
+SCHEDULER_FACTOR = 0.1
 SCHEDULER_EPS = 1e-8
 
 input_features = 11
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 class MLP(nn.Module):
     def __init__(self):
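Note: this hunk halves the batch size (32 → 16), trims training to 250 epochs, and makes each learning-rate decay 5× more aggressive (factor 0.5 → 0.1). The SCHEDULER_* constants presumably feed torch.optim.lr_scheduler.ReduceLROnPlateau; a minimal sketch of that wiring, using a stand-in linear layer rather than the MLP defined in this file:

import torch
import torch.nn as nn

model = nn.Linear(11, 1)  # stand-in for the MLP; input_features = 11
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # LEARNING_RATE
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",    # a loss is being monitored, so lower is better
    factor=0.1,    # SCHEDULER_FACTOR: multiply the LR by 0.1 on a plateau
    patience=10,   # SCHEDULER_PATIENCE: epochs without improvement before decay
    eps=1e-8,      # SCHEDULER_EPS: skip LR updates smaller than this
)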
@@ -110,7 +113,7 @@ def prepare_data() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     df = df.drop(columns=S_features_filter)
 
     # split the data into training and testing sets
-    train_df, test_df = train_test_split(df, test_size=0.2, random_state=123456789)
+    train_df, test_df = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
 
     print(
         f"Train alarm distribution (before undersampling): {train_df['alarm'].value_counts()}"
@@ -137,11 +140,20 @@ def prepare_data() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
 def train_model(X_train, y_train, X_test, y_test):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     torch.manual_seed(RANDOM_SEED)
 
     # Setting up the data loader
     train_loader = torch.utils.data.DataLoader(
-        list(zip(X_train, y_train)), batch_size=BATCH_SIZE, shuffle=True
+        list(zip(X_train, y_train)),
+        batch_size=BATCH_SIZE,
+        shuffle=True,
     )
 
+    # Setting up the test loader
+    test_loader = torch.utils.data.DataLoader(
+        list(zip(X_test, y_test)),
+        batch_size=BATCH_SIZE,
+        shuffle=False,
+    )
+
     # Define model
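Note: DataLoader accepts any sequence of (sample, label) pairs, and its default collate function converts numpy entries into stacked tensors one batch at a time, which is why list(zip(X_train, y_train)) works here. An equivalent formulation converts once up front with TensorDataset; a sketch with made-up shapes (matching input_features = 11):

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

X_train = np.random.rand(100, 11).astype(np.float32)            # hypothetical data
y_train = np.random.randint(0, 2, (100, 1)).astype(np.float32)  # hypothetical labels

train_loader = DataLoader(
    TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train)),
    batch_size=16,
    shuffle=True,  # the training set is shuffled; the test loader keeps order
)
data, target = next(iter(train_loader))
print(data.shape, target.shape)  # torch.Size([16, 11]) torch.Size([16, 1])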
@@ -176,6 +188,8 @@ def train_model(X_train, y_train, X_test, y_test):
             # Move data to device
             data, target = data.to(device), target.to(device)
 
+            model.train()
+
             # Zero the gradients
             optimizer.zero_grad()
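Note: the added model.train() re-enables dropout (DROP_RATE = 0.5 suggests the MLP uses it) after the evaluation pass at the end of the previous epoch put the model into eval mode; calling it per batch rather than once per epoch is redundant but harmless. A self-contained illustration of what the toggle changes:

import torch
import torch.nn as nn

torch.manual_seed(0)
drop = nn.Dropout(p=0.5)  # DROP_RATE
x = torch.ones(1, 4)

drop.train()    # what model.train() sets on every Dropout submodule
print(drop(x))  # random zeros; survivors scaled by 1 / (1 - 0.5) = 2.0

drop.eval()     # what model.eval() sets before validation
print(drop(x))  # identity: tensor([[1., 1., 1., 1.]])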
@@ -195,13 +209,17 @@ def train_model(X_train, y_train, X_test, y_test):
             # Display loss
             t.set_postfix(train_loss=f"{loss.item():.4f}")
-            scheduler.step(loss)
 
         # print optimizer learning rate
         print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
 
+        # compute train loss
+        epoch_train_loss /= len(train_loader)
+
+        # update scheduler
+        scheduler.step(epoch_train_loss)
 
         model.eval()
         with torch.no_grad():
             # Evaluate model on test set
             y_test_pred = (
                 model(torch.tensor(X_test).float().to(device)).cpu().detach().numpy()
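Note: this hunk looks like the substantive fix of the commit. The old code called scheduler.step(loss) on every batch, so ReduceLROnPlateau's patience counter ticked once per iteration against noisy single-batch losses, presumably decaying the learning rate much too eagerly. The new code steps once per epoch on the mean batch loss. A runnable sketch of the corrected loop shape (the tiny model, random data, and epoch count are stand-ins, not the repository's):

import torch
import torch.nn as nn

torch.manual_seed(0)
model = nn.Sequential(nn.Linear(11, 8), nn.ReLU(), nn.Linear(8, 1), nn.Sigmoid())
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10)
X, y = torch.rand(64, 11), torch.randint(0, 2, (64, 1)).float()
train_loader = torch.utils.data.DataLoader(list(zip(X, y)), batch_size=16, shuffle=True)

for epoch in range(5):
    model.train()
    epoch_train_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()
    epoch_train_loss /= len(train_loader)  # mean batch loss over the epoch
    scheduler.step(epoch_train_loss)       # one plateau check per epoch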
@@ -232,58 +250,62 @@ def train_model(X_train, y_train, X_test, y_test):
     plt.show()
 
     # load best model from checkpoint
-    print(f"Loading model from checkpoint: mlp_{np.argmin(test_losses)}.pth")
-    model.load_state_dict(torch.load(f"checkpoints/mlp_{np.argmin(test_losses)}.pth"))
+    # print(f"Loading model from checkpoint: mlp_{np.argmin(test_losses)}.pth")
+    # model.load_state_dict(torch.load(f"checkpoints/mlp_{np.argmin(test_losses)}.pth"))
 
     return model
 
 
 def evaluate_model(model, X_test, y_test):
     model.eval()
     with torch.no_grad():
         # predict on test set
-        y_test_pred = model(torch.tensor(X_test).float().to(device)).cpu().detach().numpy()
+        y_test_pred = (
+            model(torch.tensor(X_test).float().to(device)).cpu().detach().numpy()
+        )
         y_test_pred_binary = np.where(y_test_pred > 0.5, 1, 0)
 
         # print parameter count of model
         print(f"Parameter count: {sum(p.numel() for p in model.parameters())}")
 
         # calculate confusion matrix
         cm = confusion_matrix(y_test, y_test_pred_binary)
         print(cm)
 
         # calculate accuracy
         accuracy = np.sum(np.diag(cm)) / np.sum(cm)
         print(f"Accuracy: {accuracy}")
 
-        # print recall
+        # calculate recall
         recall = cm[1, 1] / (cm[1, 0] + cm[1, 1])
         print(f"Recall: {recall}")
 
-        # print precision
+        # calculate precision
         precision = cm[1, 1] / (cm[0, 1] + cm[1, 1])
         print(f"Precision: {precision}")
 
-        # print F1 score
+        # calculate F1 score
         f1 = 2 * (precision * recall) / (precision + recall)
         print(f"F1 score: {f1}")
 
-        # print F2 score
+        # calculate F2 score
         f2 = 5 * (precision * recall) / (4 * precision + recall)
         print(f"F2 score: {f2}")
 
-        # print AUC
+        # calculate AUC
         auc = roc_auc_score(y_test, y_test_pred)
         print(f"AUC: {auc}")
 
-        # plot confusion matrix using seaborn
+        # plot confusion matrix
         sns.heatmap(cm, annot=True, fmt="d")
         plt.xlabel("Predicted")
         plt.ylabel("True")
         plt.show()
 
     return model
 
 
 def main():
     X_train, y_train, X_test, y_test = prepare_data()
     model = train_model(X_train, y_train, X_test, y_test)
     # print parameter count of model
     print(f"Parameter count: {sum(p.numel() for p in model.parameters())}")
     evaluate_model(model, X_test, y_test)
 
 
 if __name__ == "__main__":
......
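Note on the metrics in evaluate_model: with scikit-learn's confusion-matrix convention (rows = true labels, columns = predictions), cm[1, 1] / (cm[1, 0] + cm[1, 1]) is TP / (FN + TP), i.e. recall, and cm[1, 1] / (cm[0, 1] + cm[1, 1]) is TP / (FP + TP), i.e. precision; the F2 line is the beta = 2 case of F_beta = (1 + beta^2) * P * R / (beta^2 * P + R), which weights recall over precision. A small self-contained cross-check against scikit-learn, on made-up labels:

import numpy as np
from sklearn.metrics import (
    confusion_matrix, f1_score, fbeta_score, precision_score, recall_score
)

y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])  # hypothetical labels
y_pred = np.array([0, 1, 0, 1, 0, 1, 1, 0])  # hypothetical predictions

cm = confusion_matrix(y_true, y_pred)
recall = cm[1, 1] / (cm[1, 0] + cm[1, 1])     # TP / (FN + TP)
precision = cm[1, 1] / (cm[0, 1] + cm[1, 1])  # TP / (FP + TP)
f1 = 2 * (precision * recall) / (precision + recall)
f2 = 5 * (precision * recall) / (4 * precision + recall)

assert np.isclose(recall, recall_score(y_true, y_pred))
assert np.isclose(precision, precision_score(y_true, y_pred))
assert np.isclose(f1, f1_score(y_true, y_pred))
assert np.isclose(f2, fbeta_score(y_true, y_pred, beta=2))
print(cm, recall, precision, f1, f2)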