Fix loss calculation: record the mean training loss per epoch instead of the last batch's loss -> got to 94% accuracy

793491a5 · Konstantin Gerd Eyhorn · 48390805 · 793491a5 · 793491a5 · 793491a5
Commit 793491a5 authored Apr 14, 2024 by Konstantin Gerd Eyhorn
--- a/checkpoints/.gitkeep
+++ b/checkpoints/.gitkeep
--- a/environment.yaml
+++ b/environment.yaml
@@ -24,6 +24,7 @@ dependencies:
  - cf_xarray=0.9.0=pyhd8ed1ab_0
  - cftime=1.6.3=py311h1f0f07a_0
  - charset-normalizer=3.3.2=pyhd8ed1ab_0
+  - colorama=0.4.6=pyhd8ed1ab_0
  - comm=0.2.2=pyhd8ed1ab_0
  - contourpy=1.2.0=py311h9547e67_0
  - cuda-cudart=11.8.89=0
@@ -71,13 +72,18 @@ dependencies:
  - importlib-metadata=7.1.0=pyha770c72_0
  - importlib_metadata=7.1.0=hd8ed1ab_0
  - ipykernel=6.29.3=pyhd33586a_0
+  - ipympl=0.9.3=pyhd8ed1ab_0
  - ipython=8.22.2=pyh707e725_0
+  - ipython_genutils=0.2.0=py_1
+  - ipywidgets=8.1.2=pyhd8ed1ab_0
  - jack=1.9.22=h11f4161_0
  - jedi=0.19.1=pyhd8ed1ab_0
  - jinja2=3.1.3=pyhd8ed1ab_0
+  - joblib=1.4.0=pyhd8ed1ab_0
  - jpeg=9e=h166bdaf_2
  - jupyter_client=8.6.1=pyhd8ed1ab_0
  - jupyter_core=5.7.2=py311h38be061_0
+  - jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
  - keyutils=1.6.1=h166bdaf_0
  - kiwisolver=1.4.5=py311h9547e67_1
  - krb5=1.20.1=h81ceb04_0
@@ -180,6 +186,7 @@ dependencies:
  - packaging=24.0=pyhd8ed1ab_0
  - pandas=2.2.1=py311h320fe9a_0
  - parso=0.8.3=pyhd8ed1ab_0
+  - patsy=0.5.6=pyhd8ed1ab_0
  - pcre2=10.43=hcad00b1_0
  - pexpect=4.9.0=pyhd8ed1ab_0
  - pickleshare=0.7.5=py_1003
@@ -217,15 +224,20 @@ dependencies:
  - qt-main=5.15.8=h5d23da1_6
  - readline=8.2=h8228510_1
  - requests=2.31.0=pyhd8ed1ab_0
+  - scikit-learn=1.4.2=py311hc009520_0
  - scipy=1.12.0=py311h64a7726_2
+  - seaborn=0.13.2=hd8ed1ab_0
+  - seaborn-base=0.13.2=pyhd8ed1ab_0
  - setuptools=69.2.0=pyhd8ed1ab_0
  - shapely=2.0.3=py311h2032efe_0
  - sip=6.7.12=py311hb755f60_0
  - six=1.16.0=pyh6c4a22f_0
  - sqlite=3.45.2=h2c6b66d_0
  - stack_data=0.6.2=pyhd8ed1ab_0
+  - statsmodels=0.14.1=py311h1f0f07a_0
  - sympy=1.12=pypyh9d50eac_103
  - tbb=2021.9.0=hf52228f_0
+  - threadpoolctl=3.4.0=pyhc1e730c_0
  - tk=8.6.13=noxft_h4845f30_101
  - toml=0.10.2=pyhd8ed1ab_0
  - tomli=2.0.1=pyhd8ed1ab_0
@@ -233,12 +245,14 @@ dependencies:
  - torchtriton=2.2.0=py311
  - torchvision=0.17.2=py311_cu118
  - tornado=6.4=py311h459d7ec_0
+  - tqdm=4.66.2=pyhd8ed1ab_0
  - traitlets=5.14.2=pyhd8ed1ab_0
  - typing_extensions=4.10.0=pyha770c72_0
  - tzdata=2024a=h0c530f3_0
  - urllib3=2.2.1=pyhd8ed1ab_0
  - wcwidth=0.2.13=pyhd8ed1ab_0
  - wheel=0.43.0=pyhd8ed1ab_1
+  - widgetsnbextension=4.0.10=pyhd8ed1ab_0
  - xarray=2024.3.0=pyhd8ed1ab_0
  - xcb-util=0.4.0=h516909a_0
  - xcb-util-image=0.4.0=h166bdaf_0

--- a/mlp_train.py
+++ b/mlp_train.py
@@ -6,18 +6,23 @@ from tqdm import trange, tqdm
 from sklearn.model_selection import train_test_split
 import time
 import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix
+import seaborn as sns

 PICKLE_PATH = "dataset_pandas/temperature.pkl"


 ##### HYPERPARAMETERS #####
-EPOCHS = 500
+EPOCHS = 300
 BATCH_SIZE = 16
 CRITERION = nn.BCELoss()
 OPTIMIZER = torch.optim.Adam
 LEARNING_RATE = 0.01
 GROWTH_RATE = 16
 DROP_RATE = 0.5
+SCHEDULER_PATIENCE = 20
+SCHEDULER_FACTOR = 0.5
+SCHEDULER_EPS = 1e-8


 class MLP(nn.Module):
@@ -101,13 +106,18 @@ def train_model(X: np.ndarray, y: np.ndarray):

    # Define a Scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
-        optimizer, mode="min", factor=0.1, patience=10, verbose=True, eps=1e-8
+        optimizer,
+        mode="min",
+        factor=SCHEDULER_FACTOR,
+        patience=SCHEDULER_PATIENCE,
+        eps=SCHEDULER_EPS,
    )

    # Train model
    train_losses = []
    test_losses = []
    for epoch in range(EPOCHS):
+        epoch_train_loss = 0
        with tqdm(train_loader, unit="batch") as t:
            for data, target in t:
                t.set_description(f"Epoch {str(epoch).rjust(5)}")
@@ -123,6 +133,8 @@ def train_model(X: np.ndarray, y: np.ndarray):
                # Calculate loss
                loss = criterion(output, target.float().view(-1, 1))

+                epoch_train_loss += loss.item()
+
                # Backpropagation
                loss.backward()

@@ -133,24 +145,62 @@ def train_model(X: np.ndarray, y: np.ndarray):
                t.set_postfix(train_loss=f"{loss.item():.4f}")

            scheduler.step(loss)
+            # print optimizer learning rate
+            print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
+
+            # compute train loss
+            epoch_train_loss /= len(train_loader)

-            train_losses.append(loss.item())
            # Evaluate model on test set
-            y_pred = (
+            y_test_pred = (
                model(torch.tensor(X_test).float().to(device)).cpu().detach().numpy()
            )
            test_loss = criterion(
-                torch.tensor(y_pred).float(), torch.tensor(y_test).float().view(-1, 1)
+                torch.tensor(y_test_pred).float(),
+                torch.tensor(y_test).float().view(-1, 1),
            )
+
+            print(f"Train loss: {epoch_train_loss:.4f}")
            print(f"Test loss: {test_loss.item():.4f}")
            test_losses.append(test_loss.item())
+            train_losses.append(epoch_train_loss)
+
+            # save model if test loss has decreased
+            if len(test_losses) == 1 or test_loss < min(test_losses[:-1]):
+                torch.save(
+                    model.state_dict(),
+                    f"checkpoints/mlp_{epoch}.pth",
+                )

    # Plot losses
-    plt.plot(train_losses, label="Train loss")
-    plt.plot(test_losses, label="Test loss")
+    sns.lineplot(x=range(len(train_losses)), y=train_losses, label="Train loss")
+    sns.lineplot(x=range(len(test_losses)), y=test_losses, label="Test loss")
+    plt.xlabel("Epoch")
+    plt.ylabel("Loss")
    plt.legend()
    plt.show()

+    # load best model from checkpoint
+    print(f"Loading model from checkpoint: mlp_{np.argmin(test_losses)}.pth")
+    model.load_state_dict(torch.load(f"checkpoints/mlp_{np.argmin(test_losses)}.pth"))
+
+    # predict on test set
+    y_test_pred = model(torch.tensor(X_test).float().to(device)).cpu().detach().numpy()
+    y_test_pred_binary = np.where(y_test_pred > 0.5, 1, 0)
+
+    # calculate confusion matrix
+    cm = confusion_matrix(y_test, y_test_pred_binary)
+
+    # plot confusion matrix using seaborn
+    sns.heatmap(cm, annot=True, fmt="d")
+    plt.xlabel("Predicted")
+    plt.ylabel("True")
+    plt.show()
+
+    # calculate accuracy
+    accuracy = np.sum(np.diag(cm)) / np.sum(cm)
+    print(f"Accuracy: {accuracy}")
+
    return model