Commit e77fa008 authored by AOUAD Mohamed, Jad

Resolved conflicts

parents 89cda39c c72d8660
{
"julia.environmentPath": "/Users/ilyaschahed/git/mini-projet-intro-ml"
}
\ No newline at end of file
File added
@@ -16,6 +16,11 @@ from sklearn.preprocessing import MinMaxScaler, StandardScaler, OrdinalEncoder,
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import learning_curve
from sklearn.model_selection import GridSearchCV
from binary_classification_workflow import *
"""
@@ -496,8 +501,36 @@ def display_results(dict_models, X_train, y_train, X_test, y_test, cv, disp_col)
new_row = {"Model Name": model_name, disp_col: rounded_score}
df_results = pd.concat([df_results, pd.DataFrame([new_row])], ignore_index=True)
df_results = df_results.style.highlight_max(subset=[disp_col], color='salmon') #highlight the model with the higher f1 score
conf_matrix = confusion_matrix(y_test, best_model.predict(X_test))
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title(f'Confusion Matrix - {model_name}')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()
# Print and analyze additional evaluation metrics
y_pred = best_model.predict(X_test)
print(f'Model: {model_name}')
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Precision: {precision_score(y_test, y_pred)}')
print(f'Recall: {recall_score(y_test, y_pred)}')
print(f'ROC-AUC: {roc_auc_score(y_test, best_model.predict_proba(X_test)[:, 1])}')
print('\n')
# Plot learning curves
train_sizes, train_scores, valid_scores = learning_curve(best_model, X_train, y_train, cv=cv, scoring='f1', n_jobs=-1)
plt.figure(figsize=(8, 6))
plt.plot(train_sizes, np.mean(train_scores, axis=1), label='Training F1 Score')
plt.plot(train_sizes, np.mean(valid_scores, axis=1), label='Validation F1 Score')
plt.xlabel('Training Examples')
plt.ylabel('F1 Score')
plt.legend()
plt.title(f'Learning Curves - {model_name}')
plt.show()
# Apply styling after creating the DataFrame
df_results = df_results.style.highlight_max(subset=[disp_col], color='salmon') #highlight the model with the higher f1 score
return df_results
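For context, a minimal usage sketch of the updated display_results follows. Only the signature comes from the hunk header above; the toy data, the model dictionary, the StratifiedKFold splitter, and the "F1 Score" column label are illustrative assumptions, not part of the commit.

# Hypothetical usage sketch; assumes display_results (as modified above) is already in scope.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Toy binary-classification data standing in for the project's dataset.
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)

# dict_models is assumed to map a display name to an already-fitted estimator.
dict_models = {
    "Logistic Regression": LogisticRegression(max_iter=1000).fit(X_train, y_train),
    "Random Forest": RandomForestClassifier(random_state=0).fit(X_train, y_train),
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# disp_col names the score column that highlight_max styles; "F1 Score" is assumed here.
styled_results = display_results(dict_models, X_train, y_train, X_test, y_test, cv, "F1 Score")

Displaying styled_results in a notebook renders the highlighted results table; the confusion matrices and learning curves are shown as the function iterates over the models.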