Merge branch 'xiran' into 'main'

SVM & results. See merge request !6

Merge branch 'xiran' into 'main'
df56c41e · ZHANG Zuoyu · 8fcce989 · 9f53b993 · df56c41e · df56c41e
Commit df56c41e authored 2 years ago by ZHANG Zuoyu
--- a/.DS_Store
+++ b/.DS_Store
--- a/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
--- a/Fonctions.py
+++ b/Fonctions.py
@@ -19,10 +19,11 @@ from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import Normalizer, StandardScaler
 from sklearn.impute import SimpleImputer
 from sklearn.tree import DecisionTreeClassifier
+from sklearn.svm import NuSVC

 from sklearn.decomposition import PCA

-from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split, ShuffleSplit
+from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split, ShuffleSplit, GridSearchCV

 from sklearn.metrics import roc_curve, auc, precision_score, recall_score

@@ -226,7 +227,7 @@ class Gaussian_NB:
  """
  def __init__(self):
    #Initialize the parameters
-      self.mean0,self.mean1 = 0,0
+      self.mean0,self.mean1,self.p_c1 = 0,0,0
      self.var0,self.var1 = 1,1
      self.p0,self.p1 = [0],[0]

@@ -293,6 +294,56 @@ class logistic_regression_nn(nn.Module):
      z = torch.sigmoid(self.oupt(z))  # for BCELoss()
      return z
    
+from sklearn.svm import NuSVC
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GridSearchCV
+
+class SVM:
+    """
+    Classifier built on NuSVC from the sklearn library.
+
+    The hyper-parameters nu and gamma are chosen by a cross-validated grid
+    search (get_parameters) and stored on the instance; classification then
+    fits a NuSVC with those values and predicts on the test set.
+    """
+    def get_parameters(self,trainMatrix,trainCategory):
+        """
+        Select nu and gamma for NuSVC with a grid search scored on accuracy.
+
+        Parameters
+        ----------
+        trainMatrix: Features of the training data set
+                The size of trainMatrix is (n,p), where n the number of samples, p the number of
+                features.
+        trainCategory: Labels of the training data set
+                The size of trainCategory is (n,) or (n,1), where n the number of samples
+
+        Returns
+        ----------
+        None. The best values found are stored in self.gamma and self.nu.
+        """
+        # 7 values of nu x 10 values of gamma, each evaluated with cv=10.
+        param_grid = {'nu': np.linspace(0.1, 0.7, 7), 'gamma': np.linspace(0.01, 1, 10)}
+        gsearch = GridSearchCV(NuSVC(), param_grid=param_grid, scoring='accuracy', cv=10)
+        # np.asarray lets plain lists / column vectors be passed as labels too.
+        gsearch.fit(trainMatrix, np.asarray(trainCategory).ravel())
+        self.gamma = gsearch.best_params_['gamma']
+        self.nu = gsearch.best_params_['nu']
+
+    def classification(self, X_train, y_train, X_test):
+        """
+        Tune, fit and predict in one call.
+
+        Parameters
+        ----------
+        X_train: Features of the training data set
+                The size of X_train is (n,p), where n the number of samples, p the number of
+                features.
+        y_train: Labels of the training data set
+                The size of y_train is (n,) or (n,1), where n the number of samples
+        X_test: Features of the testing data set
+                The size of X_test is (m,p), where m the number of samples, p the number of
+                features.
+
+        Returns
+        ----------
+        y_pred: the labels predicted for X_test
+        y_pred_proba: the output of NuSVC.predict_proba for X_test
+        """
+        y_train = np.asarray(y_train).ravel()
+        # The full grid search is rerun on every call, using only the training data.
+        self.get_parameters(X_train,y_train)
+        # probability=True is what makes predict_proba available on the fitted model.
+        svm = NuSVC(nu = self.nu, gamma = self.gamma,probability=True)
+        svm.fit(X_train, y_train)
+        y_pred = svm.predict(X_test)
+        y_pred_proba = svm.predict_proba(X_test)
+        return y_pred, y_pred_proba
+
+    def score(self, y_true, y_pred):
+        """
+        Accuracy: the fraction of samples whose predicted label equals the true label.
+
+        Parameters
+        ----------
+        y_true: the true labels, of size (n,) or (n,1)
+        y_pred: the predicted labels, of size (n,) or (n,1)
+
+        Returns
+        ----------
+        acc: the mean of the element-wise equality between y_true and y_pred
+
+        Raises
+        ----------
+        ValueError: if y_true and y_pred do not hold the same number of labels
+        """
+        # Flatten both sides so that a column vector (n,1) is compared element by
+        # element with a flat vector (n,) instead of being broadcast to (n,n).
+        y_true = np.asarray(y_true).ravel()
+        y_pred = np.asarray(y_pred).ravel()
+        if y_true.shape != y_pred.shape:
+            raise ValueError("y_true and y_pred must contain the same number of labels")
+        acc = np.mean(y_true == y_pred)
+        return acc
+
+
 def test_logistic(data, dim, n, learning_rate):
    """The test function for the logistic method on the dataset"""
    X_train, X_test, y_train, y_test = train_test_split(data[:, 0:-1], data[:,-1], test_size=0.25, random_state=42)
@@ -447,5 +498,39 @@ def testMLP(data,epoch,learningrate):
  plt.title('The ROC curve of the decision tree model')
  plt.show()

+def testSVM(data):
+    """
+    The test function for the SVM method on the dataset
    
-
+    Splits the data into a training part (75%) and a testing part (25%), tunes and
+    fits the SVM on the training part, then prints the accuracy, precision, recall
+    and AUC obtained on the testing part and shows the ROC curve.
+
+    Input
+    ----------
+    data: the dataset
+        The size of input is (n,p+1), where n is the number of samples, p the number of features. The last column is the label.
+        NOTE(review): column 1 of the predicted probabilities is used as the score, which presumably assumes two classes - confirm.
+    """
+    # the labels are reshaped to a column vector (n,1) before the split
+    X_train, X_test, y_train, y_test = train_test_split(data[:, 0:-1], data[:,-1].reshape(data[:,-1].shape[0],1), test_size=0.25, random_state=42)
+    #define the model
+    svm = SVM()
+    #train the model on the training dataset and get the predicted labels
+    y_pred, y_pred_proba = svm.classification(X_train, y_train, X_test)
+    #get the accuracy of the model
+    acc_svm = svm.score(y_test,y_pred)
+    #calculate the ratios (precision, recall, AUC) used to evaluate the method
+    P_score = precision_score(y_test,y_pred)
+    R_score = recall_score(y_test,y_pred)
+    # second column of the predicted probabilities is used as the ROC score
+    y_score = y_pred_proba[:,1]
+    fpr, tpr, thresholds = roc_curve(y_test,y_score)
+    Area_Under_Curve = auc(fpr, tpr)
+    print("The accuracy of the SVM classifier is: ", acc_svm, "\n",
+    "The precision of the SVM classifier is: ", P_score, "\n",
+    "The recall of the SVM classifier is: ", R_score, "\n",
+    "The AUC of the SVM classifier is: ", Area_Under_Curve, "\n")
+    #plot the ROC curve
+    plt.plot(fpr, tpr, 'b',label='AUC = %0.2f'% Area_Under_Curve)
+    plt.legend(loc='lower right')
+    # red dashed diagonal from (0,0) to (1,1) drawn as a reference line
+    plt.plot([0,1],[0,1],'r--')
+    plt.xlim([-0.1,1.1])
+    plt.ylim([-0.1,1.1])
+    plt.xlabel('False Positive Rate')                            
+    plt.ylabel('True Positive Rate')                             
+    plt.title('The ROC curve of the SVM classifier')
+    plt.show()
--- a/Results/.DS_Store
+++ b/Results/.DS_Store
--- a/Results/SVM_roc_banknote.png
+++ b/Results/SVM_roc_banknote.png
--- a/Results/SVM_roc_ckd.png
+++ b/Results/SVM_roc_ckd.png
--- a/Results/result.jpg
+++ b/Results/result.jpg
--- a/Results/result_1.png
+++ b/Results/result_1.png
--- a/Results/result_2.png
+++ b/Results/result_2.png
--- a/__pycache__/Fonctions.cpython-310.pyc
+++ b/__pycache__/Fonctions.cpython-310.pyc
--- a/__pycache__/Fonctions.cpython-311.pyc
+++ b/__pycache__/Fonctions.cpython-311.pyc
--- a/__pycache__/Fonctions.cpython-39.pyc
+++ b/__pycache__/Fonctions.cpython-39.pyc
--- a/__pycache__/test_unit.cpython-310.pyc
+++ b/__pycache__/test_unit.cpython-310.pyc
--- a/chronic_kidney_disease.txt
+++ b/chronic_kidney_disease.txt
--- a/test_unit.py
+++ b/test_unit.py
@@ -30,9 +30,9 @@ class mytest(unittest.TestCase):

  @classmethod
  def test_Gaussian_NB(self):
+    # Runs Fonctions.test_Gaussian_NB once per dataset, printing a banner before each run.
+    # Because of @classmethod, self is the test class here, so data1/data2 are read from the class.
-    print("The test of Gaussian_NB model for chronic_kidney_disease")
+    print("The test of gaussian naive bayes model for chronic_kidney_disease")
    Fonctions.test_Gaussian_NB(self.data1)
-    print("The test of Gaussian_NB model for banknote_authentication_dataset")
+    print("The test of gaussian naive bayes model for banknote_authentication_dataset")
    Fonctions.test_Gaussian_NB(self.data2)

  @classmethod
@@ -42,5 +42,12 @@ class mytest(unittest.TestCase):
    print("The test of mlp model for banknote_authentication_dataset")
    Fonctions.testMLP(self.data2, 1000, 0.01)

+  @classmethod
+  def test_SVM(self):
+    # Runs Fonctions.testSVM once per dataset, printing a banner before each run.
+    # Because of @classmethod, self is the test class here, so data1/data2 are read from the class.
+    # NOTE(review): data1/data2 are presumably loaded in class-level setup outside this hunk - confirm.
+    # There are no assertions: a run only fails if testSVM raises.
+    print("The test of SVM model for chronic_kidney_disease")
+    Fonctions.testSVM(self.data1)
+    print("The test of SVM model for banknote_authentication_dataset")
+    Fonctions.testSVM(self.data2)
+
 if __name__ == '__main__':
    unittest.main(verbosity=2)