Skip to content
Snippets Groups Projects
Commit df56c41e authored by ZHANG Zuoyu's avatar ZHANG Zuoyu
Browse files

Merge branch 'xiran' into 'main'

SVM&results

See merge request !6
parents 8fcce989 9f53b993
Branches main
No related tags found
1 merge request: !6 SVM&results
.DS_Store 0 → 100644
File added
This diff is collapsed.
......@@ -19,10 +19,11 @@ from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import NuSVC
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split, ShuffleSplit
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split, ShuffleSplit, GridSearchCV
from sklearn.metrics import roc_curve, auc, precision_score, recall_score
......@@ -226,7 +227,7 @@ class Gaussian_NB:
"""
def __init__(self):
#Initialize the parameters
self.mean0,self.mean1 = 0,0
self.mean0,self.mean1,self.p_c1 = 0,0,0
self.var0,self.var1 = 1,1
self.p0,self.p1 = [0],[0]
......@@ -293,6 +294,56 @@ class logistic_regression_nn(nn.Module):
z = torch.sigmoid(self.oupt(z)) # for BCELoss()
return z
from sklearn.svm import NuSVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
class SVM:
    """
    Use NuSVC from the sklearn Library to classify the data

    The hyper-parameters nu and gamma are selected by a cross-validated grid
    search on the training data before the final model is fitted.

    Attributes
    ----------
    nu: the nu value selected by get_parameters (None until it has been called)
    gamma: the gamma value selected by get_parameters (None until it has been called)
    """
    def __init__(self):
        #Initialize the parameters; get_parameters overwrites them with the tuned values
        self.nu = None
        self.gamma = None

    def get_parameters(self,trainMatrix,trainCategory):
        """
        Select nu and gamma by grid search and store them on the instance

        Parameters
        ----------
        trainMatrix: Features of the training data set
        The size of trainMatrix is (n,p), where n the number of samples, p the number of
        features.
        trainCategory: Labels of the training data set
        The size of trainCategory is (n,), where n the number of samples

        Returns
        ----------
        None. The best values are stored in self.nu and self.gamma.
        """
        #7 candidate values for nu and 10 for gamma, compared by accuracy with 10-fold cross-validation
        param_grid = {'nu': np.linspace(0.1, 0.7, 7), 'gamma': np.linspace(0.01, 1, 10)}
        gsearch = GridSearchCV(NuSVC(), param_grid=param_grid, scoring='accuracy', cv=10)
        #np.ravel flattens column-vector labels and also accepts plain lists
        gsearch.fit(trainMatrix, np.ravel(trainCategory))
        self.gamma = gsearch.best_params_['gamma']
        self.nu = gsearch.best_params_['nu']

    def classification(self, X_train, y_train, X_test):
        """
        Tune the parameters, fit NuSVC on the training set and predict the testing set

        Parameters
        ----------
        X_train: Features of the training data set
        The size of X_train is (n,p), where n the number of samples, p the number of
        features.
        y_train: Labels of the training data set
        The size of y_train is (n,), where n the number of samples
        X_test: Features of the testing data set
        The size of X_test is (m,p), where m the number of samples, p the number of
        features.

        Returns
        ----------
        y_pred: the labels predicted for X_test
        y_pred_proba: the class probabilities returned by NuSVC.predict_proba for X_test
        """
        y_train = np.ravel(y_train)
        self.get_parameters(X_train,y_train)
        #probability=True is required so that predict_proba is available after fitting
        svm = NuSVC(nu = self.nu, gamma = self.gamma,probability=True)
        svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)
        y_pred_proba = svm.predict_proba(X_test)
        return y_pred, y_pred_proba

    def score(self, y_true, y_pred):
        """
        Compute the accuracy, i.e. the fraction of predictions equal to the true labels

        Parameters
        ----------
        y_true: the true labels, of size (n,) or (n,1)
        y_pred: the predicted labels, of size (n,) or (n,1)

        Returns
        ----------
        acc: the mean of the element-wise equality between y_true and y_pred

        Raises
        ----------
        ValueError: if y_true and y_pred do not contain the same number of labels
        """
        #flatten both inputs so that column vectors and flat vectors compare element by element
        y_true = np.ravel(y_true)
        y_pred = np.ravel(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                "y_true and y_pred must contain the same number of labels, got %d and %d"
                % (y_true.shape[0], y_pred.shape[0])
            )
        acc = np.mean(y_true == y_pred)
        return acc
def test_logistic(data, dim, n, learning_rate):
"""The test function for the logistic method on the dataset"""
X_train, X_test, y_train, y_test = train_test_split(data[:, 0:-1], data[:,-1], test_size=0.25, random_state=42)
......@@ -447,5 +498,39 @@ def testMLP(data,epoch,learningrate):
plt.title('The ROC curve of the decision tree model')
plt.show()
def testSVM(data):
    """
    Train the SVM classifier on a dataset, print its evaluation metrics and plot its ROC curve
    Input
    ----------
    data: the dataset
    The size of input is (n,p+1), where n is the number of samples, p the number of features. The last column is the label.
    """
    #separate the features from the labels; the labels are kept as a column vector of size (n,1)
    features = data[:, 0:-1]
    label_column = data[:, -1]
    labels = label_column.reshape(label_column.shape[0], 1)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.25, random_state=42)
    #build the model, then tune and fit it on the training set and predict the testing set
    classifier = SVM()
    y_pred, y_pred_proba = classifier.classification(X_train, y_train, X_test)
    #accuracy, precision and recall of the predicted labels
    acc_svm = classifier.score(y_test, y_pred)
    P_score = precision_score(y_test, y_pred)
    R_score = recall_score(y_test, y_pred)
    #the ROC curve is built from the probability in the second column of predict_proba
    positive_proba = y_pred_proba[:, 1]
    fpr, tpr, _ = roc_curve(y_test, positive_proba)
    Area_Under_Curve = auc(fpr, tpr)
    print("The accuracy of the SVM classifier is: ", acc_svm, "\n",
          "The precision of the SVM classifier is: ", P_score, "\n",
          "The recall of the SVM classifier is: ", R_score, "\n",
          "The AUC of the SVM classifier is: ", Area_Under_Curve, "\n")
    #plot the ROC curve together with the diagonal reference line
    auc_label = 'AUC = %0.2f' % Area_Under_Curve
    plt.plot(fpr, tpr, 'b', label=auc_label)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.1, 1.1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('The ROC curve of the SVM classifier')
    plt.show()
File added
Results/SVM_roc_banknote.png

330 KiB

Results/SVM_roc_ckd.png

89.9 KiB

Results/result.jpg

632 KiB

Results/result_1.png

316 KiB

Results/result_2.png

96.2 KiB

File added
File added
File added
File added
This diff is collapsed.
......@@ -30,9 +30,9 @@ class mytest(unittest.TestCase):
@classmethod
def test_Gaussian_NB(self):
    #run the Gaussian naive Bayes test function of Fonctions on self.data1, then on self.data2
    #NOTE(review): each pair of consecutive prints looks like the removed and the added line
    #of a diff rendered without +/- markers - confirm which one the file really contains
    print("The test of Gaussian_NB model for chronic_kidney_disease")
    print("The test of gaussian naive bayes model for chronic_kidney_disease")
    Fonctions.test_Gaussian_NB(self.data1)
    print("The test of Gaussian_NB model for banknote_authentication_dataset")
    print("The test of gaussian naive bayes model for banknote_authentication_dataset")
    Fonctions.test_Gaussian_NB(self.data2)
@classmethod
......@@ -42,5 +42,12 @@ class mytest(unittest.TestCase):
print("The test of mlp model for banknote_authentication_dataset")
Fonctions.testMLP(self.data2, 1000, 0.01)
@classmethod
def test_SVM(self):
    #announce each dataset, then run the SVM test function of Fonctions on it
    #the attribute is looked up inside the loop, right before the run that uses it
    runs = (
        ("chronic_kidney_disease", "data1"),
        ("banknote_authentication_dataset", "data2"),
    )
    for dataset_name, attribute in runs:
        print("The test of SVM model for " + dataset_name)
        Fonctions.testSVM(getattr(self, attribute))
#when the file is executed as a script, run the unittest test cases with verbose per-test output
if __name__ == '__main__':
    unittest.main(verbosity=2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment