# Python script, 61 lines, 1.5 KiB, executable file.
import os
|
|
import sys
|
|
import pandas as pd
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.decomposition import PCA
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn import svm
|
|
from sklearn import metrics
|
|
import pickle
|
|
from sklearn.neural_network import MLPClassifier
|
|
|
|
# Train a PCA + linear-SVM classifier from a CSV file and persist both models.
#
# Usage: pass the path of the input CSV as the first command-line argument.
# The last CSV column is used as the target; every other column is a feature.
# Outputs: "pca_model.sav" and "svm_model.sav" (pickle files) written to the
# current working directory.

if len(sys.argv) < 2:
    print("no input csv file")
    # A missing input file is an error, so report failure to the caller.
    # sys.exit is used because the bare `exit` helper is only injected by the
    # `site` module and is not guaranteed to exist.
    sys.exit(1)

df = pd.read_csv(sys.argv[1])

# print(df, df.columns[0:len(df.columns)])

# Split the columns: the last one is the target, the rest are features.
xcols = list(df.columns)
targ = xcols.pop()
# print(xcols)
X = df.loc[:, xcols].values
print(X.shape)
Y = df.loc[:, targ].values
print(Y.shape)

# X = StandardScaler().fit_transform(X)
print(X)

# Reduce the feature matrix to 128 principal components.
# NOTE(review): PCA is fitted on the full dataset before the train/test split
# below, so the held-out rows influence the projection and the reported
# accuracy may be optimistic. Kept as-is to preserve the saved models.
# NOTE(review): scikit-learn requires n_components <= min(n_samples,
# n_features); smaller inputs will presumably make this call raise - confirm.
pca_model = PCA(n_components=128)
pcaofX = pca_model.fit_transform(X)
print("shapeofX after pca", pcaofX.shape, ", cum Sum of variance ratio", pca_model.explained_variance_ratio_.cumsum()[-1])

# pcaofX = X

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(pcaofX, Y, test_size=0.3, random_state=109)

print("train shape", X_train.shape)

svm_model = svm.SVC(kernel="linear")
svm_model.fit(X_train, Y_train)

pred = svm_model.predict(X_test)
print(pred)

print("Accuracy:", metrics.accuracy_score(Y_test, pred))

# Persist both fitted models. The context managers guarantee the files are
# flushed and closed even if pickling fails part-way through.
with open("pca_model.sav", 'wb') as pca_file:
    pickle.dump(pca_model, pca_file)
with open("svm_model.sav", 'wb') as svm_file:
    pickle.dump(svm_model, svm_file)

# Disabled alternative classifier experiment (MLP), kept for reference:
# clf = MLPClassifier(activation='relu' ,solver='lbfgs', alpha=1e-9, hidden_layer_sizes = (128,8096,128), random_state=1, max_iter=3500)

# clf.fit(X_train,Y_train)
# pred2 = clf.predict(X_test)
# pickle.dump(clf, open("clf.sav", 'wb'))

# print("Accuracy:",metrics.accuracy_score(Y_test, pred2))
|