"""Train a PCA + linear-SVM classifier from a CSV file.

Usage::

    python <this_script> <input.csv>

The last column of the CSV is used as the target label and every other
column as a feature. The fitted PCA and SVM models are pickled to
``pca_model.sav`` and ``svm_model.sav`` in the current working directory.

Exit status is 0 on success and 1 when the input is missing or unusable.
"""
import os
import pickle
import sys

import pandas as pd
from sklearn import metrics
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Upper bound on the number of principal components that are kept.
MAX_PCA_COMPONENTS = 128
# Fraction of the rows held out for evaluation.
TEST_FRACTION = 0.3
# Fixed seed so the train/test split is reproducible between runs.
SPLIT_SEED = 109
# Output files for the fitted models (relative to the working directory).
PCA_MODEL_PATH = "pca_model.sav"
SVM_MODEL_PATH = "svm_model.sav"


def _split_features_target(df):
    """Return ``(X, Y)`` arrays: all-but-last columns and the last column.

    Raises:
        ValueError: if *df* has fewer than two columns, i.e. there is no
            feature column left once the target column is taken out.
    """
    feature_cols = list(df.columns)
    if len(feature_cols) < 2:
        raise ValueError(
            "input csv needs at least one feature column plus a target column"
        )
    target_col = feature_cols.pop()
    return df.loc[:, feature_cols].values, df.loc[:, target_col].values


def _save_model(model, path):
    """Pickle *model* to *path*, closing the file even if dumping fails."""
    with open(path, "wb") as handle:
        pickle.dump(model, handle)


def main(argv):
    """Run the training pipeline for the CSV named in ``argv[1]``.

    Args:
        argv: command-line arguments in ``sys.argv`` layout (program name
            first, CSV path second).

    Returns:
        Process exit status: 0 on success, 1 if no CSV path was given or
        the CSV has too few columns.
    """
    if len(argv) < 2:
        print("no input csv file")
        # A missing argument is a usage error, so report failure to the
        # caller instead of a success status.
        return 1

    df = pd.read_csv(argv[1])
    try:
        X, Y = _split_features_target(df)
    except ValueError as err:
        print(err, file=sys.stderr)
        return 1
    print(X.shape)
    print(Y.shape)

    # Standardisation is intentionally left disabled, as in the original:
    # X = StandardScaler().fit_transform(X)
    print(X)

    # PCA cannot extract more components than min(n_samples, n_features);
    # cap the request so small data sets do not abort with a ValueError.
    n_components = min(MAX_PCA_COMPONENTS, X.shape[0], X.shape[1])
    # NOTE(review): PCA is fitted on the full data set before the split, so
    # the held-out rows influence the projection. Kept as-is to preserve the
    # existing results; consider fitting on the training rows only.
    pca_model = PCA(n_components=n_components)
    pcaofX = pca_model.fit_transform(X)
    print(
        "shapeofX after pca",
        pcaofX.shape,
        ", cum Sum of variance ratio",
        pca_model.explained_variance_ratio_.cumsum()[-1],
    )

    X_train, X_test, Y_train, Y_test = train_test_split(
        pcaofX, Y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
    )
    print("train shape", X_train.shape)

    svm_model = svm.SVC(kernel="linear")
    svm_model.fit(X_train, Y_train)
    pred = svm_model.predict(X_test)
    print(pred)
    print("Accuracy:", metrics.accuracy_score(Y_test, pred))

    _save_model(pca_model, PCA_MODEL_PATH)
    _save_model(svm_model, SVM_MODEL_PATH)

    # Alternative classifier, left disabled by the original author:
    # clf = MLPClassifier(activation='relu', solver='lbfgs', alpha=1e-9,
    #                     hidden_layer_sizes=(128, 8096, 128),
    #                     random_state=1, max_iter=3500)
    # clf.fit(X_train, Y_train)
    # pred2 = clf.predict(X_test)
    # pickle.dump(clf, open("clf.sav", 'wb'))
    # print("Accuracy:", metrics.accuracy_score(Y_test, pred2))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))