import re
import pandas as pd
import sklearn.model_selection
import sklearn.metrics
import numpy as np
from sklearn import svm
import pickle
import os
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
# Benchmark three classifiers (linear SVM, logistic regression, random forest)
# on every table found in one pp_tuning sub-directory, then write the
# test-split scores to CSV: one summary for all inputs, one for inputs whose
# file name contains 'merge', and one for the rest.
dir1 = 'bin3'
base_dir = 'D:\\Westlake\\pwk lab\\fatez\\pp_tuning/'
label_dir = 'D:\\Westlake\\pwk lab\\fatez\\atac_svm/'
input_dir = base_dir + dir1

# NOTE(review): the overall summary is written *inside* the input directory
# under the bare name '.csv' (possibly a typo for dir1 + '.csv' -- confirm).
# The output location is kept as-is; the file is skipped when listing inputs
# so that re-running the script does not try to train on its own output.
summary_name = '.csv'

# First three characters of the input file name -> label table to use.
# Any other prefix falls back to DEFAULT_LABEL_FILE.
label_files = {'NMF': 'nmf_label.txt', 'ors': 'ors_label.txt'}
DEFAULT_LABEL_FILE = 'two_label.txt'

list1 = [name for name in os.listdir(input_dir) if name != summary_name]

svm_value = []
lg_value = []
rf_value = []
for i in list1:
    print(i)
    # Columns of the input table are looked up in the label table's 'sample'
    # column below; the table is transposed so each of them becomes one row
    # passed to the classifiers.
    df_use = pd.read_table(input_dir + '/' + i, index_col=0)

    table = pd.read_table(label_dir + label_files.get(i[:3], DEFAULT_LABEL_FILE))
    labels_by_sample = table.set_index('sample')['label']
    # Reorder the labels to match the column order of the input table.
    label = labels_by_sample.loc[df_use.columns].to_list()

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        df_use.T, label, random_state=1)

    # NOTE(review): per the scikit-learn docs, gamma is the coefficient for
    # the rbf/poly/sigmoid kernels, so it should have no effect with
    # kernel='linear'. Left in place to keep the model definition unchanged.
    svm_clf = svm.SVC(C=6, kernel='linear', gamma=20)
    svm_clf.fit(X_train, y_train)
    svm_score = svm_clf.score(X_test, y_test)
    print('svm:', svm_score)

    lg_clf = LogisticRegression(random_state=42, multi_class='multinomial')
    lg_clf.fit(X_train, y_train)
    lg_accuracy = lg_clf.score(X_test, y_test)
    print("logstic:", lg_accuracy)

    rf_classifier = RandomForestClassifier(n_estimators=150, random_state=42)
    rf_classifier.fit(X_train, y_train)
    rf_accuracy = rf_classifier.score(X_test, y_test)
    print("rf:", rf_accuracy)

    svm_value.append(svm_score)
    lg_value.append(lg_accuracy)
    rf_value.append(rf_accuracy)

# One row per input file, one column per classifier.
final = pd.DataFrame(
    {'svm': svm_value, 'lg': lg_value, 'rf': rf_value},
    index=list1,
)
print(final)
final.to_csv(input_dir + '/' + summary_name)

# Split the rows by whether the input file name contains 'merge'.
merge_index = [pos for pos, name in enumerate(final.index) if 'merge' in name]
other_index = [pos for pos, name in enumerate(final.index) if 'merge' not in name]
merge = final.iloc[merge_index]
other = final.iloc[other_index]
merge.to_csv(base_dir + dir1 + '_merge.csv')
other.to_csv(base_dir + dir1 + '_atac.csv')