import os
import pickle
import re

import numpy as np
import pandas as pd
import sklearn.metrics
import sklearn.model_selection
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# NOTE(review): these are absolute, machine-specific locations. They are kept
# verbatim so the cell reads and writes exactly the same files as before;
# consider moving them to a relative, configurable data directory.
PP_TUNING_DIR = 'D:\\Westlake\\pwk lab\\fatez\\pp_tuning/'
LABEL_DIR = 'D:\\Westlake\\pwk lab\\fatez\\atac_svm/'

BIN_NAME = 'bin3'                      # sub-directory of PP_TUNING_DIR to scan
INPUT_DIR = PP_TUNING_DIR + BIN_NAME

# The per-file summary is written INSIDE the input directory under this name.
# NOTE(review): the name looks like it is missing a stem; it is left unchanged
# so anything that already reads this file keeps working.
SUMMARY_NAME = '.csv'

# The first three characters of an input file name select its label table.
LABEL_FILE_BY_PREFIX = {'NMF': 'nmf_label.txt', 'ors': 'ors_label.txt'}
DEFAULT_LABEL_FILE = 'two_label.txt'

SPLIT_SEED = 1      # random_state of the train/test split
MODEL_SEED = 42     # random_state of the stochastic models
SVM_C = 6           # regularisation strength of the linear SVM
RF_TREES = 150      # number of trees in the random forest

SCORE_COLUMNS = ['svm', 'lg', 'rf']

# Label tables already read from disk, keyed by label file name.
_label_tables = {}


def list_input_files(input_dir, exclude=(SUMMARY_NAME,)):
    """Return the names of the feature tables found in ``input_dir``.

    Entries listed in ``exclude`` and anything that is not a regular file are
    skipped. Excluding the summary file matters: it is written into this same
    directory, so without the filter a second run would try to load its own
    output as a feature table and fail.
    """
    return [
        name
        for name in os.listdir(input_dir)
        if name not in exclude and os.path.isfile(input_dir + '/' + name)
    ]


def load_label_table(label_file):
    """Read ``label_file`` from LABEL_DIR once and return it indexed by 'sample'.

    The table is cached so that input files sharing a label file do not
    trigger repeated disk reads. Callers must treat the result as read-only.
    """
    if label_file not in _label_tables:
        table = pd.read_table(LABEL_DIR + label_file)
        _label_tables[label_file] = table.set_index('sample')
    return _label_tables[label_file]


def labels_for(file_name, sample_names):
    """Return the 'label' values for ``sample_names``, in that order.

    The label table is chosen from the first three characters of
    ``file_name``. Raises ``KeyError`` if a sample is absent from the table.
    """
    label_file = LABEL_FILE_BY_PREFIX.get(file_name[:3], DEFAULT_LABEL_FILE)
    return load_label_table(label_file).loc[sample_names, 'label'].to_list()


def evaluate_classifiers(samples, labels):
    """Fit the three classifiers on one split and return their test accuracy.

    Parameters
    ----------
    samples : DataFrame with one row per sample.
    labels : list of class labels aligned with the rows of ``samples``.

    Returns
    -------
    dict mapping 'svm', 'lg' and 'rf' to the mean accuracy on the held-out
    part of a single ``train_test_split`` (default test size, fixed seed).
    """
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        samples, labels, random_state=SPLIT_SEED)

    models = {
        # `gamma` is not passed: the linear kernel does not use it, so the
        # fitted model is the same as with the former `gamma=20`.
        'svm': svm.SVC(C=SVM_C, kernel='linear'),
        # NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5.
        # It is kept because dropping it can change the fitted model for
        # two-class labels on older versions - confirm against the installed
        # release before removing.
        'lg': LogisticRegression(random_state=MODEL_SEED,
                                 multi_class='multinomial'),
        'rf': RandomForestClassifier(n_estimators=RF_TREES,
                                     random_state=MODEL_SEED),
    }
    return {
        name: model.fit(X_train, y_train).score(X_test, y_test)
        for name, model in models.items()
    }


# ---------------------------------------------------------------------------
# Evaluate every feature table in the input directory
# ---------------------------------------------------------------------------
input_files = list_input_files(INPUT_DIR)
score_rows = []

for file_name in input_files:
    print(file_name)
    # Columns of the table are samples; rows (first column as index) are
    # features, hence the transpose before handing the data to scikit-learn.
    feature_table = pd.read_table(INPUT_DIR + '/' + file_name, index_col=0)
    sample_labels = labels_for(file_name, feature_table.columns)

    scores = evaluate_classifiers(feature_table.T, sample_labels)
    print('svm:', scores['svm'])
    print("logstic:", scores['lg'])
    print("rf:", scores['rf'])
    score_rows.append(scores)

# One row per input file, one column per classifier.
final = pd.DataFrame(score_rows, index=input_files, columns=SCORE_COLUMNS)
print(final)
final.to_csv(INPUT_DIR + '/' + SUMMARY_NAME)

# ---------------------------------------------------------------------------
# Split the summary by whether the file name contains 'merge'
# ---------------------------------------------------------------------------
# An explicit boolean array keeps row selection well-defined even when no
# input files were found.
is_merge = np.array(['merge' in name for name in final.index], dtype=bool)
merge = final.loc[is_merge]
other = final.loc[~is_merge]
merge.to_csv(PP_TUNING_DIR + BIN_NAME + '_merge.csv')
other.to_csv(PP_TUNING_DIR + BIN_NAME + '_atac.csv')