{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b69b2ab7",
   "metadata": {},
   "source": [
    "# FateZ Clustering \n",
    "\n",
    "This notebook demonstrate how to implement clustering method with FateZ's representing method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "dd050393",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This part is yet to be modified!\n",
      "Done Import\n"
     ]
    }
   ],
   "source": [
    "print('This part is yet to be modified!')\n",
    "\n",
    "import os\n",
    "import torch\n",
    "import numpy as np\n",
    "from torch.utils.data import DataLoader\n",
    "from pkg_resources import resource_filename\n",
    "from sklearn import cluster\n",
    "import fatez.test as test\n",
    "import fatez.model as model\n",
    "# import scanpy as sc\n",
    "\n",
    "print('Done Import')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10210c8d",
   "metadata": {},
   "source": [
    "### Initialize testing model first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3948e182",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Testing Full Model.\n",
      "\n",
      "\tPre-Trainer Green.\n",
      "\n",
      "\tFine-Tuner Green.\n",
      "\n",
      "Edge Explain:\n",
      " tensor([[0.1573, 0.1524, 0.1506, 0.1357, 0.1349, 0.1365, 0.1361, 0.1353, 0.0000,\n",
      "         0.1383],\n",
      "        [0.1358, 0.1370, 0.1243, 0.1215, 0.1255, 0.1237, 0.1196, 0.1621, 0.0000,\n",
      "         0.1627],\n",
      "        [0.1638, 0.1475, 0.1416, 0.1444, 0.1399, 0.1464, 0.1395, 0.1368, 0.0000,\n",
      "         0.1393],\n",
      "        [0.1305, 0.1263, 0.1264, 0.1260, 0.1198, 0.1493, 0.1356, 0.1604, 0.0000,\n",
      "         0.1371]]) \n",
      "\n",
      "Reg Explain:\n",
      " tensor([0., 0., 0., 0.], dtype=torch.float64) \n",
      "\n",
      "Node Explain:\n",
      " tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=torch.float64) \n",
      "\n",
      "\tExplainer Green.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "faker = test.Faker()\n",
    "testM, _ = faker.test_full_model()\n",
    "# model.Save(faker.test_gat(), '../data/ignore/gat.model')\n",
    "# model.Save(testM, '../data/ignore/trainer.model')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "255aa807",
   "metadata": {},
   "source": [
    "### Get the fake dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "82ce2fb9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 2&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2</span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1 </span>dataset = faker.make_data_loader().dataset                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2 <span style=\"color: #0000ff; text-decoration-color: #0000ff\">for</span> x <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> DataLoader(dataset, batch_size = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(dataset)):                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3 │   </span>all_fea_mat = x[<span style=\"color: #0000ff; text-decoration-color: #0000ff\">0</span>]                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">4 │   </span>all_adj_mat = x[<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>]                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">5 </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">print</span>(<span style=\"color: #808000; text-decoration-color: #808000\">f'Labels:\\n{</span>labels.tolist()<span style=\"color: #808000; text-decoration-color: #808000\">}'</span>)                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/data/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">dataloader.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">634</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__next__</span>           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 631 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._sampler_iter <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>:                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 632 │   │   │   │   # TODO(https://github.com/pytorch/pytorch/issues/76750)</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 633 │   │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._reset()  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># type: ignore[call-arg]</span>                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 634 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>data = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._next_data()                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 635 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._num_yielded += <span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 636 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._dataset_kind == _DatasetKind.Iterable <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> \\                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 637 │   │   │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._IterableDataset_len_called <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> \\                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/data/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">dataloader.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">678</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_next_data</span>         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 675 │   </span>                                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 676 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_next_data</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>):                                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 677 │   │   </span>index = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._next_index()  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># may raise StopIteration</span>                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 678 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>data = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._dataset_fetcher.fetch(index)  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># may raise StopIteration</span>              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 679 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._pin_memory:                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 680 │   │   │   </span>data = _utils.pin_memory.pin_memory(data, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._pin_memory_device)            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 681 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> data                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">fetch.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">54</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">fetch</span>             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">51 │   │   │   │   </span>data = [<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.dataset[idx] <span style=\"color: #0000ff; text-decoration-color: #0000ff\">for</span> idx <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> possibly_batched_index]                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">52 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">53 │   │   │   </span>data = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.dataset[possibly_batched_index]                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>54 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.collate_fn(data)                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">55 </span>                                                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">collate.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">264</span> in                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">default_collate</span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">261 </span><span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">│   │   │   </span><span style=\"color: #808000; text-decoration-color: #808000\">&gt;&gt;&gt; default_collate_fn_map.update(CustoType, collate_customtype_fn)</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">262 </span><span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">│   │   │   </span><span style=\"color: #808000; text-decoration-color: #808000\">&gt;&gt;&gt; default_collate(batch)  # Handle `CustomType` automatically</span>                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">263 </span><span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">│   </span><span style=\"color: #808000; text-decoration-color: #808000\">\"\"\"</span>                                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>264 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> collate(batch, collate_fn_map=default_collate_fn_map)                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">265 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">collate.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">150</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">collate</span>        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">147 │   │   │   │   # The sequence type may not support `__init__(iterable)` (e.g., `range`)</span>   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">148 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> [collate(samples, collate_fn_map=collate_fn_map) <span style=\"color: #0000ff; text-decoration-color: #0000ff\">for</span> samples <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> t   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">149 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>150 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">TypeError</span>(default_collate_err_msg_format.format(elem_type))                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">151 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">152 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">153 </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">collate_tensor_fn</span>(batch, *, collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ..   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
       "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">TypeError: </span>default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">class</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span>\n",
       "<span style=\"color: #008000; text-decoration-color: #008000\">'torch_geometric.data.data.Data'</span><span style=\"font-weight: bold\">&gt;</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
       "\u001b[31m│\u001b[0m in \u001b[92m<cell line: 2>\u001b[0m:\u001b[94m2\u001b[0m                                                                              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m1 \u001b[0mdataset = faker.make_data_loader().dataset                                                   \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2 \u001b[94mfor\u001b[0m x \u001b[95min\u001b[0m DataLoader(dataset, batch_size = \u001b[96mlen\u001b[0m(dataset)):                                     \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m3 \u001b[0m\u001b[2m│   \u001b[0mall_fea_mat = x[\u001b[94m0\u001b[0m]                                                                       \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m4 \u001b[0m\u001b[2m│   \u001b[0mall_adj_mat = x[\u001b[94m1\u001b[0m]                                                                       \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m5 \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33mf\u001b[0m\u001b[33m'\u001b[0m\u001b[33mLabels:\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m{\u001b[0mlabels.tolist()\u001b[33m}\u001b[0m\u001b[33m'\u001b[0m)                                                         \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/data/\u001b[0m\u001b[1;33mdataloader.py\u001b[0m:\u001b[94m634\u001b[0m in \u001b[92m__next__\u001b[0m           \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 631 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m._sampler_iter \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m:                                                \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 632 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[2m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001b[0m                   \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 633 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[96mself\u001b[0m._reset()  \u001b[2m# type: ignore[call-arg]\u001b[0m                                   \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 634 \u001b[2m│   │   │   \u001b[0mdata = \u001b[96mself\u001b[0m._next_data()                                                      \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 635 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m._num_yielded += \u001b[94m1\u001b[0m                                                        \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 636 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m._dataset_kind == _DatasetKind.Iterable \u001b[95mand\u001b[0m \\                          \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 637 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[96mself\u001b[0m._IterableDataset_len_called \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \\                    \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/data/\u001b[0m\u001b[1;33mdataloader.py\u001b[0m:\u001b[94m678\u001b[0m in \u001b[92m_next_data\u001b[0m         \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 675 \u001b[0m\u001b[2m│   \u001b[0m                                                                                      \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 676 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_next_data\u001b[0m(\u001b[96mself\u001b[0m):                                                                 \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 677 \u001b[0m\u001b[2m│   │   \u001b[0mindex = \u001b[96mself\u001b[0m._next_index()  \u001b[2m# may raise StopIteration\u001b[0m                             \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 678 \u001b[2m│   │   \u001b[0mdata = \u001b[96mself\u001b[0m._dataset_fetcher.fetch(index)  \u001b[2m# may raise StopIteration\u001b[0m              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 679 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m._pin_memory:                                                              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 680 \u001b[0m\u001b[2m│   │   │   \u001b[0mdata = _utils.pin_memory.pin_memory(data, \u001b[96mself\u001b[0m._pin_memory_device)            \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 681 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m data                                                                       \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/\u001b[0m\u001b[1;33mfetch.py\u001b[0m:\u001b[94m54\u001b[0m in \u001b[92mfetch\u001b[0m             \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m51 \u001b[0m\u001b[2m│   │   │   │   \u001b[0mdata = [\u001b[96mself\u001b[0m.dataset[idx] \u001b[94mfor\u001b[0m idx \u001b[95min\u001b[0m possibly_batched_index]                \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m52 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                               \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m53 \u001b[0m\u001b[2m│   │   │   \u001b[0mdata = \u001b[96mself\u001b[0m.dataset[possibly_batched_index]                                     \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m54 \u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m.collate_fn(data)                                                        \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m55 \u001b[0m                                                                                            \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/\u001b[0m\u001b[1;33mcollate.py\u001b[0m:\u001b[94m264\u001b[0m in                \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[92mdefault_collate\u001b[0m                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m261 \u001b[0m\u001b[2;33m│   │   │   \u001b[0m\u001b[33m>>> default_collate_fn_map.update(CustoType, collate_customtype_fn)\u001b[0m            \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m262 \u001b[0m\u001b[2;33m│   │   │   \u001b[0m\u001b[33m>>> default_collate(batch)  # Handle `CustomType` automatically\u001b[0m                \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m263 \u001b[0m\u001b[2;33m│   \u001b[0m\u001b[33m\"\"\"\u001b[0m                                                                                    \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m264 \u001b[2m│   \u001b[0m\u001b[94mreturn\u001b[0m collate(batch, collate_fn_map=default_collate_fn_map)                           \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m265 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/\u001b[0m\u001b[1;33mcollate.py\u001b[0m:\u001b[94m150\u001b[0m in \u001b[92mcollate\u001b[0m        \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m147 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[2m# The sequence type may not support `__init__(iterable)` (e.g., `range`)\u001b[0m   \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m148 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m [collate(samples, collate_fn_map=collate_fn_map) \u001b[94mfor\u001b[0m samples \u001b[95min\u001b[0m t   \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m149 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m150 \u001b[2m│   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mTypeError\u001b[0m(default_collate_err_msg_format.format(elem_type))                      \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m151 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m152 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m153 \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcollate_tensor_fn\u001b[0m(batch, *, collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ..   \u001b[31m│\u001b[0m\n",
       "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
       "\u001b[1;91mTypeError: \u001b[0mdefault_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \u001b[1m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\n",
       "\u001b[32m'torch_geometric.data.data.Data'\u001b[0m\u001b[1m>\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dataset = faker.make_data_loader().dataset\n",
    "for x in DataLoader(dataset, batch_size = len(dataset)):\n",
    "    all_fea_mat = x[0]\n",
    "    all_adj_mat = x[1]\n",
    "print(f'Labels:\\n{labels.tolist()}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f82982f2",
   "metadata": {},
   "source": [
    "### Process origin data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "59d0aef5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 2&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2</span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1 # Flatten Data</span>                                                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2 origin = np.array([torch.reshape(ele.to_dense(), (-<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>,)).tolist() <span style=\"color: #0000ff; text-decoration-color: #0000ff\">for</span> ele <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> all_fea_mat]     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3 </span>                                                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">4 # PCA analysis for dimensionality deduction</span>                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">5 </span>pca_analysis = sc.pp.pca(origin, n_comps = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">9</span>, return_info = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">True</span>,)                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
       "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">NameError: </span>name <span style=\"color: #008000; text-decoration-color: #008000\">'all_fea_mat'</span> is not defined\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
       "\u001b[31m│\u001b[0m in \u001b[92m<cell line: 2>\u001b[0m:\u001b[94m2\u001b[0m                                                                              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m1 \u001b[0m\u001b[2m# Flatten Data\u001b[0m                                                                               \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2 origin = np.array([torch.reshape(ele.to_dense(), (-\u001b[94m1\u001b[0m,)).tolist() \u001b[94mfor\u001b[0m ele \u001b[95min\u001b[0m all_fea_mat]     \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m3 \u001b[0m                                                                                             \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m4 \u001b[0m\u001b[2m# PCA analysis for dimensionality deduction\u001b[0m                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m5 \u001b[0mpca_analysis = sc.pp.pca(origin, n_comps = \u001b[94m9\u001b[0m, return_info = \u001b[94mTrue\u001b[0m,)                           \u001b[31m│\u001b[0m\n",
       "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
       "\u001b[1;91mNameError: \u001b[0mname \u001b[32m'all_fea_mat'\u001b[0m is not defined\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Flatten Data\n",
    "origin = np.array([torch.reshape(ele.to_dense(), (-1,)).tolist() for ele in all_fea_mat])\n",
    "\n",
    "# PCA analysis for dimensionality deduction\n",
    "pca_analysis = sc.pp.pca(origin, n_comps = 9, return_info = True,)\n",
    "origin_pca = pca_analysis[0]\n",
    "var_ratios = pca_analysis[2]\n",
    "print(f'Origin Data Var Ratios:\\n{var_ratios}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "96be974b",
   "metadata": {},
   "source": [
    "### Process data with encoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d90f7dc8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 2&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">3</span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1 # Get encoded representaions made by GAT -&gt; BERT encoder</span>                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 2 </span>encode = np.array([                                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 3 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span>torch.reshape(ele, (-<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>,)).tolist() <span style=\"color: #0000ff; text-decoration-color: #0000ff\">for</span> ele <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> testM.get_encoder_output(                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4 │   │   </span>all_fea_mat, all_adj_mat                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5 │   </span>)                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 6 </span>])                                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
       "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">AttributeError: </span><span style=\"color: #008000; text-decoration-color: #008000\">'Trainer'</span> object has no attribute <span style=\"color: #008000; text-decoration-color: #008000\">'get_encoder_output'</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
       "\u001b[31m│\u001b[0m in \u001b[92m<cell line: 2>\u001b[0m:\u001b[94m3\u001b[0m                                                                              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 1 \u001b[0m\u001b[2m# Get encoded representaions made by GAT -> BERT encoder\u001b[0m                                    \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 2 \u001b[0mencode = np.array([                                                                         \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 3 \u001b[2m│   \u001b[0mtorch.reshape(ele, (-\u001b[94m1\u001b[0m,)).tolist() \u001b[94mfor\u001b[0m ele \u001b[95min\u001b[0m testM.get_encoder_output(                 \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 4 \u001b[0m\u001b[2m│   │   \u001b[0mall_fea_mat, all_adj_mat                                                            \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 5 \u001b[0m\u001b[2m│   \u001b[0m)                                                                                       \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 6 \u001b[0m])                                                                                          \u001b[31m│\u001b[0m\n",
       "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
       "\u001b[1;91mAttributeError: \u001b[0m\u001b[32m'Trainer'\u001b[0m object has no attribute \u001b[32m'get_encoder_output'\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Get encoded representaions made by GAT -> BERT encoder\n",
    "encode = np.array([\n",
    "    torch.reshape(ele, (-1,)).tolist() for ele in testM.get_encoder_output(\n",
    "        all_fea_mat, all_adj_mat\n",
    "    )\n",
    "])\n",
    "\n",
    "# PCA analysis for dimensionality deduction\n",
    "pca_analysis = sc.pp.pca(encode, n_comps = 9, return_info = True,)\n",
    "encode_pca = pca_analysis[0]\n",
    "var_ratios = pca_analysis[2]\n",
    "print(f'Encoded Rep Var Ratios:\\n{var_ratios}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b5299e3c",
   "metadata": {},
   "source": [
    "### Set clustering models and fit models with original data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "48d8fea6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 2&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2</span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1 </span>eps = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0.5</span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 2 n_clusters = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(np.unique(labels))                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 3 </span>min_samples = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">5</span>                                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4 </span>                                                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5 </span>dbscan = cluster.DBSCAN(eps = eps)                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
       "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">NameError: </span>name <span style=\"color: #008000; text-decoration-color: #008000\">'labels'</span> is not defined\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
       "\u001b[31m│\u001b[0m in \u001b[92m<cell line: 2>\u001b[0m:\u001b[94m2\u001b[0m                                                                              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 1 \u001b[0meps = \u001b[94m0.5\u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 2 n_clusters = \u001b[96mlen\u001b[0m(np.unique(labels))                                                         \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 3 \u001b[0mmin_samples = \u001b[94m5\u001b[0m                                                                             \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 4 \u001b[0m                                                                                            \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 5 \u001b[0mdbscan = cluster.DBSCAN(eps = eps)                                                          \u001b[31m│\u001b[0m\n",
       "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
       "\u001b[1;91mNameError: \u001b[0mname \u001b[32m'labels'\u001b[0m is not defined\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "eps = 0.5\n",
    "n_clusters = len(np.unique(labels))\n",
    "min_samples = 5\n",
    "\n",
    "dbscan = cluster.DBSCAN(eps = eps)\n",
    "kmeans = cluster.KMeans(n_clusters = n_clusters)\n",
    "optics = cluster.OPTICS(min_samples = min_samples)\n",
    "\n",
    "dbscan.fit(origin_pca)\n",
    "kmeans.fit(origin_pca)\n",
    "optics.fit(origin_pca)\n",
    "\n",
    "# Get labels\n",
    "print(dbscan.labels_.astype(int))\n",
    "print(kmeans.labels_.astype(int))\n",
    "print(optics.labels_.astype(int))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eba48c10",
   "metadata": {},
   "source": [
    "### Reset models and fit with encoded representaions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "1f1098d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 2&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2</span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1 </span>dbscan = cluster.DBSCAN(eps = eps)                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 2 kmeans = cluster.KMeans(n_clusters = n_clusters)                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 3 </span>optics = cluster.OPTICS(min_samples = min_samples)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4 </span>dbscan.fit(encode_pca)                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5 </span>kmeans.fit(encode_pca)                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
       "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
       "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">NameError: </span>name <span style=\"color: #008000; text-decoration-color: #008000\">'n_clusters'</span> is not defined\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
       "\u001b[31m│\u001b[0m in \u001b[92m<cell line: 2>\u001b[0m:\u001b[94m2\u001b[0m                                                                              \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 1 \u001b[0mdbscan = cluster.DBSCAN(eps = eps)                                                          \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 2 kmeans = cluster.KMeans(n_clusters = n_clusters)                                            \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 3 \u001b[0moptics = cluster.OPTICS(min_samples = min_samples)                                          \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 4 \u001b[0mdbscan.fit(encode_pca)                                                                      \u001b[31m│\u001b[0m\n",
       "\u001b[31m│\u001b[0m   \u001b[2m 5 \u001b[0mkmeans.fit(encode_pca)                                                                      \u001b[31m│\u001b[0m\n",
       "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
       "\u001b[1;91mNameError: \u001b[0mname \u001b[32m'n_clusters'\u001b[0m is not defined\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dbscan = cluster.DBSCAN(eps = eps)\n",
    "kmeans = cluster.KMeans(n_clusters = n_clusters)\n",
    "optics = cluster.OPTICS(min_samples = min_samples)\n",
    "dbscan.fit(encode_pca)\n",
    "kmeans.fit(encode_pca)\n",
    "optics.fit(encode_pca)\n",
    "\n",
    "# Get labels\n",
    "print(dbscan.labels_.astype(int))\n",
    "print(kmeans.labels_.astype(int))\n",
    "print(optics.labels_.astype(int))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}