{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare data from XLSX file\n",
    "Example code showing how to process data stored in a `.xlsx` file into a package-compatible format."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Key data:\t[('MIN', 'MINOCYCLINE'), ('RIF', 'RIFAMPICIN')]\n",
      "Ixn names:\t[['INH', 'RIF'], ['INH', 'MIN'], ['RIF', 'MIN'], ['INH', 'RIF', 'MIN']]\n",
      "Ixn scores:\t[1.0, 1.09, 0.99, 0.6]\n",
      "Feature names:\t['Rv0001', 'Rv0002', 'Rv0003']\n",
      "Profile data:\t{'INH': [0.12250798308061334, 0.33976099868757503, -0.6754194125478904], 'RIFAMPICIN': [-0.6844927294271568, -0.7003787399367092, -0.9965682681936586], 'MINOCYCLINE': [0.2494555806220347, -0.29418110424257443, -0.5443713088569625]}\n"
     ]
    }
   ],
   "source": [
    "# Import dependencies\n",
    "import pandas as pd\n",
    "\n",
    "# Define data filepath\n",
    "file_path = './sample_data.xlsx'\n",
    "\n",
    "# Load the three sheets in a single call so the workbook is opened and parsed once.\n",
    "# Passing a list to `sheet_name` returns a dict of DataFrames keyed by sheet name.\n",
    "sheets = pd.read_excel(\n",
    "    file_path,\n",
    "    sheet_name=['key', 'interactions', 'profiles'],\n",
    "    engine='openpyxl',\n",
    ")\n",
    "\n",
    "# Define key: (Code, Label) pairs taken row by row from the 'key' sheet\n",
    "key_df = sheets['key']\n",
    "key = list(zip(key_df['Code'], key_df['Label']))\n",
    "print('Key data:\\t{}'.format(key))\n",
    "\n",
    "# Define interactions + scores\n",
    "# Only columns whose name contains 'Drug' describe an interaction; NaN cells are\n",
    "# dropped so each list holds just the entries present in that row.\n",
    "interactions_df = sheets['interactions']\n",
    "drug_columns = interactions_df.filter(like='Drug')\n",
    "interactions = [row.dropna().tolist() for _, row in drug_columns.iterrows()]\n",
    "scores = interactions_df['Score'].tolist()\n",
    "print('Ixn names:\\t{}'.format(interactions))\n",
    "print('Ixn scores:\\t{}'.format(scores))\n",
    "\n",
    "# Define profiles + feature_names\n",
    "# Every column other than 'index' and 'Gene' becomes one profile (column name -> values).\n",
    "profiles_df = sheets['profiles']\n",
    "feature_names = profiles_df['Gene'].tolist()\n",
    "profiles = profiles_df.drop(columns=['index', 'Gene'], errors='ignore').to_dict(orient='list')\n",
    "print('Feature names:\\t{}'.format(feature_names))\n",
    "print('Profile data:\\t{}'.format(profiles))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.5 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "0ac2a46207b2ff734e5406bb8bd0909b0a981f84a860af7db5bce33c6bd25d0b"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
