simplifying output folder structure

easeml · Jul 29, 2021 · 95d1ec8 · 95d1ec8
1 parent d59769d
commit 95d1ec8
Show file tree

Hide file tree

Showing 14 changed files with 108 additions and 100 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3  1,6 @@
 # Datascope experiments
 results/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/datascope/algorithms/KNN_Shapley.py b/datascope/algorithms/KNN_Shapley.py
@@ -53,7  53,7 @@ def _get_shapley_value_np(self, X_train, y_train, X_test, y_test):
                 cur -= 1 
         return np.mean(s, axis=1)
 
-    def score(self, X_train, y_train, X_test, y_test, model=None, sources=None, use_torch=False, **kwargs):
     def score(self, X_train, y_train, X_test, y_test, model=None, use_torch=False, **kwargs):
         """
         Run the model on the test data and calculate the Shapley value.
         """

diff --git a/datascope/utils/DShap.py b/datascope/utils/DShap.py
@@ -146,7  146,8 @@ def run(self, save_every, err, tolerance=0.01, knn_run=True, tmc_run=True, g_run
 
         self.restart_model()
         self.model.fit(self.X, self.y)
-        return self.measure.score(self.X, self.y, self.X_test, self.y_test, self.model_family, self.model, sources=self.sources)
         print(self.measure)
         return self.measure.score(self.X, self.y, self.X_test, self.y_test, model_family=self.model_family, model=self.model, sources=self.sources)
 
     def restart_model(self):
         try:

diff --git a/experiments/Experiment.py b/experiments/Experiment.py
@@ -27,6  27,7 @@ class Experiment:
     def __init__(self, name, pipeline, dataset_name="FashionMNIST"):
         self.name = name
         self.pipeline = pipeline
         self.base_path = ''
         self.dataset_name = dataset_name #UCI, FashionMNIST, Text
 
         if self.pipeline is None:
@@ -39,6  40,7 @@ def run(self, iterations=1000, run_label=False, run_poisoning=False, run_fairnes
         name = self.name
         now = datetime.now()
         dt_string = now.strftime("%d-%m-%Y-%H-%M-%S")
         self.base_path = f'./results/{self.name}/i-{iterations}-time-{dt_string}'
 
         def create_dirs(path):
             if (not os.path.exists(path)):
@@ -51,20  53,18 @@ def create_dirs(path):
 
         print(f'Initiate experiment {name}-i-{iterations}-time-{dt_string}')
 
-        # create folder structure
-        for exp in ['label', 'poisoning', 'fairness']:
-            for folder in ['shapley', 'data', 'time', 'plots']:
-                create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/{exp}/{folder}')
-
         if run_label:
             print('Running label noise experiment')
             create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/label/')
             print(flatten)
             self.run_label_experiment(iterations, dt_string, ray, truncated, flatten=flatten)
         if run_poisoning:
             print('Running poisoning experiment')
             create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/poisoning/')
             self.run_poisoning_experiment(iterations, dt_string, ray, truncated, flatten=flatten)
         if run_fairness:
             print('Running fairness experiment')
             create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/fairness/')
             self.run_fairness_experiment(iterations, dt_string, ray, truncated)
 
         print('done!')
@@ -120,30  120,30 @@ def run_label_experiment(self, iterations, dt_string, ray, truncated, flatten=Tr
         time_pipe = end - start
 
         # datetime object containing current date and time
-        # np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_label_condknn, condpipe=res_label_condpipe, knn=res_label_knn, pipe=res_label_pipe)
-        # np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, flip_indices=app_label.flip_indices)
-        # np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
         # np.savez_compressed(f'{self.base_path}/label/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_label_condknn, condpipe=res_label_condpipe, knn=res_label_knn, pipe=res_label_pipe)
         # np.savez_compressed(f'{self.base_path}/label/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, flip_indices=app_label.flip_indices)
         # np.savez_compressed(f'{self.base_path}/label/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
 
         # # save figures
         # figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
-        RuntimePlotter( 
-                    ('KNN-Shapley (cond)', time_condknn), 
-                    ('TMC-Shapley (cond)', time_condpipe), 
-                    ('KNN-Shapley', time_knn), 
-                    ('TMC-Shapley', time_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/plots/LabelRuntime-{name}-i-{iterations}-time-{dt_string}.png')
         # RuntimePlotter( 
         #             ('KNN-Shapley (cond)', time_condknn), 
         #             ('TMC-Shapley (cond)', time_condpipe), 
         #             ('KNN-Shapley', time_knn), 
         #             ('TMC-Shapley', time_pipe)).plot(save_path=f'{self.base_path}/label/LabelRuntime')
 
         LabelPlotter(app_label, 
                     ('KNN-Shapley (cond)', res_label_condknn), 
                     ('TMC-Shapley (cond)', res_label_condpipe), 
                     ('KNN-Shapley', res_label_knn), 
-                    ('TMC-Shapley', res_label_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/plots/Label-{name}-i-{iterations}-time-{dt_string}.png')
                     ('TMC-Shapley', res_label_pipe)).plot(save_path=f'{self.base_path}/label/Label')
 
         LabelCleaningPlotter(app_label, 
                      ('KNN-Shapley (cond)', res_label_condknn), 
                      ('TMC-Shapley (cond)', res_label_condpipe), 
                      ('KNN-Shapley', res_label_knn), 
                      ('TMC-Shapley', res_label_pipe)
-                    ).plot(ray=ray, model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/plots/LabelCleaning-{name}-i-{iterations}-time-{dt_string}.png')
                     ).plot(ray=ray, model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/label/LabelCleaning')
 
     def run_poisoning_experiment(self, iterations, dt_string, ray, truncated, flatten=True):
         '''
@@ -189,30  189,30 @@ def run_poisoning_experiment(self, iterations, dt_string, ray, truncated, flatte
         time_pipe = end - start
 
         # datetime object containing current date and time
-        np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_poisoning_condknn, condpipe=res_poisoning_condpipe, knn=res_poisoning_knn, pipe=res_poisoning_pipe)
-        np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, poison_indices=app_poisoning.poison_indices)
-        np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
         # np.savez_compressed(f'{self.base_path}/poisoning/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_poisoning_condknn, condpipe=res_poisoning_condpipe, knn=res_poisoning_knn, pipe=res_poisoning_pipe)
         # np.savez_compressed(f'{self.base_path}/poisoning/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, poison_indices=app_poisoning.poison_indices)
         # np.savez_compressed(f'{self.base_path}/poisoning/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
 
         # save figures
         figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
-        RuntimePlotter( 
-                    ('KNN-Shapley (cond)', time_condknn), 
-                    ('TMC-Shapley (cond)', time_condpipe), 
-                    ('KNN-Shapley', time_knn), 
-                    ('TMC-Shapley', time_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/plots/PoisoningRuntime-{name}-i-{iterations}-time-{dt_string}.png')
         # RuntimePlotter( 
         #             ('KNN-Shapley (cond)', time_condknn), 
         #             ('TMC-Shapley (cond)', time_condpipe), 
         #             ('KNN-Shapley', time_knn), 
         #             ('TMC-Shapley', time_pipe)).plot(save_path=f'{self.base_path}/poisoning/PoisoningRuntime')
 
         PoisoningPlotter(app_poisoning, 
                     ('KNN-Shapley (cond)', res_poisoning_condknn), 
                     ('TMC-Shapley (cond)', res_poisoning_condpipe), 
                     ('KNN-Shapley', res_poisoning_knn), 
-                    ('TMC-Shapley', res_poisoning_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/plots/Poisoning-{name}-i-{iterations}-time-{dt_string}.png')
                     ('TMC-Shapley', res_poisoning_pipe)).plot(save_path=f'{self.base_path}/poisoning/Poisoning')
 
         PoisoningCleaningPlotter(app_poisoning, 
                      ('KNN-Shapley (cond)', res_poisoning_condknn), 
                      ('TMC-Shapley (cond)', res_poisoning_condpipe), 
                      ('KNN-Shapley', res_poisoning_knn), 
                      ('TMC-Shapley', res_poisoning_pipe)
-                     ).plot(model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/plots/PoisoningCleaning-{name}-i-{iterations}-time-{dt_string}.png')
                      ).plot(model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/poisoning/PoisoningCleaning')
 
     def run_fairness_experiment(self, iterations, dt_string, ray, truncated):
 
@@ -289,7  289,7 @@ def equal_odds(y, y_pred, a_indices=a_indices, b_indices=b_indices):
         eo_score = equal_odds(y_test, pipeline.predict(X_test))
         print(f"Initial demographic ratio parity is {dpr_score}!")
         print(f"Initial equalized odds ratio is {eo_score}!")
-        np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test)
         #np.savez_compressed(f'{self.base_path}/fairness/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test)
 
         transform_condknn, pipeline_condknn = process_pipe_condknn(pipeline)
         transform_condpipe, pipeline_condpipe = process_pipe_condpipe(pipeline)
@@ -311,30  311,30 @@ def equal_odds(y, y_pred, a_indices=a_indices, b_indices=b_indices):
         time_pipe = end - start
 
         # datetime object containing current date and time
-        np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/shapley/{name}-i-{iterations}-time-{dt_string}-shapley', condpipe=res_fairness_condpipe, knn=res_fairness_knn, pipe=res_fairness_pipe)
-        np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/time/{name}-i-{iterations}-time-{dt_string}-time', time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
         # np.savez_compressed(f'{self.base_path}/fairness/shapley/{name}-i-{iterations}-time-{dt_string}-shapley', condpipe=res_fairness_condpipe, knn=res_fairness_knn, pipe=res_fairness_pipe)
         # np.savez_compressed(f'{self.base_path}/fairness/time/{name}-i-{iterations}-time-{dt_string}-time', time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
 
         figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
 
         FairnessPlotter(app_fairness, 
              ('TMC-Shapley (cond)', res_fairness_condpipe), 
              ('KNN-Shapley', res_fairness_knn), 
              ('TMC-Shapley', res_fairness_pipe)
-                ).plot(metric=eo_sex, metric_name="Equalized Odds Ratio", model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessEO-{name}-i-{iterations}-time-{dt_string}.png')
                 ).plot(metric=eo_sex, metric_name="Equalized Odds Ratio", model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/fairness/FairnessEO')
 
         FairnessPlotter(app_fairness, 
              ('TMC-Shapley (cond)', res_fairness_condpipe), 
              ('KNN-Shapley', res_fairness_knn), 
              ('TMC-Shapley', res_fairness_pipe)
-                ).plot(metric=dpr_sex, metric_name="Demographic Parity Ratio", model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessDPR-{name}-i-{iterations}-time-{dt_string}.png')
                 ).plot(metric=dpr_sex, metric_name="Demographic Parity Ratio", model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/fairness/FairnessDPR')
 
         FairnessPlotter(app_fairness, 
              ('TMC-Shapley (cond)', res_fairness_condpipe), 
              ('KNN-Shapley', res_fairness_knn), 
              ('TMC-Shapley', res_fairness_pipe)
-                ).plot(metric=accuracy_score, metric_name='Accuracy', model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessAccuracy-{name}-i-{iterations}-time-{dt_string}.png')
                 ).plot(metric=accuracy_score, metric_name='Accuracy', model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/fairness/FairnessAccuracy')
 
-        RuntimePlotter( 
-             ('TMC-Shapley (cond)', time_condpipe), 
-             ('KNN-Shapley', time_knn), 
-             ('TMC-Shapley', time_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessRuntime-{name}-i-{iterations}-time-{dt_string}.png')
         # RuntimePlotter( 
         #      ('TMC-Shapley (cond)', time_condpipe), 
         #      ('KNN-Shapley', time_knn), 
         #      ('TMC-Shapley', time_pipe)).plot(save_path=f'{self.base_path}/fairness/FairnessRuntime')
diff --git a/experiments/experiments.ipynb b/experiments/experiments.ipynb
@@ -2,73  2,60 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 12,
    "execution_count": null,
    "source": [
     "%load_ext autoreload\n",
-    "%autoreload 2"
-   ],
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
     "%autoreload 2\n",
     "%matplotlib inline"
    ],
    "outputs": [],
    "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
    "execution_count": null,
    "source": [
-    "from sklearn import set_config\n",
-    "set_config(display='diagram')\n",
     "# from sklearn import set_config\n",
     "# set_config(display='diagram')\n",
     "\n",
     "from dspipes import Pipelines, TextPipeline\n",
-    "import Experiment"
-   ],
-   "outputs": [
-    {
-     "output_type": "error",
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'dspipes'",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-13-cc7eabe9d859>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mset_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'diagram'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdspipes\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mPipelines\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTextPipeline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mExperiment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'dspipes'"
-     ]
-    }
     "from Experiment import Experiment"
    ],
    "outputs": [],
    "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "source": [
-    "for i in range(0,8):\n",
     "for i in range(0,7):\n",
     "    name = f'pipe_{i}'\n",
     "    model = Pipelines.create_numerical_pipeline(name, imputer=False)\n",
     "    exp_name = f'uci_label_{name}'\n",
     "    exp = Experiment(exp_name, model, dataset_name=\"UCI\")\n",
     "    if i == 7: \n",
     "        exp.run(iterations=2000, run_label=True, run_poisoning=False, run_fairness=False, ray=True, flatten=False, truncated=True)\n",
     "    else:\n",
-    "        exp.run(iterations=2000, run_label=True, run_poisoning=False, run_fairness=False, ray=True, truncated=True)"
     "        exp.run(iterations=2000, run_label=True, run_poisoning=True, run_fairness=True, ray=True, truncated=True)"
    ],
    "outputs": [],
    "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "source": [],
    "outputs": [],
    "metadata": {}
   }
  ],
  "metadata": {
   "interpreter": {
-   "hash": "244f903178196d36fbfba2449c5f83f26f06c6159b04ebd44a04b5c6e8ea4107"
    "hash": "ef53dea1fad49daf30b71adbbc57c745033606a4b172566c9d43a4236b6a4b07"
   },
   "kernelspec": {
    "name": "python3",
-   "display_name": "Python 3.7.7 64-bit ('fastai2': conda)"
    "display_name": "Python 3.8.10 64-bit ('datascope': conda)"
   },
   "language_info": {
    "codemirror_mode": {
@@ -80,7  67,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
    "version": "3.8.10"
   },
   "orig_nbformat": 4
  },