Skip to content

Commit

Permalink
simplifying output folder structure
Browse files Browse the repository at this point in the history
  • Loading branch information
johndoe12312 authored and daviddao committed Jul 29, 2021
1 parent d59769d commit 95d1ec8
Show file tree
Hide file tree
Showing 14 changed files with 108 additions and 100 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Datascope experiments
results/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 1 addition & 1 deletion datascope/algorithms/KNN_Shapley.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def _get_shapley_value_np(self, X_train, y_train, X_test, y_test):
cur -= 1
return np.mean(s, axis=1)

def score(self, X_train, y_train, X_test, y_test, model=None, sources=None, use_torch=False, **kwargs):
def score(self, X_train, y_train, X_test, y_test, model=None, use_torch=False, **kwargs):
"""
Run the model on the test data and calculate the Shapley value.
"""
Expand Down
3 changes: 2 additions & 1 deletion datascope/utils/DShap.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ def run(self, save_every, err, tolerance=0.01, knn_run=True, tmc_run=True, g_run

self.restart_model()
self.model.fit(self.X, self.y)
return self.measure.score(self.X, self.y, self.X_test, self.y_test, self.model_family, self.model, sources=self.sources)
print(self.measure)
return self.measure.score(self.X, self.y, self.X_test, self.y_test, model_family=self.model_family, model=self.model, sources=self.sources)

def restart_model(self):
try:
Expand Down
70 changes: 35 additions & 35 deletions experiments/Experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Experiment:
def __init__(self, name, pipeline, dataset_name="FashionMNIST"):
self.name = name
self.pipeline = pipeline
self.base_path = ''
self.dataset_name = dataset_name #UCI, FashionMNIST, Text

if self.pipeline is None:
Expand All @@ -39,6 +40,7 @@ def run(self, iterations=1000, run_label=False, run_poisoning=False, run_fairnes
name = self.name
now = datetime.now()
dt_string = now.strftime("%d-%m-%Y-%H-%M-%S")
self.base_path = f'./results/{self.name}/i-{iterations}-time-{dt_string}'

def create_dirs(path):
if (not os.path.exists(path)):
Expand All @@ -51,20 +53,18 @@ def create_dirs(path):

print(f'Initiate experiment {name}-i-{iterations}-time-{dt_string}')

# create folder structure
for exp in ['label', 'poisoning', 'fairness']:
for folder in ['shapley', 'data', 'time', 'plots']:
create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/{exp}/{folder}')

if run_label:
print('Running label noise experiment')
create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/label/')
print(flatten)
self.run_label_experiment(iterations, dt_string, ray, truncated, flatten=flatten)
if run_poisoning:
print('Running poisoning experiment')
create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/poisoning/')
self.run_poisoning_experiment(iterations, dt_string, ray, truncated, flatten=flatten)
if run_fairness:
print('Running fairness experiment')
create_dirs(f'./results/{name}/i-{iterations}-time-{dt_string}/fairness/')
self.run_fairness_experiment(iterations, dt_string, ray, truncated)

print('done!')
Expand Down Expand Up @@ -120,30 +120,30 @@ def run_label_experiment(self, iterations, dt_string, ray, truncated, flatten=Tr
time_pipe = end - start

# datetime object containing current date and time
# np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_label_condknn, condpipe=res_label_condpipe, knn=res_label_knn, pipe=res_label_pipe)
# np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, flip_indices=app_label.flip_indices)
# np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
# np.savez_compressed(f'{self.base_path}/label/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_label_condknn, condpipe=res_label_condpipe, knn=res_label_knn, pipe=res_label_pipe)
# np.savez_compressed(f'{self.base_path}/label/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, flip_indices=app_label.flip_indices)
# np.savez_compressed(f'{self.base_path}/label/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)

# # save figures
# figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
RuntimePlotter(
('KNN-Shapley (cond)', time_condknn),
('TMC-Shapley (cond)', time_condpipe),
('KNN-Shapley', time_knn),
('TMC-Shapley', time_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/plots/LabelRuntime-{name}-i-{iterations}-time-{dt_string}.png')
# RuntimePlotter(
# ('KNN-Shapley (cond)', time_condknn),
# ('TMC-Shapley (cond)', time_condpipe),
# ('KNN-Shapley', time_knn),
# ('TMC-Shapley', time_pipe)).plot(save_path=f'{self.base_path}/label/LabelRuntime')

LabelPlotter(app_label,
('KNN-Shapley (cond)', res_label_condknn),
('TMC-Shapley (cond)', res_label_condpipe),
('KNN-Shapley', res_label_knn),
('TMC-Shapley', res_label_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/plots/Label-{name}-i-{iterations}-time-{dt_string}.png')
('TMC-Shapley', res_label_pipe)).plot(save_path=f'{self.base_path}/label/Label')

LabelCleaningPlotter(app_label,
('KNN-Shapley (cond)', res_label_condknn),
('TMC-Shapley (cond)', res_label_condpipe),
('KNN-Shapley', res_label_knn),
('TMC-Shapley', res_label_pipe)
).plot(ray=ray, model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/label/plots/LabelCleaning-{name}-i-{iterations}-time-{dt_string}.png')
).plot(ray=ray, model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/label/LabelCleaning')

def run_poisoning_experiment(self, iterations, dt_string, ray, truncated, flatten=True):
'''
Expand Down Expand Up @@ -189,30 +189,30 @@ def run_poisoning_experiment(self, iterations, dt_string, ray, truncated, flatte
time_pipe = end - start

# datetime object containing current date and time
np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_poisoning_condknn, condpipe=res_poisoning_condpipe, knn=res_poisoning_knn, pipe=res_poisoning_pipe)
np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, poison_indices=app_poisoning.poison_indices)
np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
# np.savez_compressed(f'{self.base_path}/poisoning/shapley/{name}-i-{iterations}-time-{dt_string}-shapley',condknn=res_poisoning_condknn, condpipe=res_poisoning_condpipe, knn=res_poisoning_knn, pipe=res_poisoning_pipe)
# np.savez_compressed(f'{self.base_path}/poisoning/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test, poison_indices=app_poisoning.poison_indices)
# np.savez_compressed(f'{self.base_path}/poisoning/time/{name}-i-{iterations}-time-{dt_string}-time', time_condknn=time_condknn, time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)

# save figures
figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
RuntimePlotter(
('KNN-Shapley (cond)', time_condknn),
('TMC-Shapley (cond)', time_condpipe),
('KNN-Shapley', time_knn),
('TMC-Shapley', time_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/plots/PoisoningRuntime-{name}-i-{iterations}-time-{dt_string}.png')
# RuntimePlotter(
# ('KNN-Shapley (cond)', time_condknn),
# ('TMC-Shapley (cond)', time_condpipe),
# ('KNN-Shapley', time_knn),
# ('TMC-Shapley', time_pipe)).plot(save_path=f'{self.base_path}/poisoning/PoisoningRuntime')

PoisoningPlotter(app_poisoning,
('KNN-Shapley (cond)', res_poisoning_condknn),
('TMC-Shapley (cond)', res_poisoning_condpipe),
('KNN-Shapley', res_poisoning_knn),
('TMC-Shapley', res_poisoning_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/plots/Poisoning-{name}-i-{iterations}-time-{dt_string}.png')
('TMC-Shapley', res_poisoning_pipe)).plot(save_path=f'{self.base_path}/poisoning/Poisoning')

PoisoningCleaningPlotter(app_poisoning,
('KNN-Shapley (cond)', res_poisoning_condknn),
('TMC-Shapley (cond)', res_poisoning_condpipe),
('KNN-Shapley', res_poisoning_knn),
('TMC-Shapley', res_poisoning_pipe)
).plot(model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/poisoning/plots/PoisoningCleaning-{name}-i-{iterations}-time-{dt_string}.png')
).plot(model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/poisoning/PoisoningCleaning')

def run_fairness_experiment(self, iterations, dt_string, ray, truncated):

Expand Down Expand Up @@ -289,7 +289,7 @@ def equal_odds(y, y_pred, a_indices=a_indices, b_indices=b_indices):
eo_score = equal_odds(y_test, pipeline.predict(X_test))
print(f"Initial demographic ratio parity is {dpr_score}!")
print(f"Initial equalized odds ratio is {eo_score}!")
np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/data/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test)
#np.savez_compressed(f'{self.base_path}/fairness/{name}-i-{iterations}-time-{dt_string}-data', X=X_train, y=y_train, X_test=X_test, y_test=y_test)

transform_condknn, pipeline_condknn = process_pipe_condknn(pipeline)
transform_condpipe, pipeline_condpipe = process_pipe_condpipe(pipeline)
Expand All @@ -311,30 +311,30 @@ def equal_odds(y, y_pred, a_indices=a_indices, b_indices=b_indices):
time_pipe = end - start

# datetime object containing current date and time
np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/shapley/{name}-i-{iterations}-time-{dt_string}-shapley', condpipe=res_fairness_condpipe, knn=res_fairness_knn, pipe=res_fairness_pipe)
np.savez_compressed(f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/time/{name}-i-{iterations}-time-{dt_string}-time', time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)
# np.savez_compressed(f'{self.base_path}/fairness/shapley/{name}-i-{iterations}-time-{dt_string}-shapley', condpipe=res_fairness_condpipe, knn=res_fairness_knn, pipe=res_fairness_pipe)
# np.savez_compressed(f'{self.base_path}/fairness/time/{name}-i-{iterations}-time-{dt_string}-time', time_condpipe=time_condpipe, time_knn=time_knn, time_pipe=time_pipe)

figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')

FairnessPlotter(app_fairness,
('TMC-Shapley (cond)', res_fairness_condpipe),
('KNN-Shapley', res_fairness_knn),
('TMC-Shapley', res_fairness_pipe)
).plot(metric=eo_sex, metric_name="Equalized Odds Ratio", model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessEO-{name}-i-{iterations}-time-{dt_string}.png')
).plot(metric=eo_sex, metric_name="Equalized Odds Ratio", model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/fairness/FairnessEO')

FairnessPlotter(app_fairness,
('TMC-Shapley (cond)', res_fairness_condpipe),
('KNN-Shapley', res_fairness_knn),
('TMC-Shapley', res_fairness_pipe)
).plot(metric=dpr_sex, metric_name="Demographic Parity Ratio", model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessDPR-{name}-i-{iterations}-time-{dt_string}.png')
).plot(metric=dpr_sex, metric_name="Demographic Parity Ratio", model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/fairness/FairnessDPR')

FairnessPlotter(app_fairness,
('TMC-Shapley (cond)', res_fairness_condpipe),
('KNN-Shapley', res_fairness_knn),
('TMC-Shapley', res_fairness_pipe)
).plot(metric=accuracy_score, metric_name='Accuracy', model_family='custom', pipeline=pipeline, save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessAccuracy-{name}-i-{iterations}-time-{dt_string}.png')
).plot(metric=accuracy_score, metric_name='Accuracy', model_family='custom', pipeline=pipeline, save_path=f'{self.base_path}/fairness/FairnessAccuracy')

RuntimePlotter(
('TMC-Shapley (cond)', time_condpipe),
('KNN-Shapley', time_knn),
('TMC-Shapley', time_pipe)).plot(save_path=f'./results/{self.name}/i-{iterations}-time-{dt_string}/fairness/plots/FairnessRuntime-{name}-i-{iterations}-time-{dt_string}.png')
# RuntimePlotter(
# ('TMC-Shapley (cond)', time_condpipe),
# ('KNN-Shapley', time_knn),
# ('TMC-Shapley', time_pipe)).plot(save_path=f'{self.base_path}/fairness/FairnessRuntime')
55 changes: 21 additions & 34 deletions experiments/experiments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,73 +2,60 @@
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
"%autoreload 2\n",
"%matplotlib inline"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"source": [
"from sklearn import set_config\n",
"set_config(display='diagram')\n",
"# from sklearn import set_config\n",
"# set_config(display='diagram')\n",
"\n",
"from dspipes import Pipelines, TextPipeline\n",
"import Experiment"
],
"outputs": [
{
"output_type": "error",
"ename": "ModuleNotFoundError",
"evalue": "No module named 'dspipes'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-cc7eabe9d859>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mset_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'diagram'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdspipes\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mPipelines\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTextPipeline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mExperiment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'dspipes'"
]
}
"from Experiment import Experiment"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"for i in range(0,8):\n",
"for i in range(0,7):\n",
" name = f'pipe_{i}'\n",
" model = Pipelines.create_numerical_pipeline(name, imputer=False)\n",
" exp_name = f'uci_label_{name}'\n",
" exp = Experiment(exp_name, model, dataset_name=\"UCI\")\n",
" if i == 7: \n",
" exp.run(iterations=2000, run_label=True, run_poisoning=False, run_fairness=False, ray=True, flatten=False, truncated=True)\n",
" else:\n",
" exp.run(iterations=2000, run_label=True, run_poisoning=False, run_fairness=False, ray=True, truncated=True)"
" exp.run(iterations=2000, run_label=True, run_poisoning=True, run_fairness=True, ray=True, truncated=True)"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [],
"outputs": [],
"metadata": {}
}
],
"metadata": {
"interpreter": {
"hash": "244f903178196d36fbfba2449c5f83f26f06c6159b04ebd44a04b5c6e8ea4107"
"hash": "ef53dea1fad49daf30b71adbbc57c745033606a4b172566c9d43a4236b6a4b07"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3.7.7 64-bit ('fastai2': conda)"
"display_name": "Python 3.8.10 64-bit ('datascope': conda)"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -80,7 +67,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
"version": "3.8.10"
},
"orig_nbformat": 4
},
Expand Down
Loading

0 comments on commit 95d1ec8

Please sign in to comment.