import numpy as np
import pandas as pd
import tabulate

Load Data and Scenario

%load_ext autoreload
%autoreload 2

from fedimpute.data_prep import load_data, display_data
data, data_config = load_data("codrna")
display_data(data)
print("Data Dimensions: ", data.shape)
print("Data Config:\n", data_config)
+--------+--------+--------+--------+--------+--------+--------+--------+--------+
|   X1   |   X2   |   X3   |   X4   |   X5   |   X6   |   X7   |   X8   |   y    |
|--------+--------+--------+--------+--------+--------+--------+--------+--------|
| 0.7554 | 0.1364 | 0.0352 | 0.4132 | 0.6937 | 0.1591 | 0.3329 | 0.7154 | 1.0000 |
| 0.7334 | 0.7879 | 0.3819 | 0.3693 | 0.5619 | 0.4830 | 0.4351 | 0.5160 | 0.0000 |
| 0.7752 | 0.1364 | 0.1761 | 0.3290 | 0.7410 | 0.4259 | 0.4644 | 0.5268 | 1.0000 |
| 0.5905 | 0.7424 | 0.2720 | 0.2898 | 0.6920 | 0.3205 | 0.4019 | 0.6290 | 1.0000 |
| 0.7366 | 0.1212 | 0.2465 | 0.3290 | 0.7410 | 0.3249 | 0.5086 | 0.5631 | 1.0000 |
+--------+--------+--------+--------+--------+--------+--------+--------+--------+
Data Dimensions:  (5000, 9)
Data Config:
 {'target': 'y', 'task_type': 'classification', 'natural_partition': False}
%load_ext autoreload
%autoreload 2
from fedimpute.scenario import ScenarioBuilder

scenario_builder = ScenarioBuilder()
scenario_data = scenario_builder.create_simulated_scenario(
    data, data_config, num_clients = 4, dp_strategy='iid-even', ms_scenario='mnar-heter'
)
scenario_builder.summarize_scenario()
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Missing data simulation...
==================================================================
Scenario Summary
==================================================================
Total clients: 4
Global Test Data: (500, 9)
Missing Mechanism Category: MNAR (Self Masking Logit)
Clients Data Summary:
     Train     Test      Miss     MS Ratio    MS Feature    Seed
--  --------  -------  --------  ----------  ------------  ------
C1  (1125,9)  (113,9)  (1125,8)     0.47         8/8        6077
C2  (1125,9)  (113,9)  (1125,8)     0.51         8/8        577
C3  (1125,9)  (113,9)  (1125,8)     0.46         8/8        7231
C4  (1125,9)  (113,9)  (1125,8)     0.47         8/8        5504
==================================================================

Benchmarking Pipeline

%load_ext autoreload
%autoreload 2
from fedimpute.pipeline import FedImputePipeline

pipeline = FedImputePipeline()
pipeline.setup(
    id = 'benchmark_demo',
    fed_imp_configs = [
        ('em', ['local', 'fedem'], {}, [{}, {}]),
        ('mice', ['local', 'fedmice'], {}, [{}, {}]),
        ('gain', ['local', 'fedavg'], {}, [{}, {}]),
    ],
    evaluation_params = {
        'metrics': ['imp_quality', 'local_pred', 'fed_pred'],
        'model': 'lr',
    },
    persist_data = False,
    description = 'benchmark demonstration'
)

pipeline.pipeline_setup_summary()
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
==============================================================
Experiment ID: benchmark_demo
==============================================================
Description: benchmark demonstration
Persist Data: False
Evaluation:
  - metrics: ['imp_quality', 'local_pred', 'fed_pred']
  - model: lr
Seed: 100330201
--------------------------------------------------------------
    Imputer    Fed Strategy    Imp Params    Strategy Params
--  ---------  --------------  ------------  -----------------
 0  em         local           {}            {}
 1  em         fedem           {}            {}
 2  mice       local           {}            {}
 3  mice       fedmice         {}            {}
 4  gain       local           {}            {}
 5  gain       fedavg          {}            {}
==============================================================
pipeline.run_pipeline(
    scenario_builder, repeats = 5, verbose = 0
)

Result Analysis

import matplotlib.pyplot as plt

plt.rc('font', family='arial')
plt.rc('pdf', fonttype = 42)
plt.rc('ps', fonttype = 42)
pipeline.plot_pipeline_results(
    metric_aspect = 'fed_pred_personalized',
    plot_type = 'bar',
    plot_params = {'font_size': 20, 'bar_width': 0.2},
    save_path = "./plots/benchmark_fedpred.png",
    legend = False,
    dpi = 300
)
pipeline.plot_pipeline_results(
    metric_aspect = 'local_pred',    
    plot_type = 'bar',
    plot_params = {'font_size': 20, 'bar_width': 0.2},
    save_path = "./plots/benchmark_localpred.png",
    legend = False
)
pipeline.plot_pipeline_results(
    metric_aspect = 'imp_quality',    
    plot_type = 'bar',
    plot_params = {'font_size': 20, 'bar_width': 0.2},
    save_path = "./plots/benchmark_impquality.png"  
)
pipeline.plot_pipeline_results(
    metric_aspect = 'fed_pred_global',    
    plot_type = 'bar',
    plot_params = {'font_size': 20, 'bar_width': 0.2},
    save_path = "./plots/benchmark_fedpredglobal.png",
    legend = False
)
data = pipeline.show_pipeline_results(
    format = 'dataframe',  
    metric_aspect = 'imp_quality',
    metric_name = 'rmse',
    show_round_variation = False
)
data.to_excel("./plots/benchmark_impquality.xlsx")