@@ -44,15 +44,12 @@ Preprocessors Check
44
44
2) Update script in data/input/mydata/clean.py
45
45
to load column names, basic profile...
46
46
47
- 3) Run python clean.py profile and check results
48
47
49
-
50
- 4) run python clean.py train_test
48
+ 3) Run python clean.py train_test
51
49
which generates train and test data in:
52
50
data/input/mydata/train/features.parquet target.parquet (y label)
53
51
data/input/mydata/test/features.parquet target.parquet (y label)
54
52
55
-
56
53
4) Copy and paste titanic_classifier.py into mydata_classifier.py
57
54
58
55
5) Modify the script mydata_classifier.py
@@ -73,59 +70,48 @@ Preprocessors Check
73
70
74
71
### List of preprocessors
75
72
76
- prepro_sampler.pd_autoencoder
77
- prepro_sampler.pd_col_genetic_transform
78
- prepro_sampler.pd_colcat_encoder_generic
79
- prepro_sampler.pd_filter_resample
80
- prepro_sampler.pd_filter_rows
81
-
82
-
83
- prepro.pd_autoencoder
84
- prepro.pd_col_covariate_shift_adjustment
85
- prepro.pd_col_genetic_transform
86
- prepro.pd_colcat_bin
87
- prepro.pd_colcat_encoder_generic
88
- prepro.pd_colcat_minhash
89
- prepro.pd_colcat_to_onehot
90
- prepro.pd_colcross
91
- prepro.pd_coldate
92
- prepro.pd_colnum
93
- prepro.pd_colnum_bin
94
- prepro.pd_colnum_binto_onehot
95
- prepro.pd_colnum_normalize
96
- prepro.pd_colnum_quantile_norm
97
- prepro.pd_coltext
98
- prepro.pd_coltext_clean
99
- prepro.pd_coltext_universal_google
100
- prepro.pd_coltext_wordfreq
101
- prepro.pd_coly
102
- prepro.pd_filter_resample
103
- prepro.pd_filter_rows
104
- prepro.pd_label_clean
105
-
106
-
107
- prepro_tseries.pd_ts_autoregressive
108
- prepro_tseries.pd_ts_basic
109
- prepro_tseries.pd_ts_date
110
- prepro_tseries.pd_ts_detrend
111
- prepro_tseries.pd_ts_generic
112
- prepro_tseries.pd_ts_groupby
113
- prepro_tseries.pd_ts_identity
114
- prepro_tseries.pd_ts_lag
115
- prepro_tseries.pd_ts_onehot
116
- prepro_tseries.pd_ts_rolling
117
- prepro_tseries.pd_ts_template
118
-
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
-
127
-
128
-
73
+ #### Data Over/Under sampling
74
+ prepro_sampler.pd_autoencoder(df,col, pars)
75
+ prepro_sampler.pd_col_genetic_transform(df,col, pars)
76
+ prepro_sampler.pd_colcat_encoder_generic(df,col, pars)
77
+ prepro_sampler.pd_filter_resample(df,col, pars)
78
+ prepro_sampler.pd_filter_rows(df,col, pars)
79
+
80
+ #### General Preprocessors
81
+ prepro.pd_autoencoder(df,col, pars)
82
+ prepro.pd_col_genetic_transform(df,col, pars)
83
+ prepro.pd_colcat_bin(df,col, pars)
84
+ prepro.pd_colcat_encoder_generic(df,col, pars)
85
+ prepro.pd_colcat_minhash(df,col, pars)
86
+ prepro.pd_colcat_to_onehot(df,col, pars)
87
+ prepro.pd_colcross(df,col, pars)
88
+ prepro.pd_coldate(df,col, pars)
89
+ prepro.pd_colnum(df,col, pars)
90
+ prepro.pd_colnum_bin(df,col, pars)
91
+ prepro.pd_colnum_binto_onehot(df,col, pars)
92
+ prepro.pd_colnum_normalize(df,col, pars)
93
+ prepro.pd_colnum_quantile_norm(df,col, pars)
94
+ prepro.pd_coltext(df,col, pars)
95
+ prepro.pd_coltext_clean(df,col, pars)
96
+ prepro.pd_coltext_universal_google(df,col, pars)
97
+ prepro.pd_coltext_wordfreq(df,col, pars)
98
+ prepro.pd_coly(df,col, pars)
99
+ prepro.pd_filter_resample(df,col, pars)
100
+ prepro.pd_filter_rows(df,col, pars)
101
+ prepro.pd_label_clean(df,col, pars)
102
+
103
+ #### Time Series
104
+ prepro_tseries.pd_ts_autoregressive(df,col, pars)
105
+ prepro_tseries.pd_ts_basic(df,col, pars)
106
+ prepro_tseries.pd_ts_date(df,col, pars)
107
+ prepro_tseries.pd_ts_detrend(df,col, pars)
108
+ prepro_tseries.pd_ts_generic(df,col, pars)
109
+ prepro_tseries.pd_ts_groupby(df,col, pars)
110
+ prepro_tseries.pd_ts_identity(df,col, pars)
111
+ prepro_tseries.pd_ts_lag(df,col, pars)
112
+ prepro_tseries.pd_ts_onehot(df,col, pars)
113
+ prepro_tseries.pd_ts_rolling(df,col, pars)
114
+ prepro_tseries.pd_ts_template(df,col, pars)
129
115
130
116
131
117
0 commit comments