Skip to content

feat!: rename ml model params #491

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits on Mar 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
fix tests
  • Loading branch information
GarrettWu committed Mar 22, 2024
commit 57a8b65ee0bc8c38b10ec0fba4ecb8692513980e
4 changes: 4 additions & 0 deletions bigframes/ml/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
return {
"model_type": "BOOSTED_TREE_REGRESSOR",
"data_split_method": "NO_SPLIT",
"early_stop": True,
"num_parallel_tree": self.n_estimators,
"booster_type": self.booster,
"tree_method": self.tree_method,
Expand Down Expand Up @@ -279,6 +280,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
return {
"model_type": "BOOSTED_TREE_CLASSIFIER",
"data_split_method": "NO_SPLIT",
"early_stop": True,
"num_parallel_tree": self.n_estimators,
"booster_type": self.booster,
"tree_method": self.tree_method,
Expand Down Expand Up @@ -423,6 +425,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
"""The model options as they will be set for BQML"""
return {
"model_type": "RANDOM_FOREST_REGRESSOR",
"early_stop": True,
"num_parallel_tree": self.n_estimators,
"tree_method": self.tree_method,
"min_tree_child_weight": self.min_tree_child_weight,
Expand Down Expand Up @@ -585,6 +588,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
"""The model options as they will be set for BQML"""
return {
"model_type": "RANDOM_FOREST_CLASSIFIER",
"early_stop": True,
"num_parallel_tree": self.n_estimators,
"tree_method": self.tree_method,
"min_tree_child_weight": self.min_tree_child_weight,
Expand Down
2 changes: 1 addition & 1 deletion bigframes/ml/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def _bqml_options(self) -> dict:
"optimize_strategy": self.optimize_strategy,
"l2_reg": self.l2_reg,
"max_iterations": self.max_iterations,
"learning_rate_strategy": self.learning_rate_strategy,
"learn_rate_strategy": self.learning_rate_strategy,
"min_rel_progress": self.tol,
"calculate_p_values": self.calculate_p_values,
"enable_global_explain": self.enable_global_explain,
Expand Down
16 changes: 8 additions & 8 deletions tests/system/large/ml/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import bigframes.ml.ensemble


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_xgbregressor_default_params(penguins_df_default_index, dataset_id):
model = bigframes.ml.ensemble.XGBRegressor()

Expand Down Expand Up @@ -64,7 +64,7 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id):
)


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_xgbregressor_dart_booster_multiple_params(
penguins_df_default_index, dataset_id
):
Expand Down Expand Up @@ -138,7 +138,7 @@ def test_xgbregressor_dart_booster_multiple_params(
assert reloaded_model.n_estimators == 2


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id):
model = bigframes.ml.ensemble.XGBClassifier()

Expand Down Expand Up @@ -178,7 +178,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id):
)


# @pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_xgbclassifier_dart_booster_multiple_params(
penguins_df_default_index, dataset_id
):
Expand Down Expand Up @@ -251,7 +251,7 @@ def test_xgbclassifier_dart_booster_multiple_params(
assert reloaded_model.n_estimators == 2


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_randomforestregressor_default_params(penguins_df_default_index, dataset_id):
model = bigframes.ml.ensemble.RandomForestRegressor()

Expand Down Expand Up @@ -292,7 +292,7 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset
)


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_randomforestregressor_multiple_params(penguins_df_default_index, dataset_id):
model = bigframes.ml.ensemble.RandomForestRegressor(
tree_method="auto",
Expand Down Expand Up @@ -358,7 +358,7 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase
assert reloaded_model.enable_global_explain is False


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_randomforestclassifier_default_params(penguins_df_default_index, dataset_id):
model = bigframes.ml.ensemble.RandomForestClassifier()

Expand Down Expand Up @@ -398,7 +398,7 @@ def test_randomforestclassifier_default_params(penguins_df_default_index, datase
)


@pytest.mark.flaky(retries=2, delay=120)
@pytest.mark.flaky(retries=2)
def test_randomforestclassifier_multiple_params(penguins_df_default_index, dataset_id):
model = bigframes.ml.ensemble.RandomForestClassifier(
tree_method="AUTO",
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/ml/test_golden_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_linear_regression_default_fit(
model.fit(mock_X, mock_y)

mock_session._start_query_ml_ddl.assert_called_once_with(
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="auto_strategy",\n fit_intercept=True,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n early_stop=True,\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="auto_strategy",\n fit_intercept=True,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
)


Expand All @@ -115,7 +115,7 @@ def test_linear_regression_params_fit(bqml_model_factory, mock_session, mock_X,
model.fit(mock_X, mock_y)

mock_session._start_query_ml_ddl.assert_called_once_with(
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="auto_strategy",\n fit_intercept=False,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n early_stop=True,\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="auto_strategy",\n fit_intercept=False,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
)


Expand Down Expand Up @@ -148,7 +148,7 @@ def test_logistic_regression_default_fit(
model.fit(mock_X, mock_y)

mock_session._start_query_ml_ddl.assert_called_once_with(
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=True,\n auto_class_weights=False,\n optimize_strategy="auto_strategy",\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=True,\n auto_class_weights=False,\n optimize_strategy="auto_strategy",\n l2_reg=0.0,\n max_iterations=20,\n learning_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
)


Expand All @@ -170,7 +170,7 @@ def test_logistic_regression_params_fit(
model.fit(mock_X, mock_y)

mock_session._start_query_ml_ddl.assert_called_once_with(
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=False,\n auto_class_weights=True,\n optimize_strategy="batch_gradient_descent",\n l2_reg=0.2,\n max_iterations=30,\n learn_rate_strategy="constant",\n min_rel_progress=0.02,\n calculate_p_values=False,\n enable_global_explain=False,\n l1_reg=0.2,\n learn_rate=0.2,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=False,\n auto_class_weights=True,\n optimize_strategy="batch_gradient_descent",\n l2_reg=0.2,\n max_iterations=30,\n learning_rate_strategy="constant",\n min_rel_progress=0.02,\n calculate_p_values=False,\n enable_global_explain=False,\n l1_reg=0.2,\n learn_rate=0.2,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql'
)


Expand Down