From 0d793f9efd4eb9ae06f052e88374d72d7a29d24a Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 18 Dec 2025 23:57:53 +0000 Subject: [PATCH 1/2] feat: Implement AI.GENERATE_EMBEDDING wrapper This change implements the `bigframes.bigquery.ai.generate_embedding` function, which wraps the BigQuery `AI.GENERATE_EMBEDDING` TVF. It supports: - Generating embeddings from DataFrames and Series. - Generating embeddings from pandas DataFrames and Series. - Specifying model name and arguments like `output_dimensionality`, `start_second`, `end_second`, and `interval_seconds`. The function is exposed in `bigframes.bigquery.ai`. Unit tests have been added to verify the generated SQL and argument mapping. --- bigframes/bigquery/_operations/ai.py | 87 ++++++++++++++++- tests/unit/bigquery/test_ai.py | 135 +++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 tests/unit/bigquery/test_ai.py diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index e8c28e61f5..a2ae304494 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -19,7 +19,7 @@ from __future__ import annotations import json -from typing import Any, Iterable, List, Literal, Mapping, Tuple, Union +from typing import Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union import pandas as pd @@ -387,6 +387,91 @@ def generate_double( return series_list[0]._apply_nary_op(operator, series_list[1:]) +@log_adapter.method_logger(custom_base_name="bigquery_ai") +def generate_embedding( + model_name: str, + data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series], + *, + output_dimensionality: Optional[int] = None, + start_second: Optional[float] = None, + end_second: Optional[float] = None, + interval_seconds: Optional[float] = None, +) -> dataframe.DataFrame: + """ + Creates embeddings that describe an entity—for 
example, a piece of text or an image. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]}) + >>> bbq.ai.generate_embedding( + ... "project.dataset.model_name", + ... df + ... ) # doctest: +SKIP + + Args: + model_name (str): + The name of a remote model over a Vertex AI multimodalembedding@001 model. + data (DataFrame or Series): + The data to generate embeddings for. If a Series is provided, it is treated as the 'content' column. + If a DataFrame is provided, it must contain a 'content' column, or you must rename the column you wish to embed to 'content'. + output_dimensionality (int, optional): + The number of dimensions to use when generating embeddings. Valid values are 128, 256, 512, and 1408. The default value is 1408. + start_second (float, optional): + The second in the video at which to start the embedding. The default value is 0. + end_second (float, optional): + The second in the video at which to end the embedding. The default value is 120. + interval_seconds (float, optional): + The interval to use when creating embeddings. The default value is 16. + + Returns: + bigframes.dataframe.DataFrame: + A new DataFrame with the generated embeddings. It contains the input table columns and the following columns: + * "embedding": an ARRAY value that contains the generated embedding vector. + * "status": a STRING value that contains the API response status for the corresponding row. + * "video_start_sec": for video content, an INT64 value that contains the starting second. + * "video_end_sec": for video content, an INT64 value that contains the ending second. 
+ """ + if isinstance(data, (pd.DataFrame, pd.Series)): + data = bpd.read_pandas(data) + + if isinstance(data, series.Series): + # Rename series to 'content' and convert to DataFrame + data_df = data.rename("content").to_frame() + elif isinstance(data, dataframe.DataFrame): + data_df = data + else: + raise ValueError(f"Unsupported data type: {type(data)}") + + # We need to get the SQL for the input data to pass as a subquery to the TVF + source_sql = data_df.sql + + struct_fields = [] + if output_dimensionality is not None: + struct_fields.append(f"{output_dimensionality} AS output_dimensionality") + if start_second is not None: + struct_fields.append(f"{start_second} AS start_second") + if end_second is not None: + struct_fields.append(f"{end_second} AS end_second") + if interval_seconds is not None: + struct_fields.append(f"{interval_seconds} AS interval_seconds") + + struct_args = ", ".join(struct_fields) + + # Construct the TVF query + query = f""" + SELECT * + FROM AI.GENERATE_EMBEDDING( + MODEL `{model_name}`, + ({source_sql}), + STRUCT({struct_args}) + ) + """ + + return data_df._session.read_gbq(query) + + @log_adapter.method_logger(custom_base_name="bigquery_ai") def if_( prompt: PROMPT_TYPE, diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py new file mode 100644 index 0000000000..c9c046664f --- /dev/null +++ b/tests/unit/bigquery/test_ai.py @@ -0,0 +1,135 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest import mock + +import pandas as pd +import pytest + +import bigframes.bigquery._operations.ai as ai_ops +import bigframes.dataframe +import bigframes.series +import bigframes.session + + +@pytest.fixture +def mock_session(): + return mock.create_autospec(spec=bigframes.session.Session) + + +@pytest.fixture +def mock_dataframe(mock_session): + df = mock.create_autospec(spec=bigframes.dataframe.DataFrame) + df._session = mock_session + df.sql = "SELECT * FROM my_table" + return df + + +@pytest.fixture +def mock_series(mock_session): + s = mock.create_autospec(spec=bigframes.series.Series) + s._session = mock_session + # Mock to_frame to return a mock dataframe + df = mock.create_autospec(spec=bigframes.dataframe.DataFrame) + df._session = mock_session + df.sql = "SELECT my_col AS content FROM my_table" + s.rename.return_value.to_frame.return_value = df + return s + + +def test_generate_embedding_with_dataframe(mock_dataframe, mock_session): + model_name = "project.dataset.model" + + ai_ops.generate_embedding( + model_name, + mock_dataframe, + output_dimensionality=256, + ) + + mock_session.read_gbq.assert_called_once() + query = mock_session.read_gbq.call_args[0][0] + + # Normalize whitespace for comparison + query = " ".join(query.split()) + + expected_part_1 = "SELECT * FROM AI.GENERATE_EMBEDDING(" + expected_part_2 = f"MODEL `{model_name}`," + expected_part_3 = "(SELECT * FROM my_table)," + expected_part_4 = "STRUCT(256 AS output_dimensionality)" + + assert expected_part_1 in query + assert expected_part_2 in query + assert expected_part_3 in query + assert expected_part_4 in query + + +def test_generate_embedding_with_series(mock_series, mock_session): + model_name = "project.dataset.model" + + ai_ops.generate_embedding( + model_name, + mock_series, + start_second=0.0, + end_second=10.0, + interval_seconds=5.0 + ) + + mock_series.rename.assert_called_with("content") + mock_series.rename.return_value.to_frame.assert_called_once() + + 
mock_session.read_gbq.assert_called_once() + query = mock_session.read_gbq.call_args[0][0] + query = " ".join(query.split()) + + assert f"MODEL `{model_name}`" in query + assert "(SELECT my_col AS content FROM my_table)" in query + assert "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)" in query + + +def test_generate_embedding_defaults(mock_dataframe, mock_session): + model_name = "project.dataset.model" + + ai_ops.generate_embedding( + model_name, + mock_dataframe, + ) + + mock_session.read_gbq.assert_called_once() + query = mock_session.read_gbq.call_args[0][0] + query = " ".join(query.split()) + + assert f"MODEL `{model_name}`" in query + assert "STRUCT()" in query + + +@mock.patch("bigframes.pandas.read_pandas") +def test_generate_embedding_with_pandas_dataframe(read_pandas_mock, mock_dataframe, mock_session): + # This tests that pandas input path works and calls read_pandas + model_name = "project.dataset.model" + + # Mock return value of read_pandas to be a BigFrames DataFrame + read_pandas_mock.return_value = mock_dataframe + + pandas_df = pd.DataFrame({"content": ["test"]}) + + ai_ops.generate_embedding( + model_name, + pandas_df, + ) + + read_pandas_mock.assert_called_once() + # Check that read_pandas was called with something (the pandas df) + assert read_pandas_mock.call_args[0][0] is pandas_df + + mock_session.read_gbq.assert_called_once() From 9a774ac2cd38aa4f5e71f0bad1c8a0b01528a806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Fri, 16 Jan 2026 21:02:28 +0000 Subject: [PATCH 2/2] update some unit tests --- bigframes/bigquery/_operations/ai.py | 62 ++++++++---- bigframes/core/pyformat.py | 3 +- bigframes/core/sql/__init__.py | 74 +------------- bigframes/core/sql/literals.py | 99 +++++++++++++++++++ bigframes/core/sql/ml.py | 7 +- tests/unit/bigquery/test_ai.py | 15 +-- .../evaluate_model_with_options.sql | 2 +- .../explain_predict_model_with_options.sql | 2 +- .../global_explain_model_with_options.sql | 2 
+- .../predict_model_with_options.sql | 2 +- 10 files changed, 158 insertions(+), 110 deletions(-) create mode 100644 bigframes/core/sql/literals.py diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index a3cd6deac2..4811ab8e19 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -28,6 +28,7 @@ from bigframes import series, session from bigframes.core import convert from bigframes.core.logging import log_adapter +import bigframes.core.sql.literals from bigframes.ml import core as ml_core from bigframes.operations import ai_ops, output_schemas @@ -394,9 +395,11 @@ def generate_embedding( data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series], *, output_dimensionality: Optional[int] = None, + task_type: Optional[str] = None, start_second: Optional[float] = None, end_second: Optional[float] = None, interval_seconds: Optional[float] = None, + trial_id: Optional[int] = None, ) -> dataframe.DataFrame: """ Creates embeddings that describe an entity—for example, a piece of text or an image. @@ -414,32 +417,49 @@ def generate_embedding( Args: model_name (str): The name of a remote model over a Vertex AI multimodalembedding@001 model. - data (DataFrame or Series): - The data to generate embeddings for. If a Series is provided, it is treated as the 'content' column. - If a DataFrame is provided, it must contain a 'content' column, or you must rename the column you wish to embed to 'content'. + data (bigframes.pandas.DataFrame or bigframes.pandas.Series): + The data to generate embeddings for. If a Series is provided, it is + treated as the 'content' column. If a DataFrame is provided, it + must contain a 'content' column, or you must rename the column you + wish to embed to 'content'. output_dimensionality (int, optional): - The number of dimensions to use when generating embeddings. Valid values are 128, 256, 512, and 1408. The default value is 1408. 
+ An INT64 value that specifies the number of dimensions to use when + generating embeddings. For example, if you specify 256 AS + output_dimensionality, then the embedding output column contains a + 256-dimensional embedding for each input value. To find the + supported range of output dimensions, read about the available + `Google text embedding models `_. + task_type (str, optional): + A STRING literal that specifies the intended downstream application to + help the model produce better quality embeddings. For a list of + supported task types and how to choose which one to use, see `Choose an + embeddings task type `_. start_second (float, optional): The second in the video at which to start the embedding. The default value is 0. end_second (float, optional): The second in the video at which to end the embedding. The default value is 120. interval_seconds (float, optional): The interval to use when creating embeddings. The default value is 16. + trial_id (int, optional): + An INT64 value that identifies the hyperparameter tuning trial that + you want the function to evaluate. The function uses the optimal + trial by default. Only specify this argument if you ran + hyperparameter tuning when creating the model. Returns: - bigframes.dataframe.DataFrame: - A new DataFrame with the generated embeddings. It contains the input table columns and the following columns: - * "embedding": an ARRAY value that contains the generated embedding vector. - * "status": a STRING value that contains the API response status for the corresponding row. - * "video_start_sec": for video content, an INT64 value that contains the starting second. - * "video_end_sec": for video content, an INT64 value that contains the ending second. + bigframes.pandas.DataFrame: + A new DataFrame with the generated embeddings. See the `SQL + reference for AI.GENERATE_EMBEDDING + `_ + for details.
""" if isinstance(data, (pd.DataFrame, pd.Series)): data = bpd.read_pandas(data) if isinstance(data, series.Series): - # Rename series to 'content' and convert to DataFrame - data_df = data.rename("content").to_frame() + data = data.copy() + data.name = "content" + data_df = data.to_frame() elif isinstance(data, dataframe.DataFrame): data_df = data else: @@ -448,17 +468,19 @@ def generate_embedding( # We need to get the SQL for the input data to pass as a subquery to the TVF source_sql = data_df.sql - struct_fields = [] + struct_fields = {} if output_dimensionality is not None: - struct_fields.append(f"{output_dimensionality} AS output_dimensionality") + struct_fields["OUTPUT_DIMENSIONALITY"] = output_dimensionality + if task_type is not None: + struct_fields["TASK_TYPE"] = task_type if start_second is not None: - struct_fields.append(f"{start_second} AS start_second") + struct_fields["START_SECOND"] = start_second if end_second is not None: - struct_fields.append(f"{end_second} AS end_second") + struct_fields["END_SECOND"] = end_second if interval_seconds is not None: - struct_fields.append(f"{interval_seconds} AS interval_seconds") - - struct_args = ", ".join(struct_fields) + struct_fields["INTERVAL_SECONDS"] = interval_seconds + if trial_id is not None: + struct_fields["TRIAL_ID"] = trial_id # Construct the TVF query query = f""" @@ -466,7 +488,7 @@ def generate_embedding( FROM AI.GENERATE_EMBEDDING( MODEL `{model_name}`, ({source_sql}), - STRUCT({struct_args}) + {bigframes.core.sql.literals.struct_literal(struct_fields)} ) """ diff --git a/bigframes/core/pyformat.py b/bigframes/core/pyformat.py index 8f49556ff4..7d08dd4da7 100644 --- a/bigframes/core/pyformat.py +++ b/bigframes/core/pyformat.py @@ -28,6 +28,7 @@ from bigframes.core import utils import bigframes.core.local_data +import bigframes.core.sql.literals from bigframes.core.tools import bigquery_schema import bigframes.session @@ -120,7 +121,7 @@ def _validate_type(name: str, value: Any):
supported_types = ( typing.get_args(_BQ_TABLE_TYPES) - + typing.get_args(bigframes.core.sql.SIMPLE_LITERAL_TYPES) + + typing.get_args(bigframes.core.sql.literals.SIMPLE_LITERAL_TYPES) + (bigframes.dataframe.DataFrame,) + (pandas.DataFrame,) ) diff --git a/bigframes/core/sql/__init__.py b/bigframes/core/sql/__init__.py index ccd2a16ddc..521c13c6bd 100644 --- a/bigframes/core/sql/__init__.py +++ b/bigframes/core/sql/__init__.py @@ -17,15 +17,11 @@ Utility functions for SQL construction. """ -import datetime -import decimal import json -import math from typing import cast, Collection, Iterable, Mapping, Optional, TYPE_CHECKING, Union -import shapely.geometry.base # type: ignore - import bigframes.core.compile.googlesql as googlesql +from bigframes.core.sql.literals import simple_literal if TYPE_CHECKING: import google.cloud.bigquery as bigquery @@ -33,75 +29,7 @@ import bigframes.core.ordering -# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0. -try: - from shapely.io import to_wkt # type: ignore -except ImportError: - from shapely.wkt import dumps # type: ignore - - to_wkt = dumps - - -SIMPLE_LITERAL_TYPES = Union[ - bytes, - str, - int, - bool, - float, - datetime.datetime, - datetime.date, - datetime.time, - decimal.Decimal, - list, -] - - ### Writing SQL Values (literals, column references, table references, etc.) 
-def simple_literal(value: Union[SIMPLE_LITERAL_TYPES, None]) -> str: - """Return quoted input string.""" - - # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals - if value is None: - return "NULL" - elif isinstance(value, str): - # Single quoting seems to work nicer with ibis than double quoting - return f"'{googlesql._escape_chars(value)}'" - elif isinstance(value, bytes): - return repr(value) - elif isinstance(value, (bool, int)): - return str(value) - elif isinstance(value, float): - # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#floating_point_literals - if math.isnan(value): - return 'CAST("nan" as FLOAT)' - if value == math.inf: - return 'CAST("+inf" as FLOAT)' - if value == -math.inf: - return 'CAST("-inf" as FLOAT)' - return str(value) - # Check datetime first as it is a subclass of date - elif isinstance(value, datetime.datetime): - if value.tzinfo is None: - return f"DATETIME('{value.isoformat()}')" - else: - return f"TIMESTAMP('{value.isoformat()}')" - elif isinstance(value, datetime.date): - return f"DATE('{value.isoformat()}')" - elif isinstance(value, datetime.time): - return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))" - elif isinstance(value, shapely.geometry.base.BaseGeometry): - return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})" - elif isinstance(value, decimal.Decimal): - # TODO: disambiguate BIGNUMERIC based on scale and/or precision - return f"CAST('{str(value)}' AS NUMERIC)" - elif isinstance(value, list): - simple_literals = [simple_literal(i) for i in value] - return f"[{', '.join(simple_literals)}]" - - else: - raise ValueError(f"Cannot produce literal for {value}") - - def multi_literal(*values: str): literal_strings = [simple_literal(i) for i in values] return "(" + ", ".join(literal_strings) + ")" diff --git a/bigframes/core/sql/literals.py b/bigframes/core/sql/literals.py new file mode 100644 index 0000000000..b9db3590c1 --- /dev/null +++ 
b/bigframes/core/sql/literals.py @@ -0,0 +1,99 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import datetime +import decimal +import math +from typing import Mapping, Union + +import shapely.geometry.base # type: ignore + +import bigframes.core.compile.googlesql as googlesql + +# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0. +try: + from shapely.io import to_wkt # type: ignore +except ImportError: + from shapely.wkt import dumps # type: ignore + + to_wkt = dumps + + +SIMPLE_LITERAL_TYPES = Union[ + bytes, + str, + int, + bool, + float, + datetime.datetime, + datetime.date, + datetime.time, + decimal.Decimal, + list, +] + + +def simple_literal(value: Union[SIMPLE_LITERAL_TYPES, None]) -> str: + """Return quoted input string.""" + + # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals + if value is None: + return "NULL" + elif isinstance(value, str): + # Single quoting seems to work nicer with ibis than double quoting + return f"'{googlesql._escape_chars(value)}'" + elif isinstance(value, bytes): + return repr(value) + elif isinstance(value, (bool, int)): + return str(value) + elif isinstance(value, float): + # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#floating_point_literals + if math.isnan(value): + return 'CAST("nan" as FLOAT)' + if value == math.inf: + return 'CAST("+inf" as FLOAT)' + if value == -math.inf: + return 
'CAST("-inf" as FLOAT)' + return str(value) + # Check datetime first as it is a subclass of date + elif isinstance(value, datetime.datetime): + if value.tzinfo is None: + return f"DATETIME('{value.isoformat()}')" + else: + return f"TIMESTAMP('{value.isoformat()}')" + elif isinstance(value, datetime.date): + return f"DATE('{value.isoformat()}')" + elif isinstance(value, datetime.time): + return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))" + elif isinstance(value, shapely.geometry.base.BaseGeometry): + return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})" + elif isinstance(value, decimal.Decimal): + # TODO: disambiguate BIGNUMERIC based on scale and/or precision + return f"CAST('{str(value)}' AS NUMERIC)" + elif isinstance(value, list): + simple_literals = [simple_literal(i) for i in value] + return f"[{', '.join(simple_literals)}]" + + else: + raise ValueError(f"Cannot produce literal for {value}") + + +def struct_literal(struct_options: Mapping[str, SIMPLE_LITERAL_TYPES]) -> str: + rendered_options = [] + for option_name, option_value in struct_options.items(): + rendered_val = simple_literal(option_value) + rendered_options.append(f"{rendered_val} AS {option_name}") + return f"STRUCT({', '.join(rendered_options)})" diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py index ec55fe0426..31102ddd3c 100644 --- a/bigframes/core/sql/ml.py +++ b/bigframes/core/sql/ml.py @@ -18,6 +18,7 @@ import bigframes.core.compile.googlesql as googlesql import bigframes.core.sql +import bigframes.core.sql.literals def create_model_ddl( @@ -105,11 +106,7 @@ def _build_struct_sql( if not struct_options: return "" - rendered_options = [] - for option_name, option_value in struct_options.items(): - rendered_val = bigframes.core.sql.simple_literal(option_value) - rendered_options.append(f"{rendered_val} AS {option_name}") - return f", STRUCT({', '.join(rendered_options)})" + return f", {bigframes.core.sql.literals.struct_literal(struct_options)}" def evaluate( diff --git
a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py index c9c046664f..e3bc7d69d3 100644 --- a/tests/unit/bigquery/test_ai.py +++ b/tests/unit/bigquery/test_ai.py @@ -78,11 +78,7 @@ def test_generate_embedding_with_series(mock_series, mock_session): model_name = "project.dataset.model" ai_ops.generate_embedding( - model_name, - mock_series, - start_second=0.0, - end_second=10.0, - interval_seconds=5.0 + model_name, mock_series, start_second=0.0, end_second=10.0, interval_seconds=5.0 ) mock_series.rename.assert_called_with("content") @@ -94,7 +90,10 @@ def test_generate_embedding_with_series(mock_series, mock_session): assert f"MODEL `{model_name}`" in query assert "(SELECT my_col AS content FROM my_table)" in query - assert "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)" in query + assert ( + "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)" + in query + ) def test_generate_embedding_defaults(mock_dataframe, mock_session): @@ -114,7 +113,9 @@ def test_generate_embedding_defaults(mock_dataframe, mock_session): @mock.patch("bigframes.pandas.read_pandas") -def test_generate_embedding_with_pandas_dataframe(read_pandas_mock, mock_dataframe, mock_session): +def test_generate_embedding_with_pandas_dataframe( + read_pandas_mock, mock_dataframe, mock_session +): # This tests that pandas input path works and calls read_pandas model_name = "project.dataset.model" diff --git a/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql index 01eb4d3781..91d2e03696 100644 --- a/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql +++ b/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql @@ -1 +1 @@ -SELECT * FROM ML.EVALUATE(MODEL `my_model`, STRUCT(False AS 
perform_aggregation, 10 AS horizon, 0.95 AS confidence_level)) +SELECT * FROM ML.EVALUATE(MODEL `my_model`, ) diff --git a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql index 1214bba870..c8e1fa555f 100644 --- a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql +++ b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql @@ -1 +1 @@ -SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(5 AS top_k_features)) +SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), ) diff --git a/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql index 1a3baa0c13..81c399f63f 100644 --- a/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql +++ b/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql @@ -1 +1 @@ -SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, STRUCT(True AS class_level_explain)) +SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, ) diff --git a/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql index 96c8074e4c..267815415b 100644 --- a/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql +++ b/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql @@ -1 +1 @@ 
-SELECT * FROM ML.PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(True AS keep_original_columns)) +SELECT * FROM ML.PREDICT(MODEL `my_model`, (SELECT * FROM new_data), )