Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions marimo/_plugins/ui/_impl/altair_chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
can_narwhalify,
empty_df,
is_narwhals_lazyframe,
make_lazy,
)

LOGGER = _loggers.marimo_logger()
Expand Down Expand Up @@ -122,9 +123,7 @@ def _filter_dataframe(
binned_fields: Optional[dict[str, Any]] = None,
) -> Union[IntoDataFrame, IntoLazyFrame]:
# Use lazy evaluation for efficient chained filtering
base = nw.from_native(native_df)
is_lazy = is_narwhals_lazyframe(base)
df = base.lazy()
df, undo_df = make_lazy(native_df)

if not isinstance(selection, dict):
raise TypeError("Input 'selection' must be a dictionary")
Expand Down Expand Up @@ -260,11 +259,7 @@ def _filter_dataframe(
# Continue without this filter - don't break the entire operation
continue

if not is_lazy and is_narwhals_lazyframe(df):
# Undo the lazy
return df.collect().to_native() # type: ignore[no-any-return]

return df.to_native()
return undo_df(df)


def _resolve_values(values: Any, dtype: Any) -> list[Any]:
Expand Down
22 changes: 8 additions & 14 deletions marimo/_plugins/ui/_impl/charts/altair_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
get_table_manager_or_none,
)
from marimo._utils.data_uri import build_data_url
from marimo._utils.narwhals_utils import can_narwhalify, is_narwhals_lazyframe
from marimo._utils.narwhals_utils import (
can_narwhalify,
make_lazy,
)

LOGGER = _loggers.marimo_logger()

Expand Down Expand Up @@ -153,31 +156,22 @@ def _maybe_sanitize_dataframe(data: Any) -> Any:
def sanitize_nan_infs(data: Any) -> Any:
"""Sanitize NaN and Inf values in Dataframes for JSON serialization."""
if can_narwhalify(data):
narwhals_data = nw.from_native(data)
is_prev_lazy = is_narwhals_lazyframe(narwhals_data)

# Convert to lazy for optimization if not already lazy
if not is_prev_lazy:
narwhals_data = narwhals_data.lazy()
df, undo = make_lazy(data)

# Get schema without collecting
schema = narwhals_data.collect_schema()
schema = df.collect_schema()

for col, dtype in schema.items():
# Only numeric columns can have NaN or Inf values
if dtype.is_numeric():
narwhals_data = narwhals_data.with_columns(
df = df.with_columns(
nw.when(nw.col(col).is_nan() | ~nw.col(col).is_finite())
.then(None)
.otherwise(nw.col(col))
.name.keep()
)

# Collect if input was eager
if not is_prev_lazy and is_narwhals_lazyframe(narwhals_data):
narwhals_data = narwhals_data.collect()

return narwhals_data.to_native()
return undo(df)
return data


Expand Down
32 changes: 7 additions & 25 deletions marimo/_plugins/ui/_impl/dataframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import sys
from dataclasses import dataclass
from typing import (
TYPE_CHECKING,
Any,
Callable,
Final,
Expand All @@ -14,8 +13,6 @@
Union,
)

import narwhals.stable.v2 as nw

from marimo._output.rich_help import mddoc
from marimo._plugins.ui._core.ui_element import UIElement
from marimo._plugins.ui._impl.dataframes.transforms.apply import (
Expand Down Expand Up @@ -47,12 +44,9 @@
)
from marimo._runtime.functions import EmptyArgs, Function
from marimo._utils.memoize import memoize_last_value
from marimo._utils.narwhals_utils import is_narwhals_lazyframe
from marimo._utils.narwhals_utils import make_lazy
from marimo._utils.parse_dataclass import parse_raw

if TYPE_CHECKING:
from narwhals.typing import IntoLazyFrame


@dataclass
class GetDataFrameResponse:
Expand Down Expand Up @@ -145,10 +139,8 @@ def __init__(
except Exception:
pass

# Make the dataframe lazy and keep track of whether it was lazy originally
nw_df: nw.LazyFrame[Any] = nw.from_native(df, pass_through=False)
self._was_lazy = is_narwhals_lazyframe(nw_df)
nw_df = nw_df.lazy()
# Make the dataframe lazy and keep an undo callback to restore original type
nw_df, self._undo = make_lazy(df)

self._limit = limit
self._dataframe_name = dataframe_name
Expand Down Expand Up @@ -257,22 +249,20 @@ def _get_column_values(
def _convert_value(self, value: dict[str, Any]) -> DataFrameType:
if value is None:
self._error = None
return _maybe_collect(self._data, self._was_lazy)
# Return the original data using the undo callback
return self._undo(self._transform_container._original_df)

try:
transformations = parse_raw(value, Transformations)
result = self._transform_container.apply(transformations)
self._error = None
self._last_transforms = transformations
return _maybe_collect(result, self._was_lazy)
return self._undo(result)
except Exception as e:
error = f"Error applying dataframe transform: {str(e)}\n\n"
sys.stderr.write(error)
self._error = error
return _maybe_collect(
nw.from_native(self._data, pass_through=False).lazy(),
self._was_lazy,
)
return self._undo(self._transform_container._original_df)

def _search(self, args: SearchTableArgs) -> SearchTableResponse:
offset = args.page_number * args.page_size
Expand Down Expand Up @@ -341,11 +331,3 @@ def _get_cached_table_manager(
if limit is not None:
tm = tm.take(limit, 0)
return tm


def _maybe_collect(
df: nw.LazyFrame[IntoLazyFrame], was_lazy: bool
) -> DataFrameType:
if was_lazy:
return df.collect().to_native() # type: ignore[no-any-return]
return df.to_native()
15 changes: 4 additions & 11 deletions marimo/_plugins/ui/_impl/dataframes/transforms/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
TransformType,
)
from marimo._utils.assert_never import assert_never
from marimo._utils.narwhals_utils import can_narwhalify, is_narwhals_lazyframe
from marimo._utils.narwhals_utils import can_narwhalify, make_lazy

T = TypeVar("T")

Expand Down Expand Up @@ -61,22 +61,15 @@ def apply_transforms_to_df(
f"Unsupported dataframe type. Must be Pandas, Polars, Ibis, Pyarrow, or DuckDB. Got: {type(df)}"
)

import narwhals.stable.v2 as nw

nw_df = nw.from_native(df)
was_lazy = is_narwhals_lazyframe(nw_df)
nw_df = nw_df.lazy()
lazy_df, undo = make_lazy(df)

result_nw = _apply_transforms(
nw_df,
lazy_df,
NarwhalsTransformHandler(),
Transformations(transforms=[transform]),
)

if was_lazy:
return result_nw.to_native()

return result_nw.collect().to_native() # type: ignore[no-any-return]
return undo(result_nw)


def _apply_transforms(
Expand Down
11 changes: 7 additions & 4 deletions marimo/_plugins/ui/_impl/dataframes/transforms/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
UniqueTransform,
)
from marimo._utils.assert_never import assert_never
from marimo._utils.narwhals_utils import collect_and_preserve_type

if TYPE_CHECKING:
import polars as pl
Expand Down Expand Up @@ -293,20 +294,22 @@ def handle_shuffle_rows(
df: DataFrame, transform: ShuffleRowsTransform
) -> DataFrame:
# Note: narwhals sample requires collecting first for shuffle with seed
result = df.collect().sample(fraction=1, seed=transform.seed)
return result.lazy()
collected_df, undo = collect_and_preserve_type(df)
result = collected_df.sample(fraction=1, seed=transform.seed)
return undo(result)

@staticmethod
def handle_sample_rows(
df: DataFrame, transform: SampleRowsTransform
) -> DataFrame:
# Note: narwhals sample requires collecting first for shuffle with seed
result = df.collect().sample(
collected_df, undo = collect_and_preserve_type(df)
result = collected_df.sample(
n=transform.n,
seed=transform.seed,
with_replacement=transform.replace,
)
return result.lazy()
return undo(result)

@staticmethod
def handle_explode_columns(
Expand Down
119 changes: 117 additions & 2 deletions marimo/_utils/narwhals_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING, Any, Union, overload
from typing import TYPE_CHECKING, Any, Callable, Union, overload

import narwhals as nw_main
import narwhals.dtypes as nw_dtypes
import narwhals.stable.v1 as nw1
import narwhals.stable.v2 as nw
from narwhals.typing import IntoDataFrame

from marimo import _loggers

LOGGER = _loggers.marimo_logger()

if sys.version_info < (3, 11):
from typing_extensions import TypeGuard
else:
from typing import TypeGuard


if TYPE_CHECKING:
from narwhals.typing import IntoDataFrame, IntoFrame, IntoLazyFrame
from narwhals.typing import (
IntoBackend,
IntoDataFrame,
IntoFrame,
IntoLazyFrame,
)
from typing_extensions import TypeIs


Expand Down Expand Up @@ -239,3 +248,109 @@ def is_narwhals_dataframe(df: Any) -> TypeIs[nw.DataFrame[Any]]:
or isinstance(df, nw_main.DataFrame)
or isinstance(df, nw1.DataFrame)
)


if TYPE_CHECKING:
    # Signature of the "undo" callback that make_lazy returns: it accepts a
    # narwhals LazyFrame or DataFrame and is annotated as returning a native
    # frame (IntoFrame). Defined only for type checkers, so it may be used in
    # annotations but not at runtime.
    UndoCallback = Callable[
        [Union[nw.LazyFrame[Any], nw.DataFrame[Any]]], IntoFrame
    ]


def _to_lazyframe(
    df: Union[nw.DataFrame[Any], nw.LazyFrame[Any]],
    original_backend: IntoBackend[Any],
) -> nw.LazyFrame[Any]:
    """Return ``df`` as a narwhals LazyFrame, preferring ``original_backend``.

    Args:
        df: A narwhals DataFrame or LazyFrame.
        original_backend: The backend to target when ``df`` has to be made
            lazy.

    Returns:
        ``df`` itself when it is already a LazyFrame; otherwise the result of
        ``df.lazy(backend=original_backend)``, or of plain ``df.lazy()`` when
        that call raises ``ValueError``.
    """
    # Already lazy: nothing to convert.
    if is_narwhals_lazyframe(df):
        return df

    try:
        # Converting onto the original backend only works when that backend
        # is a "lazy backend" (e.g., Ibis, DuckDB, etc.).
        return df.lazy(backend=original_backend)
    except ValueError:
        # Expected in most cases: for a backend that is not a "lazy backend"
        # (e.g., Pandas) Narwhals raises a ValueError, so fall back to the
        # default lazyframe.
        return df.lazy()


def _to_dataframe(
    df: Union[nw.DataFrame[Any], nw.LazyFrame[Any]],
) -> nw.DataFrame[Any]:
    """Return ``df`` as an eager narwhals DataFrame.

    A frame that is already a narwhals DataFrame is returned unchanged;
    anything else is collected.
    """
    return df if is_narwhals_dataframe(df) else df.collect()


def make_lazy(
    df: IntoFrame,
) -> tuple[nw.LazyFrame[Any], UndoCallback]:
    """
    Wrap a native dataframe as a narwhals LazyFrame, paired with an undo callback.

    The function remembers whether the input was lazy or eager, and which
    backend it came from, so that the callback can hand back a native frame
    of the original kind once the lazy work is done.

    Args:
        df: A dataframe that can be narwhalified (Pandas, Polars, Ibis, etc.)

    Returns:
        A tuple of:
        - nw.LazyFrame: The lazy version of the dataframe
        - undo: A callback that takes a narwhals LazyFrame or DataFrame and
          converts it back to the original type (lazy or eager), returning
          the native dataframe. Any other object is logged with a warning
          and returned unchanged.

    Example:
        >>> lazy_df, undo = make_lazy(ibis_table)
        >>> # Do transformations on lazy_df
        >>> result = undo(lazy_df)  # Returns Ibis table (still lazy)
    """
    wrapped = nw.from_native(df, pass_through=False)
    started_lazy = is_narwhals_lazyframe(wrapped)
    backend = wrapped.implementation
    lazy_frame = wrapped.lazy()

    def undo(result: Union[nw.LazyFrame[Any], nw.DataFrame[Any]]) -> Any:
        """Convert back to the original type (lazy or eager)."""
        if not (
            is_narwhals_dataframe(result) or is_narwhals_lazyframe(result)
        ):
            # Not a narwhals frame: there is nothing to convert, so warn and
            # hand the object back untouched.
            LOGGER.warning(
                "Expected a narwhals DataFrame or LazyFrame, got %s",
                type(result),
            )
            return result

        restored = (
            _to_lazyframe(result, backend)
            if started_lazy
            else _to_dataframe(result)
        )
        return restored.to_native()

    return lazy_frame, undo


def collect_and_preserve_type(
    df: nw.LazyFrame[Any],
) -> tuple[
    nw.DataFrame[Any], Callable[[nw.DataFrame[Any]], nw.LazyFrame[Any]]
]:
    """
    Collect a narwhals LazyFrame to DataFrame, preserving the original backend.

    This is useful since when you collect an Ibis or DuckDB dataframe, making them
    lazy does not convert them back to their original backend.

    Args:
        df: The narwhals LazyFrame to collect.

    Returns:
        A tuple of:
        - nw.DataFrame: The collected (eager) version of ``df``
        - undo: A callback that takes a narwhals DataFrame and converts it
          back to a LazyFrame, targeting the backend ``df`` had before it
          was collected.
    """
    original_backend = df.implementation

    def undo(result: nw.DataFrame[Any]) -> nw.LazyFrame[Any]:
        """Convert back to the original backend as a LazyFrame."""
        if not is_narwhals_dataframe(result):
            LOGGER.warning(
                "Expected a narwhals DataFrame, got %s", type(result)
            )
            # Review feedback: calling ``result.lazy()`` unconditionally
            # fails with AttributeError when ``result`` is not a frame-like
            # object. Keep the best-effort ``lazy()`` call when it exists;
            # otherwise pass the value through unchanged, as the undo
            # callback of make_lazy does for unexpected input.
            to_lazy = getattr(result, "lazy", None)
            if not callable(to_lazy):
                return result
            return to_lazy()

        return _to_lazyframe(result, original_backend)

    return df.collect(), undo
Loading
Loading