Source code for lumix.indexing.dimensions

"""Index dimension definitions for multi-dimensional model indexing.

This module provides the LXIndexDimension class, which represents a single dimension
in multi-dimensional indexing. Index dimensions can be combined via cartesian products
to create multi-model indexed variables and constraints.

Classes:
    LXIndexDimension: A single dimension of a multi-dimensional index with filtering
                      and data source configuration

Example:
    Creating a dimension with filtering::

        from lumix import LXIndexDimension

        driver_dim = (
            LXIndexDimension(Driver, lambda d: d.id)
            .from_data(drivers)
            .where(lambda d: d.is_active and d.years_experience >= 2)
        )

    Using with ORM::

        driver_dim = (
            LXIndexDimension(Driver, lambda d: d.id)
            .from_model(session)
            .where(lambda d: d.is_active)
        )

See Also:
    - :class:`~lumix.indexing.cartesian.LXCartesianProduct`: Combines dimensions
    - :class:`~lumix.core.variables.LXVariable`: Uses dimensions for indexing
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Callable, Generic, List, Optional, Type, TypeVar

TModel = TypeVar("TModel")


@dataclass
class LXIndexDimension(Generic[TModel]):
    """Represents a single dimension in multi-dimensional model indexing.

    An LXIndexDimension defines one dimension of a potentially multi-dimensional
    index space. It encapsulates the model type, indexing function, optional
    filter, and data source for that dimension. Dimensions can be combined via
    LXCartesianProduct to create multi-model indexed variables and constraints.

    Type Parameters:
        TModel: The data model type for this dimension (e.g., Driver, Product, Date)

    Attributes:
        model_type: The Python class representing the data model for this dimension
        key_func: Function to extract the index key from a model instance
        filter_func: Optional predicate to filter which instances to include
        label: Optional human-readable label for this dimension
        _data: Direct data instances (mutually exclusive with _session)
        _session: ORM session for querying instances (mutually exclusive with _data)

    Examples:
        Basic dimension with direct data::

            driver_dim = (
                LXIndexDimension(Driver, lambda d: d.id)
                .from_data(drivers)
            )

        Dimension with filtering::

            active_driver_dim = (
                LXIndexDimension(Driver, lambda d: d.id)
                .from_data(drivers)
                .where(lambda d: d.is_active and d.years_experience >= 2)
            )

        Dimension with ORM::

            product_dim = (
                LXIndexDimension(Product, lambda p: p.sku)
                .from_model(db_session)
                .where(lambda p: p.in_stock)
            )

        Compound key extraction::

            route_dim = (
                LXIndexDimension(Route, lambda r: (r.origin, r.destination))
                .from_data(routes)
            )

    See Also:
        - :class:`~lumix.indexing.cartesian.LXCartesianProduct`: Combines dimensions
        - :class:`~lumix.core.variables.LXVariable.indexed_by_product`: Uses dimensions
        - Driver Scheduling Example (examples/02_driver_scheduling): Real-world usage

    Note:
        Index dimensions follow the "late binding" pattern - data instances are not
        retrieved until :meth:`get_instances` is called. This allows dimensions to
        be defined before data is available and supports dynamic data sources.
    """

    model_type: Type[TModel]
    key_func: Callable[[TModel], Any]
    filter_func: Optional[Callable[[TModel], bool]] = None
    label: Optional[str] = None
    _data: Optional[List[TModel]] = None
    _session: Optional[Any] = None

    def __deepcopy__(self, memo):
        """Custom deepcopy that detaches ORM sessions and handles lambda closures.

        The copy is built in three steps:

        1. Materialize session-backed data (via :meth:`get_instances`) before copying
        2. Detach the instances through ``materialize_and_detach_list``
        3. Copy ``key_func`` / ``filter_func`` through
           ``copy_function_detaching_closure``

        Args:
            memo: Dictionary for tracking already-copied objects during deepcopy

        Returns:
            A new dimension whose ``_session`` is always None. ``_data`` holds the
            copied instances, an empty list if materialization failed, or None
            if no data source was configured.

        Note:
            If materializing a session-backed dimension raises, a ``UserWarning``
            is emitted and the copy is left with an empty ``_data`` list instead
            of propagating the error.
        """
        from ..utils.copy_utils import (
            materialize_and_detach_list,
            copy_function_detaching_closure,
        )

        # Create new instance without calling __init__
        cls = self.__class__
        result = cls.__new__(cls)
        # Register early so reference cycles back to this dimension resolve
        # to the copy instead of recursing.
        memo[id(self)] = result

        # Copy simple attributes
        result.model_type = self.model_type
        result.label = self.label

        # Copy functions - may have closures capturing ORM objects
        result.key_func = copy_function_detaching_closure(self.key_func, memo)
        result.filter_func = (
            copy_function_detaching_closure(self.filter_func, memo)
            if self.filter_func is not None
            else None
        )

        # The copy never keeps a live session, whichever branch runs below.
        result._session = None

        if self._session is not None:
            # Session-backed: materialize now, because the copy has no session
            # to query later.
            try:
                instances = self.get_instances()
                result._data = materialize_and_detach_list(instances, memo)
            except Exception as e:
                import warnings

                warnings.warn(
                    f"Failed to materialize dimension data for {self.model_type.__name__}: {e}. "
                    f"Dimension will be empty in the copy.",
                    UserWarning
                )
                result._data = []
        elif self._data is not None:
            # Already have data - just detach and copy
            result._data = materialize_and_detach_list(self._data, memo)
        else:
            # No data source configured
            result._data = None

        return result

    def __getstate__(self):
        """Support for pickle protocol - drop the ORM session before pickling.

        When a session is configured, the instances are materialized through
        :meth:`get_instances`, passed through ``detach_orm_object`` and stored
        under ``_data``; ``_session`` is then set to None in the returned state.
        The live object itself is not modified.

        Returns:
            Dictionary of instance state with ``_session`` cleared

        Note:
            If materialization fails, a ``UserWarning`` is emitted (mirroring
            ``__deepcopy__``) and the pickled state carries an empty ``_data``
            list rather than raising.
        """
        state = self.__dict__.copy()

        # If using ORM session, materialize data before pickling
        if state.get("_session") is not None:
            try:
                instances = self.get_instances()
                from ..utils.copy_utils import detach_orm_object

                state["_data"] = [detach_orm_object(inst) for inst in instances]
            except Exception as e:
                import warnings

                # Best-effort: keep pickling, but do not hide the data loss.
                # getattr guards against model_type objects without __name__ so
                # that building the message can never raise on its own.
                model_name = getattr(
                    self.model_type, "__name__", repr(self.model_type)
                )
                warnings.warn(
                    f"Failed to materialize dimension data for {model_name}: {e}. "
                    f"Dimension will be empty in the pickled state.",
                    UserWarning,
                )
                state["_data"] = []
            state["_session"] = None

        return state

    def __setstate__(self, state):
        """Support for pickle protocol - restore from pickled state.

        Args:
            state: Dictionary of instance state from pickling
        """
        self.__dict__.update(state)

    def from_data(self, data: List[TModel]) -> "LXIndexDimension[TModel]":
        """Provide data instances directly for this dimension.

        Configures the dimension to use a pre-existing list of model instances.
        The list is stored by reference, not copied.

        Args:
            data: List of model instances for this dimension

        Returns:
            Self for method chaining

        Examples:
            Basic usage::

                drivers = [Driver("D1", "Alice"), Driver("D2", "Bob")]
                dim = LXIndexDimension(Driver, lambda d: d.id).from_data(drivers)

            With filtering::

                dim = (
                    LXIndexDimension(Driver, lambda d: d.id)
                    .from_data(drivers)
                    .where(lambda d: d.is_active)
                )

        Note:
            This method is mutually exclusive with from_model(). If both are
            called, the last call takes precedence: calling from_data() discards
            any previously configured session.
        """
        self._data = data
        # Drop any session so the two sources stay mutually exclusive.
        self._session = None
        return self

    def from_model(self, session: Any) -> "LXIndexDimension[TModel]":
        """Configure dimension to query data from an ORM session.

        Stores the session only; the query itself is executed lazily when
        get_instances() is called.

        Args:
            session: ORM session object, handed to ``LXTypedQuery`` together
                with ``model_type`` when instances are retrieved

        Returns:
            Self for method chaining

        Examples:
            SQLAlchemy session::

                from sqlalchemy.orm import Session

                dim = (
                    LXIndexDimension(Driver, lambda d: d.id)
                    .from_model(db_session)
                    .where(lambda d: d.is_active)
                )

            With additional filtering::

                dim = (
                    LXIndexDimension(Product, lambda p: p.sku)
                    .from_model(session)
                    .where(lambda p: p.stock_quantity > 0)
                )

        Note:
            - This method is mutually exclusive with from_data(). If both are
              called, the last call takes precedence: calling from_model()
              discards any previously supplied data.
            - The actual database query happens when get_instances() runs, not
              at definition time
            - Filters applied via where() are evaluated in Python after the query

        See Also:
            - :mod:`lumix.utils.orm`: ORM integration utilities
        """
        self._session = session
        # get_instances() prefers _data over _session, so stale data must be
        # cleared or the session configured here would never be queried.
        self._data = None
        return self

    def get_instances(self) -> List[TModel]:
        """Retrieve and filter data instances for this dimension.

        Retrieves instances from the configured data source (either direct data
        or an ORM query) and applies the filter set via where(), if any.

        Returns:
            List of model instances after filtering. When no filter is set, this
            is the data source's own list object (not a copy).

        Raises:
            ValueError: If no data source is configured (neither from_data() nor
                from_model() has been called)

        Examples:
            The method can be used for inspection::

                dim = (
                    LXIndexDimension(Driver, lambda d: d.id)
                    .from_data(all_drivers)
                    .where(lambda d: d.is_active)
                )

                # Get filtered instances
                active_drivers = dim.get_instances()
                print(f"Found {len(active_drivers)} active drivers")

        Note:
            - For ORM-based dimensions, this triggers the query
            - Filters are applied in Python after data retrieval
            - Results are not cached; each call re-reads the data source

        Implementation Details:
            The method follows this logic:

            1. If _data is set, use it as the data source
            2. Otherwise, if _session is set, query via ORM using LXTypedQuery
            3. Otherwise, raise ValueError
            4. Apply filter_func if present
        """
        if self._data is not None:
            instances = self._data
        elif self._session is not None:
            # Imported lazily so data-only dimensions never load the ORM helpers.
            from ..utils.orm import LXTypedQuery

            query = LXTypedQuery(self._session, self.model_type)
            instances = query.all()
        else:
            raise ValueError(
                f"LXIndexDimension for {self.model_type.__name__} has no data source. "
                "Use .from_data(data) or .from_model(session)"
            )

        # Apply filter if present
        if self.filter_func is not None:
            instances = [inst for inst in instances if self.filter_func(inst)]

        return instances

    def where(self, predicate: Callable[[TModel], bool]) -> "LXIndexDimension[TModel]":
        """Apply a filter predicate to this dimension.

        Sets the predicate that get_instances() uses to decide which model
        instances are included. Only instances for which the predicate returns
        a truthy value are kept.

        Args:
            predicate: A function that takes a model instance and returns True
                if it should be included, False otherwise

        Returns:
            Self for method chaining

        Examples:
            Simple filter::

                dim = (
                    LXIndexDimension(Driver, lambda d: d.id)
                    .from_data(drivers)
                    .where(lambda d: d.is_active)
                )

            Complex filter with multiple conditions::

                dim = (
                    LXIndexDimension(Product, lambda p: p.sku)
                    .from_data(products)
                    .where(lambda p: p.in_stock and p.price > 0 and not p.discontinued)
                )

            Filter with attribute check::

                dim = (
                    LXIndexDimension(Route, lambda r: (r.origin, r.dest))
                    .from_data(routes)
                    .where(lambda r: r.distance < 1000 and r.is_operational)
                )

        Note:
            - Filters are evaluated in Python after data retrieval
            - Multiple where() calls will override previous filters (not combine them)
            - For multi-dimensional filtering across dimensions, use where_multi()
              on the variable

        See Also:
            - :meth:`lumix.core.variables.LXVariable.where_multi`: Cross-dimension filtering
            - :meth:`get_instances`: Where filtering is applied
        """
        self.filter_func = predicate
        return self
# Public API of this module: the only name exported by a star-import.
__all__ = ["LXIndexDimension"]