diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py new file mode 100644 index 00000000..877fa9a4 --- /dev/null +++ b/dev-scripts/benchmark_lp_writer.py @@ -0,0 +1,527 @@ +#!/usr/bin/env python3 +""" +Benchmark script for LP file writing and model build performance. + +Usage: + # Benchmark LP write speed (default): + python dev-scripts/benchmark_lp_writer.py --output results.json [--label "my branch"] + + # Benchmark model build speed: + python dev-scripts/benchmark_lp_writer.py --phase build --output results.json + + # Benchmark memory usage of the built model: + python dev-scripts/benchmark_lp_writer.py --phase memory --output results.json + + # Plot comparison of two result files: + python dev-scripts/benchmark_lp_writer.py --plot master.json this_pr.json +""" + +from __future__ import annotations + +import argparse +import json +import tempfile +import time +import tracemalloc +from pathlib import Path + +import numpy as np +from numpy.random import default_rng + +from linopy import Model + +rng = default_rng(125) + + +def basic_model(n: int) -> Model: + """Create a basic model with 2*n^2 variables and 2*n^2 constraints.""" + m = Model() + N = np.arange(n) + x = m.add_variables(coords=[N, N], name="x") + y = m.add_variables(coords=[N, N], name="y") + m.add_constraints(x - y >= N, name="c1") + m.add_constraints(x + y >= 0, name="c2") + m.add_objective((2 * x).sum() + y.sum()) + return m + + +def knapsack_model(n: int) -> Model: + """Create a knapsack model with n binary variables and 1 constraint.""" + m = Model() + packages = m.add_variables(coords=[np.arange(n)], binary=True) + weight = rng.integers(1, 100, size=n) + value = rng.integers(1, 100, size=n) + m.add_constraints((weight * packages).sum() <= 200) + m.add_objective(-(value * packages).sum()) + return m + + +def pypsa_model(snapshots: int | None = None) -> Model | None: + """Create a model from the PyPSA SciGrid-DE example network.""" + try: + import pandas as pd + import pypsa + except ImportError: + return None + n = pypsa.examples.scigrid_de() + if snapshots is not None and snapshots > len(n.snapshots): + orig = n.snapshots + repeats = -(-snapshots // len(orig)) + new_index = pd.date_range(orig[0], periods=len(orig) * repeats, freq=orig.freq) + new_index = new_index[:snapshots] + n.set_snapshots(new_index) + n.optimize.create_model() + return n.model + + +# --------------------------------------------------------------------------- +# Memory measurement helpers +# --------------------------------------------------------------------------- + + +def model_nbytes(m: Model) -> dict[str, int]: + """Return byte sizes of the model's variable and constraint datasets.""" + var_bytes = sum( + v.nbytes + for name in m.variables + for v in m.variables[name].data.data_vars.values() + ) + con_bytes = sum( + v.nbytes + for name in m.constraints + for v in m.constraints[name].data.data_vars.values() + ) + return { + "var_bytes": var_bytes, + "con_bytes": con_bytes, + "total_bytes": var_bytes + con_bytes, + } + + +def measure_build_memory(builder, *args, **kwargs) -> tuple[Model, int]: + """Build a model while tracking peak memory allocation with tracemalloc.""" + tracemalloc.start() + m = builder(*args, **kwargs) + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + return m, peak + + +# --------------------------------------------------------------------------- +# Benchmark runners +# --------------------------------------------------------------------------- + + +def benchmark_lp_write( + label: str, m: Model, iterations: int = 10, io_api: str | None = None +) -> dict: + """Benchmark LP file writing speed. Returns dict with results.""" + to_file_kwargs: dict = dict(progress=False) + if io_api is not None: + to_file_kwargs["io_api"] = io_api + with tempfile.TemporaryDirectory() as tmpdir: + m.to_file(Path(tmpdir) / "warmup.lp", **to_file_kwargs) + times = [] + for i in range(iterations): + fn = Path(tmpdir) / f"bench_{i}.lp" + start = time.perf_counter() + m.to_file(fn, **to_file_kwargs) + times.append(time.perf_counter() - start) + + return _timing_result(label, m, times, phase="lp_write") + + +def benchmark_build( + label: str, builder, builder_args: tuple, iterations: int = 10 +) -> dict: + """Benchmark model build speed. Returns dict with results.""" + # warmup + builder(*builder_args) + times = [] + for _ in range(iterations): + start = time.perf_counter() + m = builder(*builder_args) + times.append(time.perf_counter() - start) + + return _timing_result(label, m, times, phase="build") + + +def benchmark_memory(label: str, builder, builder_args: tuple) -> dict: + """Benchmark memory usage of the built model.""" + m, peak_alloc = measure_build_memory(builder, *builder_args) + nb = model_nbytes(m) + nvars = int(m.nvars) + ncons = int(m.ncons) + print( + f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " + f"datasets={nb['total_bytes'] / 1e6:7.2f} MB, peak_alloc={peak_alloc / 1e6:7.2f} MB" + ) + return { + "label": label, + "nvars": nvars, + "ncons": ncons, + "phase": "memory", + **nb, + "peak_alloc_bytes": peak_alloc, + } + + +def _timing_result(label: str, m: Model, times: list[float], phase: str) -> dict: + avg = float(np.mean(times)) + med = float(np.median(times)) + q25 = float(np.percentile(times, 25)) + q75 = float(np.percentile(times, 75)) + nvars = int(m.nvars) + ncons = int(m.ncons) + print( + f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " + f"{med * 1000:7.1f}ms (IQR {q25 * 1000:.1f}-{q75 * 1000:.1f}ms)" + ) + return { + "label": label, + "nvars": nvars, + "ncons": ncons, + "phase": phase, + "mean_s": avg, + "median_s": med, + "q25_s": q25, + "q75_s": q75, + "times_s": times, + } + + +# --------------------------------------------------------------------------- +# Size configurations +# --------------------------------------------------------------------------- + +BASIC_SIZES = [5, 10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000, 1500, 2000] +PYPSA_SNAPS = [24, 50, 100, 200, 500, 1000] + + +def run_benchmarks( + phase: str = "lp_write", + io_api: str | None = None, + iterations: int = 10, + model_type: str = "basic", +) -> list[dict]: + """ + Run benchmarks for a single model type across sizes. + + Parameters + ---------- + phase : str + "lp_write" (default) - benchmark LP file writing speed. + "build" - benchmark model construction speed. + "memory" - measure dataset nbytes and peak allocation. + model_type : str + "basic" (default) - N from 5 to 2000, giving 50 to 8M vars. + "pypsa" - PyPSA SciGrid-DE with varying snapshot counts. + """ + results = [] + + if model_type == "basic": + print(f"\nbasic_model (2 x N^2 vars, 2 x N^2 constraints) — phase={phase}:") + for n in BASIC_SIZES: + iters = iterations * 5 if n <= 100 else iterations + if phase == "lp_write": + r = benchmark_lp_write( + f"basic N={n}", basic_model(n), iters, io_api=io_api + ) + elif phase == "build": + r = benchmark_build(f"basic N={n}", basic_model, (n,), iters) + elif phase == "memory": + r = benchmark_memory(f"basic N={n}", basic_model, (n,)) + else: + raise ValueError(f"Unknown phase: {phase!r}") + r["model"] = "basic" + r["param"] = n + results.append(r) + + elif model_type == "pypsa": + print(f"\nPyPSA SciGrid-DE — phase={phase}:") + for snaps in PYPSA_SNAPS: + if phase == "memory": + m, peak = measure_build_memory(pypsa_model, snaps) + if m is None: + print(" (skipped, pypsa not installed)") + break + nb = model_nbytes(m) + r = { + "label": f"pypsa {snaps} snaps", + "nvars": int(m.nvars), + "ncons": int(m.ncons), + "phase": "memory", + **nb, + "peak_alloc_bytes": peak, + } + print( + f" pypsa {snaps} snaps ({m.nvars:>9,} vars, {m.ncons:>9,} cons): " + f"datasets={nb['total_bytes'] / 1e6:7.2f} MB, peak_alloc={peak / 1e6:7.2f} MB" + ) + elif phase == "build": + # For PyPSA, "build" means calling pypsa_model() + pypsa_model(snaps) # warmup + times = [] + m = None + for _ in range(iterations): + start = time.perf_counter() + m = pypsa_model(snaps) + times.append(time.perf_counter() - start) + if m is None: + print(" (skipped, pypsa not installed)") + break + r = _timing_result(f"pypsa {snaps} snaps", m, times, phase="build") + else: + m = pypsa_model(snapshots=snaps) + if m is None: + print(" (skipped, pypsa not installed)") + break + r = benchmark_lp_write( + f"pypsa {snaps} snaps", m, iterations, io_api=io_api + ) + r["model"] = "pypsa" + r["param"] = snaps + results.append(r) + else: + raise ValueError(f"Unknown model_type: {model_type!r}") + + return results + + +# --------------------------------------------------------------------------- +# Plotting +# --------------------------------------------------------------------------- + + +def plot_comparison(file_old: str, file_new: str) -> None: + """Create 4-panel comparison plot from two JSON result files.""" + import matplotlib.pyplot as plt + + with open(file_old) as f: + data_old = json.load(f) + with open(file_new) as f: + data_new = json.load(f) + + label_old = data_old.get("label", Path(file_old).stem) + label_new = data_new.get("label", Path(file_new).stem) + phase = data_old["results"][0].get("phase", "lp_write") + + is_memory = phase == "memory" + + def get_stats(data): + nv = [r["nvars"] for r in data["results"]] + if is_memory: + vals = [r["total_bytes"] / 1e6 for r in data["results"]] + return nv, vals, vals, vals # no spread for memory + if "median_s" in data["results"][0]: + med = [r["median_s"] * 1000 for r in data["results"]] + lo = [r["q25_s"] * 1000 for r in data["results"]] + hi = [r["q75_s"] * 1000 for r in data["results"]] + else: + med = [r["mean_s"] * 1000 for r in data["results"]] + std = [r["std_s"] * 1000 for r in data["results"]] + lo = [m - s for m, s in zip(med, std)] + hi = [m + s for m, s in zip(med, std)] + return nv, med, lo, hi + + nv_old, med_old, lo_old, hi_old = get_stats(data_old) + nv_new, med_new, lo_new, hi_new = get_stats(data_new) + + y_label = "Memory (MB)" if is_memory else "Time (ms, median)" + title_prefix = f"{phase.replace('_', ' ').title()} Performance" + + color_old, color_new = "#1f77b4", "#ff7f0e" + + fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + fig.suptitle(f"{title_prefix}: {label_old} vs {label_new}", fontsize=14) + + def plot_errorbar(ax, nv, med, lo, hi, **kwargs): + yerr_lo = [m - l for m, l in zip(med, lo)] + yerr_hi = [h - m for m, h in zip(med, hi)] + ax.errorbar(nv, med, yerr=[yerr_lo, yerr_hi], capsize=3, **kwargs) + + # Panel 1: All data, log-log + ax = axes[0, 0] + plot_errorbar( + ax, + nv_old, + med_old, + lo_old, + hi_old, + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + ) + plot_errorbar( + ax, + nv_new, + med_new, + lo_new, + hi_new, + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + ) + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel(y_label) + ax.set_title(f"{title_prefix} vs problem size (log-log)") + ax.legend() + ax.grid(True, alpha=0.3) + + # Panel 2: Ratio (old/new) + ax = axes[0, 1] + if len(nv_old) == len(nv_new): + ratio = [o / n if n > 0 else 1 for o, n in zip(med_old, med_new)] + ax.plot(nv_old, ratio, marker="o", color="#2ca02c") + ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) + ax.set_xscale("log") + ax.set_xlabel("Number of variables") + ratio_label = "Reduction" if is_memory else "Speedup" + ax.set_ylabel(f"{ratio_label} ({label_old} / {label_new})") + ax.set_title(f"{ratio_label} vs problem size") + ax.grid(True, alpha=0.3) + + # Panel 3: Small models + ax = axes[1, 0] + cutoff = 25000 + idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] + idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] + plot_errorbar( + ax, + [nv_old[i] for i in idx_old], + [med_old[i] for i in idx_old], + [lo_old[i] for i in idx_old], + [hi_old[i] for i in idx_old], + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + ) + plot_errorbar( + ax, + [nv_new[i] for i in idx_new], + [med_new[i] for i in idx_new], + [lo_new[i] for i in idx_new], + [hi_new[i] for i in idx_new], + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + ) + ax.set_xlabel("Number of variables") + ax.set_ylabel(y_label) + ax.set_ylim(bottom=0) + ax.set_title(f"Small models (<= {cutoff:,} vars)") + ax.legend() + ax.grid(True, alpha=0.3) + + # Panel 4: Large models + ax = axes[1, 1] + idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] + idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] + plot_errorbar( + ax, + [nv_old[i] for i in idx_old], + [med_old[i] for i in idx_old], + [lo_old[i] for i in idx_old], + [hi_old[i] for i in idx_old], + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + ) + plot_errorbar( + ax, + [nv_new[i] for i in idx_new], + [med_new[i] for i in idx_new], + [lo_new[i] for i in idx_new], + [hi_new[i] for i in idx_new], + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + ) + ax.set_xscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel(y_label) + ax.set_title(f"Large models (> {cutoff:,} vars)") + ax.legend() + ax.grid(True, alpha=0.3) + + plt.tight_layout() + out_path = f"dev-scripts/benchmark_{phase}_comparison.png" + plt.savefig(out_path, dpi=150, bbox_inches="tight") + print(f"\nPlot saved to {out_path}") + plt.close() + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser(description="Linopy benchmark (speed & memory)") + parser.add_argument("--output", "-o", help="Save results to JSON file") + parser.add_argument("--label", default=None, help="Label for this run") + parser.add_argument("--io-api", default=None, help="io_api to pass to to_file()") + parser.add_argument( + "--phase", + default="lp_write", + choices=["lp_write", "build", "memory"], + help="What to benchmark: lp_write (default), build, or memory", + ) + parser.add_argument( + "--model", + default="basic", + choices=["basic", "pypsa"], + help="Model type to benchmark (default: basic)", + ) + parser.add_argument( + "--plot", + nargs=2, + metavar=("OLD", "NEW"), + help="Plot comparison from two JSON files", + ) + args = parser.parse_args() + + if args.plot: + plot_comparison(args.plot[0], args.plot[1]) + return + + iterations = 10 + label = args.label or "benchmark" + print( + f"Linopy benchmark — phase={args.phase}, model={args.model}, " + f"iterations={iterations}, label={label!r}" + ) + print("=" * 90) + + results = run_benchmarks( + phase=args.phase, + io_api=args.io_api, + iterations=iterations, + model_type=args.model, + ) + + output = {"label": label, "phase": args.phase, "results": results} + if args.output: + with open(args.output, "w") as f: + json.dump(output, f, indent=2) + print(f"\nResults saved to {args.output}") + else: + print("\n(use --output FILE to save results for later plotting)") + + +if __name__ == "__main__": + main() diff --git a/linopy/common.py b/linopy/common.py index 7dd97b65..5b1dfb16 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -8,7 +8,6 @@ from __future__ import annotations import operator -import os from collections.abc import Callable, Generator, Hashable, Iterable, Sequence from functools import partial, reduce, wraps from pathlib import Path @@ -18,7 +17,7 @@ import numpy as np import pandas as pd import polars as pl -from numpy import arange, signedinteger +from numpy import signedinteger from xarray import DataArray, Dataset, apply_ufunc, broadcast from xarray import align as xr_align from xarray.core import dtypes, indexing @@ -27,6 +26,7 @@ from linopy.config import options from linopy.constants import ( + DEFAULT_LABEL_DTYPE, HELPER_DIMS, SIGNS, SIGNS_alternative, @@ -333,13 +333,11 @@ def infer_schema_polars(ds: Dataset) -> dict[Hashable, pl.DataType]: dict: A dictionary mapping column names to their corresponding Polars data types. """ schema = {} - np_major_version = int(np.__version__.split(".")[0]) - use_int32 = os.name == "nt" and np_major_version < 2 for name, array in ds.items(): if np.issubdtype(array.dtype, np.integer): - schema[name] = pl.Int32 if use_int32 else pl.Int64 + schema[name] = pl.Int32 if array.dtype.itemsize <= 4 else pl.Int64 elif np.issubdtype(array.dtype, np.floating): - schema[name] = pl.Float64 # type: ignore + schema[name] = pl.Float32 if array.dtype.itemsize <= 4 else pl.Float64 # type: ignore elif np.issubdtype(array.dtype, np.bool_): schema[name] = pl.Boolean # type: ignore elif np.issubdtype(array.dtype, np.object_): @@ -523,7 +521,7 @@ def fill_missing_coords( # Fill in missing integer coordinates for dim in ds.dims: if dim not in ds.coords and dim not in skip_dims: - ds.coords[dim] = arange(ds.sizes[dim]) + ds.coords[dim] = np.arange(ds.sizes[dim], dtype=DEFAULT_LABEL_DTYPE) return ds diff --git a/linopy/constants.py b/linopy/constants.py index 021a9a10..0e71fbd8 100644 --- a/linopy/constants.py +++ b/linopy/constants.py @@ -33,6 +33,9 @@ short_LESS_EQUAL: LESS_EQUAL, } +DEFAULT_LABEL_DTYPE = np.int32 +DEFAULT_FLOAT_DTYPE = np.float32 + TERM_DIM = "_term" STACKED_TERM_DIM = "_stacked_term" GROUPED_TERM_DIM = "_grouped_term" diff --git a/linopy/expressions.py b/linopy/expressions.py index 10e243de..25d476d7 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -68,6 +68,8 @@ from linopy.config import options from linopy.constants import ( CV_DIM, + DEFAULT_FLOAT_DTYPE, + DEFAULT_LABEL_DTYPE, EQUAL, FACTOR_DIM, GREATER_EQUAL, @@ -279,7 +281,9 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression: def func(ds: Dataset) -> Dataset: ds = LinearExpression._sum(ds, str(self.groupby._group_dim)) - ds = ds.assign_coords({TERM_DIM: np.arange(len(ds._term))}) + ds = ds.assign_coords( + {TERM_DIM: np.arange(len(ds._term), dtype=DEFAULT_LABEL_DTYPE)} + ) return ds return self.map(func, **kwargs, shortcut=True) @@ -360,9 +364,11 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None: ) if np.issubdtype(data.vars, np.floating): - data = assign_multiindex_safe(data, vars=data.vars.fillna(-1).astype(int)) - if not np.issubdtype(data.coeffs, np.floating): - data["coeffs"].values = data.coeffs.values.astype(float) + data = assign_multiindex_safe( + data, vars=data.vars.fillna(-1).astype(DEFAULT_LABEL_DTYPE) + ) + if data.coeffs.dtype != DEFAULT_FLOAT_DTYPE: + data["coeffs"].values = data.coeffs.values.astype(DEFAULT_FLOAT_DTYPE) data = fill_missing_coords(data) @@ -370,9 +376,11 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None: raise ValueError("data must contain one dimension ending with '_term'") if "const" not in data: - data = data.assign(const=0.0) - elif not np.issubdtype(data.const, np.floating): - data = assign_multiindex_safe(data, const=data.const.astype(float)) + data = data.assign(const=np.float32(0.0)) + elif data.const.dtype != DEFAULT_FLOAT_DTYPE: + data = assign_multiindex_safe( + data, const=data.const.astype(DEFAULT_FLOAT_DTYPE) + ) (data,) = xr.broadcast(data, exclude=HELPER_DIMS) (coeffs_vars,) = xr.broadcast(data[["coeffs", "vars"]], exclude=[FACTOR_DIM]) @@ -1137,7 +1145,7 @@ def sanitize(self: GenericExpression) -> GenericExpression: linopy.LinearExpression """ if not np.issubdtype(self.vars.dtype, np.integer): - return self.assign(vars=self.vars.fillna(-1).astype(int)) + return self.assign(vars=self.vars.fillna(-1).astype(DEFAULT_LABEL_DTYPE)) return self @@ -1503,8 +1511,8 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: # Return arrays filled with -1 and 0.0, same length as input return np.vstack( [ - np.full(input_len, -1, dtype=float), - np.zeros(input_len, dtype=float), + np.full(input_len, -1, dtype=DEFAULT_FLOAT_DTYPE), + np.zeros(input_len, dtype=DEFAULT_FLOAT_DTYPE), ] ) @@ -1519,8 +1527,8 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: unique_coeffs = summed[unique_vars] # Pad to match input length - result_vars = np.full(input_len, -1, dtype=float) - result_coeffs = np.zeros(input_len, dtype=float) + result_vars = np.full(input_len, -1, dtype=DEFAULT_FLOAT_DTYPE) + result_coeffs = np.zeros(input_len, dtype=DEFAULT_FLOAT_DTYPE) n_unique = len(unique_vars) result_vars[:n_unique] = unique_vars @@ -1541,12 +1549,12 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: # Combined has dimensions (.., CV_DIM, TERM_DIM) # Drop terms where all vars are -1 (i.e., empty terms across all coordinates) - vars = combined.isel({CV_DIM: 0}).astype(int) + vars = combined.isel({CV_DIM: 0}).astype(DEFAULT_LABEL_DTYPE) non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM]) combined = combined.isel({TERM_DIM: non_empty_terms}) # Extract vars and coeffs from the combined result - vars = combined.isel({CV_DIM: 0}).astype(int) + vars = combined.isel({CV_DIM: 0}).astype(DEFAULT_LABEL_DTYPE) coeffs = combined.isel({CV_DIM: 1}) # Create new dataset with simplified data diff --git a/linopy/model.py b/linopy/model.py index 657b2d45..b0ad7f22 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -35,6 +35,7 @@ to_path, ) from linopy.constants import ( + DEFAULT_LABEL_DTYPE, GREATER_EQUAL, HELPER_DIMS, LESS_EQUAL, @@ -534,7 +535,14 @@ def add_variables( start = self._xCounter end = start + data.labels.size - data.labels.values = np.arange(start, end).reshape(data.labels.shape) + if end > np.iinfo(DEFAULT_LABEL_DTYPE).max: + raise ValueError( + f"Number of labels ({end}) exceeds the maximum value for " + f"{DEFAULT_LABEL_DTYPE.__name__} ({np.iinfo(DEFAULT_LABEL_DTYPE).max}). " + ) + data.labels.values = np.arange(start, end, dtype=DEFAULT_LABEL_DTYPE).reshape( + data.labels.shape + ) self._xCounter += data.labels.size if mask is not None: @@ -713,7 +721,14 @@ def add_constraints( start = self._cCounter end = start + data.labels.size - data.labels.values = np.arange(start, end).reshape(data.labels.shape) + if end > np.iinfo(DEFAULT_LABEL_DTYPE).max: + raise ValueError( + f"Number of labels ({end}) exceeds the maximum value for " + f"{DEFAULT_LABEL_DTYPE.__name__} ({np.iinfo(DEFAULT_LABEL_DTYPE).max}). " + ) + data.labels.values = np.arange(start, end, dtype=DEFAULT_LABEL_DTYPE).reshape( + data.labels.shape + ) self._cCounter += data.labels.size if mask is not None: diff --git a/linopy/variables.py b/linopy/variables.py index e2570b5d..9791bdb7 100644 --- a/linopy/variables.py +++ b/linopy/variables.py @@ -52,7 +52,7 @@ to_polars, ) from linopy.config import options -from linopy.constants import HELPER_DIMS, TERM_DIM +from linopy.constants import DEFAULT_FLOAT_DTYPE, HELPER_DIMS, TERM_DIM from linopy.solver_capabilities import SolverFeature, solver_supports from linopy.types import ( ConstantLike, @@ -188,9 +188,9 @@ def __init__( if not skip_broadcast: (data,) = broadcast(data) for attr in ("lower", "upper"): - # convert to float, important for operations like "shift" - if not issubdtype(data[attr].dtype, floating): - data[attr].values = data[attr].values.astype(float) + # convert to float32, important for operations like "shift" + if data[attr].dtype != DEFAULT_FLOAT_DTYPE: + data[attr].values = data[attr].values.astype(DEFAULT_FLOAT_DTYPE) if "label_range" not in data.attrs: data.assign_attrs(label_range=(data.labels.min(), data.labels.max())) diff --git a/test/test_constraints.py b/test/test_constraints.py index cca010e8..0616c3e1 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -34,10 +34,16 @@ def test_constraint_assignment() -> None: assert "con0" in getattr(m.constraints, attr) assert m.constraints.labels.con0.shape == (10, 10) - assert m.constraints.labels.con0.dtype == int - assert m.constraints.coeffs.con0.dtype in (int, float) - assert m.constraints.vars.con0.dtype in (int, float) - assert m.constraints.rhs.con0.dtype in (int, float) + assert np.issubdtype(m.constraints.labels.con0.dtype, np.integer) + assert np.issubdtype(m.constraints.coeffs.con0.dtype, np.floating) or np.issubdtype( + m.constraints.coeffs.con0.dtype, np.integer + ) + assert np.issubdtype(m.constraints.vars.con0.dtype, np.integer) or np.issubdtype( + m.constraints.vars.con0.dtype, np.floating + ) + assert np.issubdtype(m.constraints.rhs.con0.dtype, np.floating) or np.issubdtype( + m.constraints.rhs.con0.dtype, np.integer + ) assert_conequal(m.constraints.con0, con0) @@ -88,10 +94,16 @@ def test_anonymous_constraint_assignment() -> None: assert "con0" in getattr(m.constraints, attr) assert m.constraints.labels.con0.shape == (10, 10) - assert m.constraints.labels.con0.dtype == int - assert m.constraints.coeffs.con0.dtype in (int, float) - assert m.constraints.vars.con0.dtype in (int, float) - assert m.constraints.rhs.con0.dtype in (int, float) + assert np.issubdtype(m.constraints.labels.con0.dtype, np.integer) + assert np.issubdtype(m.constraints.coeffs.con0.dtype, np.floating) or np.issubdtype( + m.constraints.coeffs.con0.dtype, np.integer + ) + assert np.issubdtype(m.constraints.vars.con0.dtype, np.integer) or np.issubdtype( + m.constraints.vars.con0.dtype, np.floating + ) + assert np.issubdtype(m.constraints.rhs.con0.dtype, np.floating) or np.issubdtype( + m.constraints.rhs.con0.dtype, np.integer + ) def test_constraint_assignment_with_tuples() -> None: diff --git a/test/test_dtypes.py b/test/test_dtypes.py new file mode 100644 index 00000000..cb96ed87 --- /dev/null +++ b/test/test_dtypes.py @@ -0,0 +1,104 @@ +"""Tests for default dtype settings (int32 labels, float32 floats).""" + +import numpy as np +import pytest + +from linopy import Model +from linopy.constants import DEFAULT_FLOAT_DTYPE, DEFAULT_LABEL_DTYPE + +# --- int32 label tests --- + + +def test_default_label_dtype_is_int32(): + assert DEFAULT_LABEL_DTYPE == np.int32 + + +def test_variable_labels_are_int32(): + m = Model() + x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x") + assert x.labels.dtype == np.int32 + + +def test_constraint_labels_are_int32(): + m = Model() + x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x") + m.add_constraints(x >= 1, name="c") + assert m.constraints["c"].labels.dtype == np.int32 + + +def test_expression_vars_are_int32(): + m = Model() + x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x") + expr = 2 * x + 1 + assert expr.vars.dtype == np.int32 + + +def test_solve_with_int32_labels(): + m = Model() + x = m.add_variables(lower=0, upper=10, name="x") + y = m.add_variables(lower=0, upper=10, name="y") + m.add_constraints(x + y <= 15, name="c1") + m.add_objective(x + 2 * y, sense="max") + m.solve("highs") + assert m.objective.value == pytest.approx(25.0) + + +def test_overflow_guard_variables(): + m = Model() + m._xCounter = np.iinfo(np.int32).max - 1 + with pytest.raises(ValueError, match="exceeds the maximum"): + m.add_variables(lower=0, upper=1, coords=[range(5)], name="x") + + +def test_overflow_guard_constraints(): + m = Model() + x = m.add_variables(lower=0, upper=1, coords=[range(5)], name="x") + m._cCounter = np.iinfo(np.int32).max - 1 + with pytest.raises(ValueError, match="exceeds the maximum"): + m.add_constraints(x >= 0, name="c") + + +# --- float32 tests --- + + +def test_default_float_dtype_is_float32(): + assert DEFAULT_FLOAT_DTYPE == np.float32 + + +def test_variable_bounds_are_float32(): + m = Model() + x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x") + assert x.lower.dtype == np.float32 + assert x.upper.dtype == np.float32 + + +def test_expression_coeffs_are_float32(): + m = Model() + x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x") + expr = 2 * x + 1 + assert expr.coeffs.dtype == np.float32 + + +def test_expression_const_is_float32(): + m = Model() + x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x") + expr = 2 * x + 1 + assert expr.const.dtype == np.float32 + + +def test_solve_with_float32_model_data(): + m = Model() + x = m.add_variables(lower=0, upper=10, name="x") + y = m.add_variables(lower=0, upper=10, name="y") + m.add_constraints(x + y <= 15, name="c1") + m.add_objective(x + 2 * y, sense="max") + m.solve("highs") + assert m.objective.value == pytest.approx(25.0) + + +def test_solution_values_are_float64(): + m = Model() + x = m.add_variables(lower=0, upper=10, name="x") + m.add_objective(x, sense="max") + m.solve("highs") + assert m.solution["x"].dtype == np.float64 diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index a75ace3f..8d2d4280 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -527,10 +527,10 @@ def test_linear_expression_multiplication( assert (mexpr.coeffs.sel(dim_1=0, dim_0=0, _term=0) == 100).item() mexpr = expr / 100 - assert (mexpr.coeffs.sel(dim_1=0, dim_0=0, _term=0) == 1 / 10).item() + assert mexpr.coeffs.sel(dim_1=0, dim_0=0, _term=0).item() == pytest.approx(1 / 10) mexpr = expr / 100.0 - assert (mexpr.coeffs.sel(dim_1=0, dim_0=0, _term=0) == 1 / 10).item() + assert mexpr.coeffs.sel(dim_1=0, dim_0=0, _term=0).item() == pytest.approx(1 / 10) def test_matmul_variable_and_const(x: Variable, y: Variable) -> None: