"""
Equipment Replacement Variants Dataset.
This module provides synthetic datasets for equipment replacement problems,
with multiple variants to test estimators on different action/state configurations.
Variants:
- "binary": 2 actions (keep, replace) - similar to Rust (1987)
- "ternary": 3 actions (keep, minor_repair, major_repair)
- "continuous_state": More states (200 instead of 90)
These variants are useful for testing estimator robustness across different
problem configurations.
"""
import numpy as np
import pandas as pd
[docs]
def load_equipment_replacement(
    variant: str = "binary",
    n_machines: int = 100,
    n_periods: int = 100,
    as_panel: bool = False,
    seed: int = 1987,
) -> pd.DataFrame:
    """
    Load synthetic equipment replacement data with configurable variants.

    This dataset represents machines making maintenance/replacement decisions
    over time. The state represents equipment wear level, and actions vary
    by variant (binary replacement, ternary with repairs, or continuous state).

    Args:
        variant: Problem variant to generate:
            - "binary": 2 actions (keep, replace), 90 states - like Rust
            - "ternary": 3 actions (keep, minor_repair, major_repair), 90 states
            - "continuous_state": 2 actions (keep, replace), 200 states
        n_machines: Number of machines to simulate (default: 100)
        n_periods: Number of time periods per machine (default: 100)
        as_panel: If True, return data structured as a Panel object
            compatible with econirl estimators. If False (default),
            return as a pandas DataFrame.
        seed: Random seed for reproducibility (default: 1987)

    Returns:
        If ``as_panel`` is False, a DataFrame with columns:
            - id: Machine identifier
            - period: Time period (0-indexed)
            - state: Discretized wear state index
            - action: Chosen action (varies by variant)
            - wear_level: Continuous wear level (underlying state)
            - variant: The variant name used to generate this data
        If ``as_panel`` is True, a Panel holding one Trajectory per machine.
        Each trajectory's ``next_states`` is ``states`` shifted by one period;
        the final period has no observed successor and is filled with 0.

    Raises:
        ValueError: If variant is not one of "binary", "ternary", "continuous_state"

    Example:
        >>> from econirl.datasets import load_equipment_replacement
        >>> df = load_equipment_replacement(variant="binary")
        >>> print(f"Observations: {len(df):,}")
        >>> print(f"Machines: {df['id'].nunique()}")
        >>> print(f"States: {df['state'].nunique()}")
        >>> # Test with ternary actions
        >>> df_ternary = load_equipment_replacement(variant="ternary")
        >>> print(f"Actions: {df_ternary['action'].unique()}")
        >>> # Get as Panel for estimation
        >>> panel = load_equipment_replacement(as_panel=True)
        >>> print(f"Panel with {panel.num_individuals} machines")

    Notes:
        Action interpretation by variant:
            - binary: 0=keep, 1=replace
            - ternary: 0=keep, 1=minor_repair, 2=major_repair
            - continuous_state: 0=keep, 1=replace (but with finer state grid)
        State encoding:
            - binary: 90 states (wear bins, like Rust's mileage bins)
            - ternary: 90 states with different transition dynamics
            - continuous_state: 200 states for finer granularity
    """
    valid_variants = {"binary", "ternary", "continuous_state"}
    if variant not in valid_variants:
        raise ValueError(
            f"variant must be one of {valid_variants}, got '{variant}'"
        )

    df = _generate_equipment_replacement_data(
        variant=variant,
        n_machines=n_machines,
        n_periods=n_periods,
        seed=seed,
    )
    if not as_panel:
        return df

    # Imported only on the Panel path, so the DataFrame path never executes
    # these imports.
    from econirl.core.types import Panel, Trajectory
    import jax.numpy as jnp

    trajectories = []
    # One pass over the frame instead of re-filtering it once per machine.
    # sort=False keeps machines in order of first appearance, which is the
    # order df["id"].unique() would have produced.
    for machine_id, machine_data in df.groupby("id", sort=False):
        machine_data = machine_data.sort_values("period")
        states = jnp.array(machine_data["state"].values, dtype=jnp.int32)
        actions = jnp.array(machine_data["action"].values, dtype=jnp.int32)
        # Shift states by one period; the last period has no successor, so it
        # is padded with 0. The pad is explicitly int32 so next_states keeps
        # the same dtype as states whatever JAX's default integer width is.
        terminal_pad = jnp.array([0], dtype=jnp.int32)
        next_states = jnp.concatenate([states[1:], terminal_pad])
        trajectories.append(
            Trajectory(
                states=states,
                actions=actions,
                next_states=next_states,
                individual_id=int(machine_id),
            )
        )
    return Panel(trajectories=trajectories)
def _generate_equipment_replacement_data(
    variant: str,
    n_machines: int,
    n_periods: int,
    seed: int,
) -> pd.DataFrame:
    """
    Generate synthetic equipment replacement data.

    Simulates ``n_machines`` machines for ``n_periods`` periods each. Every
    machine starts at wear state 0; in each period an action is sampled from
    a softmax over the variant's action logits, the observation is recorded,
    and the wear state is advanced with ``_transition``.

    Args:
        variant: Variant name passed to the config, logit and transition helpers.
        n_machines: Number of machines to simulate.
        n_periods: Number of periods simulated per machine.
        seed: Seed applied to NumPy's global random state.

    Returns:
        DataFrame with one row per (machine, period) and columns
        ``id``, ``period``, ``state``, ``action``, ``wear_level``, ``variant``.
        The columns are present even when no rows are generated.
    """
    # Seeds NumPy's *global* RNG: both the action draw below and the wear
    # increments drawn inside _transition go through np.random.choice.
    np.random.seed(seed)

    # Get variant-specific configuration
    config = _get_variant_config(variant)
    num_actions = config["num_actions"]

    records = []
    for machine_id in range(n_machines):
        # Initial state: new equipment with no wear
        wear_state = 0
        wear_level = 0.0
        for period in range(n_periods):
            # Compute choice probabilities based on variant
            logits = _compute_action_logits(wear_state, variant, config)
            # Softmax; subtracting the max keeps np.exp from overflowing
            exp_logits = np.exp(logits - np.max(logits))
            probs = exp_logits / exp_logits.sum()
            # Draw action
            action = np.random.choice(num_actions, p=probs)
            # Record the state as observed *before* the transition
            records.append({
                "id": machine_id,
                "period": period,
                "state": wear_state,
                "action": action,
                "wear_level": wear_level,
                "variant": variant,
            })
            # State transition based on action and variant
            wear_state, wear_level = _transition(
                wear_state, wear_level, action, variant, config
            )

    # Explicit columns so an empty simulation (zero machines or zero periods)
    # still yields a frame with the expected schema instead of no columns.
    columns = ["id", "period", "state", "action", "wear_level", "variant"]
    return pd.DataFrame(records, columns=columns)
def _get_variant_config(variant: str) -> dict:
"""Get configuration parameters for each variant."""
if variant == "binary":
return {
"num_states": 90,
"num_actions": 2,
"action_names": {0: "keep", 1: "replace"},
# Cost parameters (in utility units)
"theta_c": 0.001, # Operating cost per state
"RC": 3.0, # Replacement cost
# Transition probabilities (stay, +1, +2 bins)
"p_transition": np.array([0.3919, 0.5953, 0.0128]),
"wear_per_bin": 5.0, # Wear units per state bin
}
elif variant == "ternary":
return {
"num_states": 90,
"num_actions": 3,
"action_names": {0: "keep", 1: "minor_repair", 2: "major_repair"},
# Cost parameters
"theta_c": 0.001,
"RC_minor": 1.0, # Minor repair cost
"RC_major": 3.0, # Major repair cost (full replacement)
# Transition probabilities
"p_transition": np.array([0.3919, 0.5953, 0.0128]),
"wear_per_bin": 5.0,
}
else: # continuous_state
return {
"num_states": 200,
"num_actions": 2,
"action_names": {0: "keep", 1: "replace"},
"theta_c": 0.0005, # Lower cost per state (more states)
"RC": 3.0,
# Finer transition probabilities
"p_transition": np.array([0.25, 0.45, 0.20, 0.08, 0.02]),
"wear_per_bin": 2.25, # Finer granularity
}
def _compute_action_logits(
wear_state: int,
variant: str,
config: dict,
) -> np.ndarray:
"""
Compute action logits based on current wear state and variant.
Returns logits for each action in the variant's action space.
"""
num_actions = config["num_actions"]
logits = np.zeros(num_actions)
if variant == "binary":
# Binary: keep vs replace
# V(keep) ~ -theta_c * wear_state
# V(replace) ~ -RC
logits[0] = -config["theta_c"] * wear_state # keep
logits[1] = -config["RC"] # replace
elif variant == "ternary":
# Ternary: keep vs minor_repair vs major_repair
theta_c = config["theta_c"]
# V(keep) ~ -theta_c * wear_state
logits[0] = -theta_c * wear_state # keep
# V(minor_repair) ~ -RC_minor - theta_c * (wear_state/2)
# Minor repair reduces wear but doesn't eliminate it
logits[1] = -config["RC_minor"] - theta_c * (wear_state / 2) # minor_repair
# V(major_repair) ~ -RC_major (like full replacement)
logits[2] = -config["RC_major"] # major_repair
else: # continuous_state
# Same as binary, but with finer state granularity
logits[0] = -config["theta_c"] * wear_state # keep
logits[1] = -config["RC"] # replace
return logits
def _transition(
wear_state: int,
wear_level: float,
action: int,
variant: str,
config: dict,
) -> tuple[int, float]:
"""
Compute state transition based on action.
Returns the new (wear_state, wear_level) tuple.
"""
num_states = config["num_states"]
p_transition = config["p_transition"]
wear_per_bin = config["wear_per_bin"]
if variant == "binary" or variant == "continuous_state":
if action == 1: # Replace
return 0, 0.0
else: # Keep
# Stochastic wear increment
increment = np.random.choice(len(p_transition), p=p_transition)
new_state = min(wear_state + increment, num_states - 1)
new_level = wear_level + increment * wear_per_bin
return new_state, new_level
else: # ternary
if action == 2: # Major repair (full replacement)
return 0, 0.0
elif action == 1: # Minor repair (reduce wear by half)
new_state = max(0, wear_state // 2)
new_level = max(0.0, wear_level / 2)
return new_state, new_level
else: # Keep
increment = np.random.choice(len(p_transition), p=p_transition)
new_state = min(wear_state + increment, num_states - 1)
new_level = wear_level + increment * wear_per_bin
return new_state, new_level
def get_equipment_replacement_info(variant: str = "binary") -> dict:
    """
    Describe one variant of the equipment replacement dataset.

    Args:
        variant: Which variant to describe ("binary", "ternary",
            "continuous_state").

    Returns:
        Dictionary with the keys ``name``, ``variant``, ``num_states``,
        ``num_actions``, ``action_names``, ``state_description`` and
        ``description``.

    Raises:
        KeyError: If ``variant`` has no entry in the description table.
    """
    config = _get_variant_config(variant)

    descriptions = {
        "binary": "Binary replacement decision (keep/replace), 90 states - similar to Rust (1987)",
        "ternary": "Three maintenance options (keep/minor_repair/major_repair), 90 states",
        "continuous_state": "Binary replacement with finer state granularity, 200 states",
    }

    info = {
        "name": f"Equipment Replacement ({variant})",
        "variant": variant,
    }
    # Size and labelling of the state/action spaces come straight from the
    # variant configuration.
    for key in ("num_states", "num_actions", "action_names"):
        info[key] = config[key]
    info["state_description"] = "Discretized equipment wear level"
    info["description"] = descriptions[variant]
    return info