"""Sklearn-style NNES estimator for dynamic discrete choice models.
This module provides an NNES class with a scikit-learn style API that wraps
the underlying NNESEstimator from econirl.estimation.nnes. It accepts pandas
DataFrames with column names instead of the low-level Panel API.
NNES (Neural Network Estimation of Structural models, Nguyen 2025) trains
a neural V(s) network in Phase 1 and uses it in a structural MLE in Phase 2.
Two Bellman variants are available via the ``bellman`` parameter:
- ``"npl"`` (default): Uses the NPL Bellman with Hotz-Miller emax correction.
Has the zero Jacobian property (Neyman orthogonality), so standard errors
are semiparametrically efficient despite V-approximation error.
- ``"nfxp"``: Uses the NFXP soft Bellman operator. Does NOT have Neyman
orthogonality. V-approximation errors contaminate the score.
Example:
>>> from econirl.estimators import NNES
>>> import pandas as pd
>>>
>>> df = pd.read_csv("zurcher_bus.csv")
>>> model = NNES(n_states=90, discount=0.9999, bellman="npl")
>>> model.fit(data=df, state="mileage_bin", action="replaced", id="bus_id")
>>> print(model.params_)
>>> print(model.summary())
"""
from __future__ import annotations
from typing import Literal
import jax.numpy as jnp
import numpy as np
import pandas as pd
from scipy.stats import norm as scipy_norm
from econirl.core.reward_spec import RewardSpec
from econirl.core.types import DDCProblem, Panel, Trajectory, TrajectoryPanel
from econirl.estimation.nnes import NNESConfig, NNESEstimator, NNESNFXPEstimator
from econirl.preferences.linear import LinearUtility
from econirl.transitions import TransitionEstimator
[docs]
class NNES:
"""Sklearn-style NNES estimator for dynamic discrete choice models.
NNES (Neural Network Estimation of Structural models) estimates utility
parameters using a two-phase approach: Phase 1 trains a neural V(s)
network, Phase 2 uses the learned V-network in a structural MLE for
theta (Nguyen 2025).
Parameters
----------
n_states : int, default=90
Number of discrete states (e.g., mileage bins).
n_actions : int, default=2
Number of discrete actions (e.g., keep/replace).
discount : float, default=0.9999
Time discount factor (beta).
utility : str or RewardSpec, default="linear_cost"
Utility specification. Pass ``"linear_cost"`` for the classic Rust
bus model (``u = -theta_c * s * (1-a) - RC * a``), or a
``RewardSpec`` for custom features.
bellman : str, default="npl"
Which Bellman equation to use in Phase 1. ``"npl"`` uses the NPL
Bellman with Hotz-Miller emax correction (has Neyman orthogonality,
standard errors are semiparametrically efficient). ``"nfxp"`` uses
the NFXP soft Bellman operator (no orthogonality, V-errors
contaminate the score).
se_method : str, default="asymptotic"
Method for computing standard errors. Options: "robust", "asymptotic".
hidden_dim : int, default=32
Number of hidden units per layer in the V-network.
num_layers : int, default=2
Number of hidden layers in the V-network.
v_lr : float, default=1e-3
Learning rate for V-network training (Phase 1).
v_epochs : int, default=500
Number of training epochs for V-network per outer iteration.
n_outer_iterations : int, default=3
Number of Phase 1 + Phase 2 alternations.
verbose : bool, default=False
Whether to print progress messages during estimation.
Attributes
----------
params_ : dict
Estimated parameters after fitting.
se_ : dict
Standard errors for each parameter.
coef_ : numpy.ndarray
Coefficients as a numpy array (sklearn convention).
log_likelihood_ : float
Maximized log-likelihood value.
pvalues_ : dict
P-values for each parameter (from Wald t-test).
policy_ : numpy.ndarray
Estimated choice probabilities P(a|s) of shape (n_states, n_actions).
value_ : numpy.ndarray
Estimated value function V(s) of shape (n_states,).
v_network_ : numpy.ndarray or None
V-network values for all states after training, shape (n_states,).
converged_ : bool
Whether the optimization converged.
reward_spec_ : RewardSpec
The reward specification used for estimation.
Examples
--------
>>> from econirl.estimators import NNES
>>> model = NNES(n_states=90, bellman="npl") # default: NPL Bellman
>>> model.fit(df, state="mileage", action="replaced", id="bus_id")
>>> print(model.params_)
"""
[docs]
def __init__(
self,
n_states: int = 90,
n_actions: int = 2,
discount: float = 0.9999,
utility: str | RewardSpec = "linear_cost",
bellman: Literal["npl", "nfxp"] = "npl",
se_method: Literal["robust", "asymptotic"] = "asymptotic",
hidden_dim: int = 32,
num_layers: int = 2,
v_lr: float = 1e-3,
v_epochs: int = 500,
n_outer_iterations: int = 3,
verbose: bool = False,
):
self.n_states = n_states
self.n_actions = n_actions
self.discount = discount
self.utility = utility
self.bellman = bellman
self.se_method = se_method
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.v_lr = v_lr
self.v_epochs = v_epochs
self.n_outer_iterations = n_outer_iterations
self.verbose = verbose
# Fitted attributes (set after fit())
self.params_: dict[str, float] | None = None
self.se_: dict[str, float] | None = None
self.pvalues_: dict[str, float] | None = None
self.coef_: np.ndarray | None = None
self.log_likelihood_: float | None = None
self.value_: np.ndarray | None = None
self.policy_: np.ndarray | None = None
self.transitions_: np.ndarray | None = None
self.converged_: bool | None = None
self.reward_spec_: RewardSpec | None = None
self.v_network_: np.ndarray | None = None
# Internal storage
self._result = None
self._panel = None
self._utility_fn = None
self._problem = None
[docs]
def fit(
self,
data: pd.DataFrame | Panel | TrajectoryPanel,
state: str | None = None,
action: str | None = None,
id: str | None = None,
transitions: np.ndarray | None = None,
reward: RewardSpec | None = None,
) -> "NNES":
"""Fit the NNES estimator to data.
Parameters
----------
data : pandas.DataFrame or Panel or TrajectoryPanel
Panel data with observations. When a DataFrame is passed,
``state``, ``action``, and ``id`` column names are required.
When a Panel/TrajectoryPanel is passed, column names are ignored.
state : str, optional
Column name for the state variable (required for DataFrame input).
action : str, optional
Column name for the action variable (required for DataFrame input).
id : str, optional
Column name for the individual identifier (required for DataFrame
input).
transitions : numpy.ndarray, optional
Pre-estimated transition matrix of shape (n_states, n_states).
If None, transitions are estimated from the data.
reward : RewardSpec, optional
Reward/utility specification. If provided, overrides the
``utility`` parameter passed at construction time.
Returns
-------
self : NNES
Returns self for method chaining.
"""
# Resolve reward spec: explicit argument > constructor parameter
reward_spec = reward if reward is not None else self.utility
# --- Handle data: DataFrame or Panel/TrajectoryPanel ---
if isinstance(data, pd.DataFrame):
if state is None or action is None or id is None:
raise ValueError(
"state, action, and id column names are required "
"when data is a DataFrame"
)
self._panel = TrajectoryPanel.from_dataframe(
data, state=state, action=action, id=id
)
elif isinstance(data, (Panel, TrajectoryPanel)):
self._panel = data
else:
raise TypeError(
f"data must be a DataFrame, Panel, or TrajectoryPanel, "
f"got {type(data)}"
)
# --- Handle reward: RewardSpec or string ---
if isinstance(reward_spec, RewardSpec):
self.reward_spec_ = reward_spec
self._utility_fn = reward_spec.to_linear_utility()
elif reward_spec == "linear_cost":
self._utility_fn = self._create_utility()
# Also create RewardSpec from the utility for consistency
self.reward_spec_ = RewardSpec(
self._utility_fn.feature_matrix,
self._utility_fn.parameter_names,
)
else:
raise ValueError(f"Unknown reward/utility specification: {reward_spec}")
# Estimate transitions if not provided
if transitions is None:
trans_estimator = TransitionEstimator(
n_states=self.n_states,
max_increase=2,
)
trans_estimator.fit(self._panel)
self.transitions_ = trans_estimator.matrix_
else:
self.transitions_ = np.asarray(transitions)
# Build full transition matrices (for both actions)
transition_tensor = self._build_transition_tensor(self.transitions_)
# Create problem specification with state encoder for neural network
self._problem = DDCProblem(
num_states=self.n_states,
num_actions=self.n_actions,
discount_factor=self.discount,
scale_parameter=1.0,
state_dim=1,
state_encoder=lambda s: jnp.expand_dims(jnp.asarray(s, dtype=jnp.float32) / max(self.n_states - 1, 1), axis=-1),
)
# Create the underlying NNES estimator (NPL or NFXP variant)
config = NNESConfig(
hidden_dim=self.hidden_dim,
num_layers=self.num_layers,
v_lr=self.v_lr,
v_epochs=self.v_epochs,
n_outer_iterations=self.n_outer_iterations,
compute_se=True,
se_method=self.se_method,
verbose=self.verbose,
)
if self.bellman == "nfxp":
estimator = NNESNFXPEstimator(config=config)
else:
estimator = NNESEstimator(config=config)
# Run estimation
self._result = estimator.estimate(
panel=self._panel,
utility=self._utility_fn,
problem=self._problem,
transitions=transition_tensor,
)
# Extract results
self._extract_results()
return self
def _build_transition_tensor(self, keep_transitions: np.ndarray) -> jnp.ndarray:
"""Build full transition tensor for both actions.
Parameters
----------
keep_transitions : numpy.ndarray
Transition matrix for action=0 (keep), shape (n_states, n_states).
Returns
-------
jnp.ndarray
Transition tensor of shape (n_actions, n_states, n_states).
"""
n = self.n_states
transitions = np.zeros((self.n_actions, n, n), dtype=np.float32)
# Action 0 (keep): use provided transitions
transitions[0] = np.asarray(keep_transitions, dtype=np.float32)
# Action 1 (replace): reset to state 0, then transition
for s in range(n):
transitions[1, s, :] = transitions[0, 0, :]
return jnp.array(transitions)
def _create_utility(self) -> LinearUtility:
"""Create utility function for estimation.
Returns
-------
LinearUtility
Utility function with appropriate features.
"""
if self.utility != "linear_cost":
raise ValueError(f"Unknown utility specification: {self.utility}")
# Build feature matrix for linear cost utility
# U(s, keep) = -theta_c * s
# U(s, replace) = -RC
n = self.n_states
features = jnp.zeros((n, self.n_actions, 2))
mileage = jnp.arange(n, dtype=jnp.float32)
# Keep action (a=0): feature = [-s, 0]
features = features.at[:, 0, 0].set(-mileage)
# Replace action (a=1): feature = [0, -1]
features = features.at[:, 1, 1].set(-1.0)
return LinearUtility(
feature_matrix=features,
parameter_names=["theta_c", "RC"],
)
def _extract_results(self) -> None:
"""Extract results from estimation into sklearn-style attributes."""
if self._result is None:
return
# Parameter estimates
params = np.asarray(self._result.parameters)
param_names = self._result.parameter_names
self.params_ = {name: float(val) for name, val in zip(param_names, params)}
self.coef_ = params.copy()
# Standard errors
se = np.asarray(self._result.standard_errors)
self.se_ = {name: float(val) for name, val in zip(param_names, se)}
# Other attributes
self.log_likelihood_ = float(self._result.log_likelihood)
self.converged_ = bool(self._result.converged)
if self._result.value_function is not None:
self.value_ = np.asarray(self._result.value_function)
# Policy matrix
if self._result.policy is not None:
self.policy_ = np.asarray(self._result.policy)
# p-values from t-statistics
if self.se_ is not None:
pvalues: dict[str, float] = {}
for name in self.params_:
se_val = self.se_[name]
if se_val and se_val > 0:
t_stat = self.params_[name] / se_val
pvalues[name] = float(
2 * (1 - scipy_norm.cdf(abs(t_stat)))
)
else:
pvalues[name] = float("nan")
self.pvalues_ = pvalues
# NNES-specific: V-network values
if self._result.metadata:
v_vals = self._result.metadata.get("v_network_values")
if v_vals is not None:
self.v_network_ = np.asarray(v_vals)
@property
def reward_matrix_(self) -> np.ndarray | None:
"""Structural reward matrix R(s,a) of shape (n_states, n_actions).
Computes the utility matrix from the fitted parameters and the
feature specification. Returns None if the model has not been fitted.
"""
if self.params_ is None or self._utility_fn is None or self._result is None:
return None
param_names = self._result.parameter_names
param_vector = jnp.array(
[self.params_[name] for name in param_names],
dtype=jnp.float32,
)
utility_matrix = self._utility_fn.compute(param_vector)
return np.asarray(utility_matrix)
[docs]
def summary(self) -> str:
"""Generate a formatted summary of estimation results.
Returns
-------
str
Human-readable summary of the estimation.
"""
if self._result is None:
return "NNES: Not fitted yet. Call fit() first."
return self._result.summary()
[docs]
def conf_int(self, alpha: float = 0.05) -> dict:
"""Compute confidence intervals for parameters.
Parameters
----------
alpha : float, default=0.05
Significance level. Returns (1 - alpha) confidence intervals.
Returns
-------
dict
``{param_name: (lower, upper)}`` confidence intervals.
Raises
------
RuntimeError
If the model has not been fitted yet.
"""
if self.params_ is None or self.se_ is None:
raise RuntimeError("Model not fitted. Call fit() first.")
z = scipy_norm.ppf(1 - alpha / 2)
intervals: dict[str, tuple[float, float]] = {}
for name in self.params_:
est = self.params_[name]
se = self.se_[name]
intervals[name] = (est - z * se, est + z * se)
return intervals
[docs]
def predict_proba(self, states: np.ndarray) -> np.ndarray:
"""Predict choice probabilities for given states.
Parameters
----------
states : numpy.ndarray
Array of state indices.
Returns
-------
numpy.ndarray
Choice probabilities of shape (len(states), n_actions).
Each row sums to 1.
"""
if self._result is None:
raise RuntimeError("Model not fitted. Call fit() first.")
states = np.asarray(states, dtype=np.int64)
# Get policy (choice probabilities) from result
policy = np.asarray(self._result.policy)
# Index into the policy for the requested states
proba = policy[states]
return proba
def __repr__(self) -> str:
fitted = self.params_ is not None
return (
f"NNES(n_states={self.n_states}, n_actions={self.n_actions}, "
f"discount={self.discount}, bellman={self.bellman!r}, fitted={fitted})"
)