# Source code for econirl.datasets.eth_ucy

"""ETH and UCY Pedestrian Trajectory Datasets.

This module provides access to the classic ETH and UCY pedestrian trajectory
datasets, widely used as benchmarks in trajectory prediction and pedestrian
behavior modeling.

Scenes:
- ETH: ETH building entrance (Zurich)
- Hotel: Hotel entrance (Zurich)
- Univ: University campus (Cyprus)
- Zara1: Shopping street scene 1 (Seville)
- Zara2: Shopping street scene 2 (Seville)

Reference:
    Pellegrini, S., et al. (2009). "You'll Never Walk Alone: Modeling Social
    Behavior for Multi-target Tracking." ICCV.

    Lerner, A., et al. (2007). "Crowds by Example." Computer Graphics Forum.

Data source:
    https://service.tib.eu/ldmservice/en/dataset/eth-and-ucy-datasets
"""

from pathlib import Path
from typing import List, Literal, Optional, Union

import numpy as np
import pandas as pd


def load_eth_ucy(
    scene: Optional[Literal["eth", "hotel", "univ", "zara1", "zara2"]] = None,
    as_trajectories: bool = False,
    discretize: bool = False,
    grid_size: int = 50,
    seed: Optional[int] = 2009,
) -> Union[pd.DataFrame, List[np.ndarray]]:
    """Load ETH/UCY pedestrian trajectory data.

    The ETH/UCY datasets are classic benchmarks for pedestrian trajectory
    prediction, containing world-coordinate trajectories (in meters) at
    2.5 FPS.

    The data is read from ``eth_ucy_sample.csv`` located next to this module.
    If that file does not exist, a synthetic sample is generated and, when the
    directory is writable, cached there for subsequent calls.

    Args:
        scene: Specific scene to load (None = all scenes).
        as_trajectories: If True, return a list of per-pedestrian arrays
            instead of a DataFrame.
        discretize: If True, add a ``state`` column holding grid-cell indices.
            The grid bounds are computed after the ``scene`` filter has been
            applied, so they depend on the selected scene.
        grid_size: Number of grid cells per axis used for discretization.
        seed: Random seed for sample generation. Only used when the sample
            file is missing and has to be generated; an existing file is
            loaded as-is.

    Returns:
        When ``as_trajectories`` is False, a DataFrame with columns:
            - pedestrian_id: Unique pedestrian identifier
            - frame: Frame number
            - x: X coordinate (meters, world coordinates)
            - y: Y coordinate (meters, world coordinates)
            - scene: Scene name
            - state: Grid-cell index (only when ``discretize`` is True)

        When ``as_trajectories`` is True, a list with one array per
        pedestrian, ordered by frame: 1-D state indices if ``discretize`` is
        True, otherwise an ``(n_frames, 2)`` array of ``(x, y)`` positions.

    Example:
        >>> from econirl.datasets import load_eth_ucy
        >>> df = load_eth_ucy(scene="eth")
        >>> print(f"Pedestrians: {df['pedestrian_id'].nunique()}")

        >>> # For trajectory IRL
        >>> trajectories = load_eth_ucy(as_trajectories=True, discretize=True)
    """
    data_path = Path(__file__).parent / "eth_ucy_sample.csv"

    if data_path.exists():
        df = pd.read_csv(data_path)
    else:
        df = _generate_eth_ucy_sample(seed=seed)
        try:
            df.to_csv(data_path, index=False)
        except OSError:
            # The cache is a best-effort optimization: on a read-only install
            # (e.g. system site-packages) keep using the in-memory sample
            # rather than failing the load.
            pass

    if scene is not None:
        df = df[df['scene'] == scene]

    if discretize:
        df = _discretize_coords(df, grid_size)

    if as_trajectories:
        return _to_trajectories(df, discretize)

    return df
def _generate_eth_ucy_sample( n_pedestrians_per_scene: int = 100, n_frames_per_ped: int = 20, seed: int = 2009, ) -> pd.DataFrame: """Generate synthetic ETH/UCY-like data.""" np.random.seed(seed) scenes = ['eth', 'hotel', 'univ', 'zara1', 'zara2'] # Approximate scene dimensions in meters scene_dims = { 'eth': (20, 15), 'hotel': (15, 12), 'univ': (30, 25), 'zara1': (18, 14), 'zara2': (18, 14), } records = [] ped_id = 0 for scene in scenes: width, height = scene_dims[scene] for _ in range(n_pedestrians_per_scene): ped_id += 1 # Typical pedestrian speed: 1.2-1.5 m/s # At 2.5 FPS, that's ~0.5 m per frame speed = np.random.uniform(0.4, 0.6) # Random start and goal (often at edges) if np.random.random() < 0.5: # Enter from left/right x = 0 if np.random.random() < 0.5 else width y = np.random.uniform(0, height) goal_x = width - x # Go to opposite side goal_y = np.random.uniform(0, height) else: # Enter from top/bottom x = np.random.uniform(0, width) y = 0 if np.random.random() < 0.5 else height goal_x = np.random.uniform(0, width) goal_y = height - y start_frame = np.random.randint(0, 500) for t in range(n_frames_per_ped): frame = start_frame + t records.append({ 'pedestrian_id': ped_id, 'frame': frame, 'x': x, 'y': y, 'scene': scene, }) # Move towards goal dx = goal_x - x dy = goal_y - y dist = np.sqrt(dx**2 + dy**2) if dist > speed: # Add social force-like perturbation x += speed * dx / dist + np.random.normal(0, 0.05) y += speed * dy / dist + np.random.normal(0, 0.05) return pd.DataFrame(records) def _discretize_coords(df: pd.DataFrame, grid_size: int) -> pd.DataFrame: """Convert world coordinates to discrete grid cells.""" df = df.copy() x_min, x_max = df['x'].min(), df['x'].max() y_min, y_max = df['y'].min(), df['y'].max() x_bins = np.linspace(x_min, x_max, grid_size + 1) y_bins = np.linspace(y_min, y_max, grid_size + 1) x_idx = np.clip(np.digitize(df['x'], x_bins) - 1, 0, grid_size - 1) y_idx = np.clip(np.digitize(df['y'], y_bins) - 1, 0, grid_size - 1) df['state'] 
= y_idx * grid_size + x_idx return df def _to_trajectories(df: pd.DataFrame, has_states: bool) -> List[np.ndarray]: """Convert DataFrame to list of trajectory arrays.""" trajectories = [] for ped_id in df['pedestrian_id'].unique(): ped_data = df[df['pedestrian_id'] == ped_id].sort_values('frame') if has_states: traj = ped_data['state'].values else: traj = ped_data[['x', 'y']].values trajectories.append(traj) return trajectories
def get_eth_ucy_info() -> dict:
    """Return descriptive metadata for the ETH/UCY pedestrian datasets.

    Returns:
        A dict with the dataset name, type, domain, available scene names,
        coordinate system, frame rate, approximate pedestrian count, typical
        use cases, literature reference and download URL.
    """
    scene_names = ["eth", "hotel", "univ", "zara1", "zara2"]
    typical_uses = [
        "Social force IRL",
        "Pedestrian path preference learning",
        "Crowd simulation",
    ]

    info = dict(
        name="ETH and UCY Pedestrian Datasets",
        type="real (bundled sample) / synthetic fallback",
        domain="Pedestrian trajectory prediction",
        scenes=scene_names,
        coordinate_system="World coordinates (meters)",
        fps=2.5,
        n_pedestrians_total="~1500+",
        use_cases=typical_uses,
        reference="Pellegrini et al. (2009). ICCV. / Lerner et al. (2007). CGF.",
        download_url="https://service.tib.eu/ldmservice/en/dataset/eth-and-ucy-datasets",
    )
    return info