Spaces:

BuildingBench
/

BuildingBench-Dataset

Sleeping

File size: 30,719 Bytes

dd4ed72
 
 
 
5c4dfca
dd4ed72

import argparse
import logging
import sys
from pathlib import Path
import pandas as pd
sys.path.insert(0, str(Path(__file__).parent.parent))
from building_gen.core import BuildingPipeline, create_building_weather_combinations


def configure_logging(level: str = "INFO"):
    """Set up root logging and quieten chatty third-party loggers.

    Args:
        level: Name of a ``logging`` level (case-insensitive). A name that is
            not an attribute of the ``logging`` module falls back to INFO.
    """
    resolved_level = getattr(logging, level.upper(), logging.INFO)

    logging.basicConfig(
        datefmt='%Y-%m-%d %H:%M:%S',
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=resolved_level,
    )

    # External libraries only get to speak at WARNING and above.
    for noisy_logger in ('googleapiclient', 'google_auth_oauthlib'):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)



def main(argv: "list[str] | None" = None) -> None:
    """Run the building processing pipeline command-line interface.

    Parses the command line, configures logging, creates a
    ``BuildingPipeline`` on ``--data-dir`` and then runs every requested
    step in a fixed order: building steps (preprocess, occupancy / window /
    thermal / combined variations, building table), weather table
    operations, weather variations, building-weather matching and
    combinations, and finally building queries, export and statistics.

    ``--all`` triggers only the building steps; weather and simulation
    operations must be requested explicitly.

    Failures inside the weather and simulation steps are logged and do not
    stop the remaining steps (best effort). Failures in the building steps
    and in the query section abort the run.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: With status 1 on an invalid WWR ratio, when no action is
            requested, when the pipeline cannot be initialized, on user
            interrupt, or when an unguarded step fails. argparse itself
            exits for ``--help`` and for invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    # Configure logging
    configure_logging(args.log_level)
    logger = logging.getLogger(__name__)

    # Validate arguments
    for wwr in args.wwr_ratios or []:
        if not 0.0 <= wwr <= 1.0:
            logger.error(f"WWR ratio must be between 0.0 and 1.0, got {wwr}")
            sys.exit(1)

    # --export-combinations is only consumed by the --create-combinations step.
    if args.export_combinations is not None and not args.create_combinations:
        logger.warning("--export-combinations has no effect without --create-combinations")

    # Check if any action is requested before doing any real work. The export
    # options count as actions because the code below runs a query for them.
    requested_actions = [
        args.all, args.preprocess, args.occupancy_vars,
        args.window_vars, args.thermal_vars, args.combined_vars, args.create_table,
        args.query, args.stats, args.create_weather_table,
        args.weather_stats, args.query_weather, args.validate_weather,
        args.match_buildings_weather, args.create_combinations,
        args.simulation_stats, args.weather_vars, args.weather_variation_stats,
        args.export is not None, args.export_weather is not None,
    ]
    if not any(requested_actions):
        logger.error("No action specified. Use --help for options.")
        sys.exit(1)

    # Initialize pipeline
    try:
        pipeline = BuildingPipeline(args.data_dir)
        logger.info(f"Initialized pipeline with data directory: {args.data_dir}")
    except Exception as e:
        logger.error(f"Failed to initialize pipeline: {e}")
        sys.exit(1)

    # Paths shared by several steps below.
    data_dir = Path(args.data_dir)
    weather_dirs = [
        data_dir / "weather/base",
        data_dir / "weather/expanded",
        data_dir / "weather/real"
    ]
    weather_table_path = data_dir / "weather/tables/all_weather.csv"
    buildings_path = data_dir / "tables/buildings.csv"
    combinations_path = data_dir / "tables/building_weather_combinations.csv"

    try:
        # Execute pipeline steps
        if args.all or args.preprocess:
            logger.info("Starting preprocessing...")
            processed, failed = pipeline.preprocess_buildings(
                add_meters=not args.no_meters,
                add_outdoor_vars=not args.no_outdoor_vars,
                timesteps_per_hour=args.timesteps_per_hour,
                add_setpoint_control=not args.no_setpoint_control,
                validate=not args.no_validation
            )
            logger.info(f"Preprocessed {len(processed)} buildings ({len(failed)} failed)")

        if args.all or args.occupancy_vars:
            logger.info("Creating occupancy variations...")
            count, failed = pipeline.create_occupancy_variations(args.occupancy_schedules)
            logger.info(f"Created {count} occupancy variations ({len(failed)} failed)")

        if args.all or args.window_vars:
            logger.info("Creating window variations...")
            count, failed = pipeline.create_window_variations(args.wwr_ratios)
            logger.info(f"Created {count} window variations ({len(failed)} failed)")

        if args.all or args.thermal_vars:
            logger.info("Creating thermal resistance variations...")
            count, failed = pipeline.create_thermal_variations(args.thermal_scenarios)
            logger.info(f"Created {count} thermal variations ({len(failed)} failed)")

        if args.all or args.combined_vars:
            logger.info("Creating combined variations...")
            # Combined variations pair every occupancy schedule with every
            # thermal scenario; window variations are deliberately left out.
            variation_combos = [
                {"occupancy": occ, "thermal": thermal}
                for occ in args.occupancy_schedules
                for thermal in args.thermal_scenarios
            ]
            count, failed = pipeline.create_combined_variations(
                variation_types=["occupancy", "thermal"],
                combinations=variation_combos
            )
            logger.info(f"Created {count} combined variations ({len(failed)} failed)")

        if args.all or args.create_table:
            logger.info("Creating building table...")
            table_file = pipeline.create_building_table(update_existing=args.update_existing)
            logger.info(f"Building table created: {table_file}")

        # Weather operations
        if args.create_weather_table:
            logger.info("Creating weather table...")
            try:
                # Imported lazily so the rest of the CLI works without it.
                from building_gen.database.weather_table import create_weather_table_with_real
                df = create_weather_table_with_real(weather_dirs, weather_table_path)
                logger.info(f"Created weather table with {len(df)} locations")
            except ImportError:
                logger.error("Weather table functionality not implemented yet")
            except Exception as e:
                logger.error(f"Failed to create weather table: {e}")

        if args.weather_stats:
            try:
                if weather_table_path.exists():
                    df = pd.read_csv(weather_table_path)
                    print("\n🌤️ Weather Collection Statistics:")
                    print(f"  Total locations: {len(df)}")
                    print(f"  Countries: {df['country'].nunique()}")
                    print(f"  Data sources: {df['data_source'].value_counts().to_dict()}")
                    print("\n  Top 10 countries by location count:")
                    for country, count in df['country'].value_counts().head(10).items():
                        print(f"    {country}: {count}")

                    if 'climate_zone_code' in df.columns:
                        print(f"\n  Climate zones represented: {df['climate_zone_code'].nunique()}")
                        print("  Climate zone distribution:")
                        for zone, count in df['climate_zone_code'].value_counts().head(10).items():
                            print(f"    {zone}: {count}")
                else:
                    logger.error("Weather table not found. Run --create-weather-table first.")
            except Exception as e:
                logger.error(f"Failed to show weather statistics: {e}")

        if args.query_weather or args.export_weather is not None:
            try:
                if weather_table_path.exists():
                    df = _filter_weather_locations(pd.read_csv(weather_table_path), args)

                    if args.query_weather:
                        print(f"\n Found {len(df)} weather locations matching criteria:")
                        for _, row in df.head(15).iterrows():
                            print(f"  {row['place']}, {row['country']}")
                            print(f"    Coordinates: {row['latitude']:.2f}, {row['longitude']:.2f}")
                            if 'climate_zone_code' in row:
                                print(f"    Climate zone: {row['climate_zone_code']}")
                            print(f"    Source: {row['data_source']}")
                            print()

                        if len(df) > 15:
                            print(f"  ... and {len(df) - 15} more locations")

                    if args.export_weather is not None:
                        df.to_csv(args.export_weather, index=False)
                        logger.info(f"Exported {len(df)} weather locations to {args.export_weather}")
                else:
                    logger.error("Weather table not found. Run --create-weather-table first.")
            except Exception as e:
                logger.error(f"Failed to query weather: {e}")

        if args.validate_weather:
            logger.info("Validating weather files...")
            try:
                # Optional dependency: only needed for this step.
                from ladybug.epw import EPW

                valid_count = 0
                invalid_count = 0

                for weather_dir in weather_dirs:
                    for epw_file in weather_dir.glob("*.epw"):
                        try:
                            weather = EPW(epw_file)
                            # Basic validation - check if we can read location data
                            _ = weather.location.city
                            _ = weather.location.latitude
                            _ = weather.location.longitude
                            valid_count += 1
                        except Exception as e:
                            logger.warning(f"Invalid weather file {epw_file}: {e}")
                            invalid_count += 1

                logger.info(f"Weather validation complete: {valid_count} valid, {invalid_count} invalid")
            except ImportError:
                logger.error("ladybug library not available for weather validation")
            except Exception as e:
                logger.error(f"Weather validation failed: {e}")

        # Weather variations
        if args.weather_vars:
            logger.info("Creating weather variations...")
            try:
                count, failed = pipeline.create_weather_variations(args.weather_types)
                logger.info(f"Created {count} weather variations ({len(failed)} failed)")
            except Exception as e:
                logger.error(f"Failed to create weather variations: {e}")

        if args.weather_variation_stats:
            try:
                stats = pipeline.get_weather_variation_stats()
                _print_nested_stats("\n🌤️ Weather Variation Statistics:", stats)
            except Exception as e:
                logger.error(f"Failed to show weather variation statistics: {e}")

        # Building-Weather Simulation operations
        if args.match_buildings_weather:
            logger.info("Matching buildings to weather files...")
            try:
                buildings_df = pipeline.match_buildings_to_weather()
                logger.info(f"Successfully matched {len(buildings_df)} buildings to weather files")
            except Exception as e:
                logger.error(f"Failed to match buildings to weather: {e}")

        if args.create_combinations:
            logger.info("Creating building-weather combinations...")
            try:
                buildings_df = pd.read_csv(buildings_path)
                weather_df = pd.read_csv(weather_table_path)

                combinations = create_building_weather_combinations(
                    buildings_df,
                    weather_df,
                    weather_df,  # Using same table for base_weather_table - adjust if you have a separate base weather table
                    args.simulation_climate_zones
                )

                # Save combinations to CSV
                combinations_df = pd.DataFrame(combinations, columns=['building_id', 'weather_id'])
                combinations_df.to_csv(combinations_path, index=False)

                logger.info(f"Created {len(combinations)} building-weather combinations")
                logger.info(f"Combinations saved to: {combinations_path}")

                if args.export_combinations is not None:
                    combinations_df.to_csv(args.export_combinations, index=False)
                    logger.info(f"Exported combinations to: {args.export_combinations}")

            except Exception as e:
                logger.error(f"Failed to create combinations: {e}")

        if args.simulation_stats:
            try:
                if not (buildings_path.exists() and weather_table_path.exists()):
                    logger.error("Building or weather tables not found. Run --create-table and --create-weather-table first.")
                else:
                    buildings_df = pd.read_csv(buildings_path)
                    weather_df = pd.read_csv(weather_table_path)

                    print("\n🏢 Building-Weather Simulation Statistics:")
                    print(f"  Total buildings: {len(buildings_df)}")
                    print(f"  Total weather locations: {len(weather_df)}")

                    # Buildings by climate zone
                    print("\n  Buildings by climate zone:")
                    for zone, count in buildings_df['climate_zone'].value_counts().items():
                        print(f"    {zone}: {count} buildings")

                    # Weather files by climate zone
                    print("\n  Weather files by climate zone:")
                    for zone, count in weather_df['climate_zone_code'].value_counts().items():
                        print(f"    {zone}: {count} weather files")

                    # Potential combinations by climate zone; the total is
                    # accumulated in the same pass.
                    print("\n  Potential combinations by climate zone:")
                    total_potential = 0
                    for zone in buildings_df['climate_zone'].unique():
                        building_count = len(buildings_df[buildings_df['climate_zone'] == zone])
                        weather_count = len(weather_df[weather_df['climate_zone_code'] == zone])
                        zone_combinations = building_count * weather_count
                        total_potential += zone_combinations
                        print(f"    {zone}: {building_count} buildings × {weather_count} weather = {zone_combinations} combinations")

                    print(f"\n  Total potential combinations: {total_potential}")

                    # Check if combinations have been created
                    if combinations_path.exists():
                        combinations_df = pd.read_csv(combinations_path)
                        print(f"  Created combinations: {len(combinations_df)}")
                    else:
                        print("  Created combinations: 0 (run --create-combinations)")

            except Exception as e:
                logger.error(f"Failed to show simulation statistics: {e}")

        # Building query operations
        if args.query or args.stats or args.export is not None:
            # Build filter dictionary from the options that were actually given
            filter_names = ('building_type', 'climate_zone', 'variation_type',
                            'occupancy_schedule', 'thermal_scenario')
            filters = {
                name: getattr(args, name)
                for name in filter_names
                if getattr(args, name)
            }

            # Build WWR range; compare against None so an explicit 0.0 bound
            # is honoured instead of being replaced by the default.
            wwr_range = None
            if args.min_wwr is not None or args.max_wwr is not None:
                wwr_range = (
                    args.min_wwr if args.min_wwr is not None else 0.0,
                    args.max_wwr if args.max_wwr is not None else 1.0,
                )

            # Query buildings
            if args.query or args.export is not None:
                buildings = pipeline.get_buildings(
                    wwr_range=wwr_range,
                    min_floor_area=args.min_floor_area,
                    max_floor_area=args.max_floor_area,
                    **filters
                )

                if args.query:
                    logger.info(f"Found {len(buildings)} buildings matching criteria")
                    if buildings:
                        print("\nMatching buildings:")
                        for i, building in enumerate(buildings[:10], 1):  # Show first 10
                            print(f"  {i:2d}. {building['name']}")
                            print(f"      Type: {building['building_type']}, Climate: {building['climate_zone']}")
                            print(f"      Variation: {building['variation_type']}, Occupancy: {building['occupancy_schedule']}")
                            if 'thermal_scenario' in building:
                                print(f"      Thermal: {building['thermal_scenario']}, WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
                            else:
                                print(f"      WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
                            print()

                        if len(buildings) > 10:
                            print(f"  ... and {len(buildings) - 10} more buildings")
                    else:
                        print("No buildings found matching the criteria")

                if args.export is not None:
                    # NOTE(review): only the keyword filters are forwarded here;
                    # the WWR / floor-area limits used for `buildings` are not,
                    # so the logged count may differ from the exported rows.
                    # Confirm against export_building_list's signature.
                    pipeline.export_building_list(args.export, **filters)
                    logger.info(f"Exported {len(buildings)} buildings to {args.export}")

            # Show statistics
            if args.stats:
                stats = pipeline.get_summary_stats()
                _print_nested_stats("\n📊 Database Statistics:", stats)

        logger.info("Pipeline execution completed successfully!")

    except KeyboardInterrupt:
        logger.info("Pipeline execution interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Pipeline execution failed: {e}")
        if args.log_level == "DEBUG":
            import traceback
            traceback.print_exc()
        sys.exit(1)


def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser with every option group of the CLI."""
    parser = argparse.ArgumentParser(
        description="Building Processing Pipeline - Process and create variations of building energy models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Process everything
  python scripts/main.py --all

  # Preprocess existing files
  python scripts/main.py --preprocess

  # Weather management
  python scripts/main.py --create-weather-table
  python scripts/main.py --weather-stats
  python scripts/main.py --query-weather --country USA --weather-climate-zone 6A

  # Building-weather simulation
  python scripts/main.py --match-buildings-weather
  python scripts/main.py --create-combinations --simulation-climate-zones 6A 2A
  python scripts/main.py --simulation-stats

  # Weather variations
  python scripts/main.py --weather-vars
  python scripts/main.py --weather-vars --weather-types base real_weather
  python scripts/main.py --weather-variation-stats

  # Create specific variations
  python scripts/main.py --occupancy-vars --occupancy-schedules standard low_occupancy high_occupancy
  python scripts/main.py --window-vars --wwr-ratios 0.2 0.4 0.6 0.8
  python scripts/main.py --thermal-vars --thermal-scenarios default high_performance low_performance

  # Create combined variations with thermal
  python scripts/main.py --combined-vars --thermal-scenarios default high_performance

  # Create table and query
  python scripts/main.py --create-table
  python scripts/main.py --query --building-type office --climate-zone 6A

  # Query and export
  python scripts/main.py --query --building-type office --export results.csv
  python scripts/main.py --query --thermal-scenario high_performance --export high_performance_buildings.csv
        """
    )

    # Global configuration
    parser.add_argument("--data-dir", default="data", help="Data directory (default: data)")
    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                       default="INFO", help="Logging level (default: INFO)")

    # Step selection arguments
    step_group = parser.add_argument_group("Pipeline Steps")
    step_group.add_argument("--preprocess", action="store_true", help="Preprocess buildings (add meters, setpoints, etc.)")
    step_group.add_argument("--occupancy-vars", action="store_true", help="Create occupancy variations")
    step_group.add_argument("--window-vars", action="store_true", help="Create window variations")
    step_group.add_argument("--thermal-vars", action="store_true", help="Create thermal resistance variations")
    step_group.add_argument("--combined-vars", action="store_true", help="Create combined variations")
    step_group.add_argument("--create-table", action="store_true", help="Create/update building database table")
    step_group.add_argument("--all", action="store_true", help="Run all steps")

    # Weather management arguments
    weather_group = parser.add_argument_group("Weather Management")
    weather_group.add_argument("--create-weather-table", action="store_true",
                              help="Create weather table from all EPW files")
    weather_group.add_argument("--weather-stats", action="store_true",
                              help="Show weather collection statistics")
    weather_group.add_argument("--query-weather", action="store_true",
                              help="Query weather locations")
    weather_group.add_argument("--export-weather", type=Path,
                              help="Export weather query results to CSV file")
    weather_group.add_argument("--validate-weather", action="store_true",
                              help="Validate all EPW files")
    weather_group.add_argument("--weather-vars", action="store_true",
                              help="Create comprehensive weather variations for all buildings")
    weather_group.add_argument("--weather-types", nargs="+",
                              default=["base", "climate_zone_expanded", "real_weather"],
                              choices=["base", "climate_zone_expanded", "real_weather"],
                              help="Weather variation types to create")
    weather_group.add_argument("--weather-variation-stats", action="store_true",
                              help="Show weather variation statistics")

    # Building-Weather Simulation arguments
    simulation_group = parser.add_argument_group("Building-Weather Simulation")
    simulation_group.add_argument("--match-buildings-weather", action="store_true",
                                 help="Match buildings to weather files (add base_weather_id)")
    simulation_group.add_argument("--create-combinations", action="store_true",
                                 help="Create building-weather combinations for simulation")
    simulation_group.add_argument("--simulation-climate-zones", nargs="+",
                                 help="Climate zones to include in simulations (e.g., 6A 2A 4A)")
    simulation_group.add_argument("--simulation-stats", action="store_true",
                                 help="Show building-weather combination statistics")
    simulation_group.add_argument("--export-combinations", type=Path,
                                 help="Export building-weather combinations to CSV")

    # Weather filtering options
    weather_filter_group = parser.add_argument_group("Weather Filtering Options")
    weather_filter_group.add_argument("--country", help="Filter by country code (e.g., USA, CAN, CHN)")
    weather_filter_group.add_argument("--weather-climate-zone", help="Filter weather by climate zone")
    weather_filter_group.add_argument("--data-source", choices=["base", "expanded", "real"],
                                     help="Filter by data source")
    weather_filter_group.add_argument("--min-latitude", type=float, help="Minimum latitude")
    weather_filter_group.add_argument("--max-latitude", type=float, help="Maximum latitude")
    weather_filter_group.add_argument("--min-longitude", type=float, help="Minimum longitude")
    weather_filter_group.add_argument("--max-longitude", type=float, help="Maximum longitude")

    # Query arguments
    query_group = parser.add_argument_group("Building Query Options")
    query_group.add_argument("--query", action="store_true", help="Query buildings from database")
    query_group.add_argument("--export", type=Path, help="Export query results to CSV file")
    query_group.add_argument("--stats", action="store_true", help="Show database statistics")

    # Preprocessing configuration
    preprocess_group = parser.add_argument_group("Preprocessing Configuration")
    preprocess_group.add_argument("--no-meters", action="store_true",
                                 help="Skip adding HVAC meters")
    preprocess_group.add_argument("--no-outdoor-vars", action="store_true",
                                 help="Skip adding outdoor air variables")
    preprocess_group.add_argument("--timesteps-per-hour", type=int, default=4,
                                 help="Simulation timesteps per hour (default: 4)")
    preprocess_group.add_argument("--no-setpoint-control", action="store_true",
                                 help="Skip adding setpoint control")
    preprocess_group.add_argument("--no-validation", action="store_true",
                                 help="Skip validation of processed files")

    # Variation configuration
    variation_group = parser.add_argument_group("Variation Configuration")
    variation_group.add_argument("--occupancy-schedules", nargs="+",
                            default=["standard", "low_occupancy", "high_occupancy"],
                            help="Occupancy schedules. Options: standard, low_occupancy, high_occupancy, early_shift, late_shift, retail, school, flexible_hybrid, hospital, gym, warehouse, 24_7")
    variation_group.add_argument("--wwr-ratios", nargs="+", type=float,
                                default=[0.2, 0.4, 0.6, 0.8],
                                help="Window-to-wall ratios (0.0-1.0)")
    variation_group.add_argument("--thermal-scenarios", nargs="+",
                            default=["default", "high_performance", "low_performance"],
                            help="Thermal scenarios. Options: default, high_performance, low_performance")

    # Building filtering options
    filter_group = parser.add_argument_group("Building Filtering Options")
    filter_group.add_argument("--building-type",
                         choices=["office", "retail", "school", "hospital", "warehouse", "hotel", "apartment", "restaurant", "healthcare"])
    filter_group.add_argument("--climate-zone",
                             help="Filter by climate zone (e.g., 4A, 5A, 6A)")
    filter_group.add_argument("--variation-type", choices=["base", "occupancy", "windows", "thermal", "combined"],
                             help="Filter by variation type")
    filter_group.add_argument("--occupancy-schedule",
                         choices=["standard", "low_occupancy", "high_occupancy", "early_shift",
                                 "late_shift", "retail", "school", "flexible_hybrid",
                                 "hospital", "gym", "warehouse", "24_7"],
                         help="Filter by occupancy schedule")
    filter_group.add_argument("--thermal-scenario",
                         choices=["default", "high_performance", "low_performance"],
                         help="Filter by thermal scenario")
    filter_group.add_argument("--min-floor-area", type=float, help="Minimum floor area (m²)")
    filter_group.add_argument("--max-floor-area", type=float, help="Maximum floor area (m²)")
    filter_group.add_argument("--min-wwr", type=float, help="Minimum window-to-wall ratio")
    filter_group.add_argument("--max-wwr", type=float, help="Maximum window-to-wall ratio")

    # Table configuration
    table_group = parser.add_argument_group("Table Configuration")
    table_group.add_argument("--update-existing", action="store_true",
                            help="Update existing table instead of creating new")

    return parser


def _filter_weather_locations(df, args):
    """Return the rows of the weather table that match the weather filter options."""
    if args.country:
        df = df[df['country'] == args.country]
    if args.weather_climate_zone:
        df = df[df['climate_zone_code'] == args.weather_climate_zone]
    if args.data_source:
        df = df[df['data_source'] == args.data_source]
    # Numeric bounds are compared against None: 0.0 (equator / prime
    # meridian) is a legitimate bound and must not be treated as "unset".
    if args.min_latitude is not None:
        df = df[df['latitude'] >= args.min_latitude]
    if args.max_latitude is not None:
        df = df[df['latitude'] <= args.max_latitude]
    if args.min_longitude is not None:
        df = df[df['longitude'] >= args.min_longitude]
    if args.max_longitude is not None:
        df = df[df['longitude'] <= args.max_longitude]
    return df


def _print_nested_stats(title, stats):
    """Print a title followed by a statistics mapping, indenting nested dicts one level."""
    print(title)
    for key, value in stats.items():
        if isinstance(value, dict):
            print(f"  {key}:")
            for subkey, subvalue in value.items():
                print(f"    {subkey}: {subvalue}")
        else:
            print(f"  {key}: {value}")

# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()