"""Command-line entry point for the building processing pipeline.

Parses the CLI flags, builds a ``BuildingPipeline`` for the chosen data
directory and runs the requested steps: preprocessing, variation creation,
table creation, weather management, building-weather combination, and
building queries/exports.
"""

import argparse
import logging
import sys
from pathlib import Path

import pandas as pd

sys.path.insert(0, str(Path(__file__).parent.parent))

from building_gen.core import BuildingPipeline, create_building_weather_combinations

logger = logging.getLogger(__name__)

# Shown verbatim below --help (the parser uses RawDescriptionHelpFormatter).
_EPILOG = """
Examples:
  # Process everything
  python scripts/main.py --all

  # Preprocess existing files
  python scripts/main.py --preprocess

  # Weather management
  python scripts/main.py --create-weather-table
  python scripts/main.py --weather-stats
  python scripts/main.py --query-weather --country USA --weather-climate-zone 6A

  # Building-weather simulation
  python scripts/main.py --match-buildings-weather
  python scripts/main.py --create-combinations --simulation-climate-zones 6A 2A
  python scripts/main.py --simulation-stats

  # Weather variations
  python scripts/main.py --weather-vars
  python scripts/main.py --weather-vars --weather-types base real_weather
  python scripts/main.py --weather-variation-stats

  # Create specific variations
  python scripts/main.py --occupancy-vars --occupancy-schedules standard low_occupancy high_occupancy
  python scripts/main.py --window-vars --wwr-ratios 0.2 0.4 0.6 0.8
  python scripts/main.py --thermal-vars --thermal-scenarios default high_performance low_performance

  # Create combined variations with thermal
  python scripts/main.py --combined-vars --thermal-scenarios default high_performance

  # Create table and query
  python scripts/main.py --create-table
  python scripts/main.py --query --building-type office --climate-zone 6A

  # Query and export
  python scripts/main.py --query --building-type office --export results.csv
  python scripts/main.py --query --thermal-scenario high_performance --export high_performance_buildings.csv
"""

# Namespace attributes that, when set, mean the user asked for some work.
# --export and --export-weather are included because the query sections
# below treat them as stand-alone actions.
_ACTION_FLAGS = (
    "all",
    "preprocess",
    "occupancy_vars",
    "window_vars",
    "thermal_vars",
    "combined_vars",
    "create_table",
    "query",
    "stats",
    "export",
    "create_weather_table",
    "weather_stats",
    "query_weather",
    "export_weather",
    "validate_weather",
    "match_buildings_weather",
    "create_combinations",
    "simulation_stats",
    "weather_vars",
    "weather_variation_stats",
)

# Categorical building filters; the attribute name doubles as the filter key.
_BUILDING_FILTER_FIELDS = (
    "building_type",
    "climate_zone",
    "variation_type",
    "occupancy_schedule",
    "thermal_scenario",
)


def configure_logging(level: str = "INFO"):
    """Configure root logging and quieten noisy Google client loggers.

    Args:
        level: Name of a ``logging`` level (case-insensitive). Unknown names
            fall back to ``logging.INFO``.
    """
    log_level = getattr(logging, level.upper(), logging.INFO)
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Reduce noise from external libraries
    logging.getLogger('googleapiclient').setLevel(logging.WARNING)
    logging.getLogger('google_auth_oauthlib').setLevel(logging.WARNING)


def _build_parser() -> argparse.ArgumentParser:
    """Return the argument parser describing every CLI option."""
    parser = argparse.ArgumentParser(
        description="Building Processing Pipeline - Process and create variations of building energy models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )

    # Global configuration
    parser.add_argument("--data-dir", default="data", help="Data directory (default: data)")
    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        default="INFO", help="Logging level (default: INFO)")

    # Step selection arguments
    step_group = parser.add_argument_group("Pipeline Steps")
    step_group.add_argument("--preprocess", action="store_true",
                            help="Preprocess buildings (add meters, setpoints, etc.)")
    step_group.add_argument("--occupancy-vars", action="store_true",
                            help="Create occupancy variations")
    step_group.add_argument("--window-vars", action="store_true",
                            help="Create window variations")
    step_group.add_argument("--thermal-vars", action="store_true",
                            help="Create thermal resistance variations")
    step_group.add_argument("--combined-vars", action="store_true",
                            help="Create combined variations")
    step_group.add_argument("--create-table", action="store_true",
                            help="Create/update building database table")
    step_group.add_argument("--all", action="store_true", help="Run all steps")

    # Weather management arguments
    weather_group = parser.add_argument_group("Weather Management")
    weather_group.add_argument("--create-weather-table", action="store_true",
                               help="Create weather table from all EPW files")
    weather_group.add_argument("--weather-stats", action="store_true",
                               help="Show weather collection statistics")
    weather_group.add_argument("--query-weather", action="store_true",
                               help="Query weather locations")
    weather_group.add_argument("--export-weather", type=Path,
                               help="Export weather query results to CSV file")
    weather_group.add_argument("--validate-weather", action="store_true",
                               help="Validate all EPW files")
    weather_group.add_argument("--weather-vars", action="store_true",
                               help="Create comprehensive weather variations for all buildings")
    weather_group.add_argument("--weather-types", nargs="+",
                               default=["base", "climate_zone_expanded", "real_weather"],
                               choices=["base", "climate_zone_expanded", "real_weather"],
                               help="Weather variation types to create")
    weather_group.add_argument("--weather-variation-stats", action="store_true",
                               help="Show weather variation statistics")

    # Building-Weather Simulation arguments
    simulation_group = parser.add_argument_group("Building-Weather Simulation")
    simulation_group.add_argument("--match-buildings-weather", action="store_true",
                                  help="Match buildings to weather files (add base_weather_id)")
    simulation_group.add_argument("--create-combinations", action="store_true",
                                  help="Create building-weather combinations for simulation")
    simulation_group.add_argument("--simulation-climate-zones", nargs="+",
                                  help="Climate zones to include in simulations (e.g., 6A 2A 4A)")
    simulation_group.add_argument("--simulation-stats", action="store_true",
                                  help="Show building-weather combination statistics")
    simulation_group.add_argument("--export-combinations", type=Path,
                                  help="Export building-weather combinations to CSV")

    # Weather filtering options
    weather_filter_group = parser.add_argument_group("Weather Filtering Options")
    weather_filter_group.add_argument("--country",
                                      help="Filter by country code (e.g., USA, CAN, CHN)")
    weather_filter_group.add_argument("--weather-climate-zone",
                                      help="Filter weather by climate zone")
    weather_filter_group.add_argument("--data-source", choices=["base", "expanded", "real"],
                                      help="Filter by data source")
    weather_filter_group.add_argument("--min-latitude", type=float, help="Minimum latitude")
    weather_filter_group.add_argument("--max-latitude", type=float, help="Maximum latitude")
    weather_filter_group.add_argument("--min-longitude", type=float, help="Minimum longitude")
    weather_filter_group.add_argument("--max-longitude", type=float, help="Maximum longitude")

    # Query arguments
    query_group = parser.add_argument_group("Building Query Options")
    query_group.add_argument("--query", action="store_true",
                             help="Query buildings from database")
    query_group.add_argument("--export", type=Path,
                             help="Export query results to CSV file")
    query_group.add_argument("--stats", action="store_true",
                             help="Show database statistics")

    # Preprocessing configuration
    preprocess_group = parser.add_argument_group("Preprocessing Configuration")
    preprocess_group.add_argument("--no-meters", action="store_true",
                                  help="Skip adding HVAC meters")
    preprocess_group.add_argument("--no-outdoor-vars", action="store_true",
                                  help="Skip adding outdoor air variables")
    preprocess_group.add_argument("--timesteps-per-hour", type=int, default=4,
                                  help="Simulation timesteps per hour (default: 4)")
    preprocess_group.add_argument("--no-setpoint-control", action="store_true",
                                  help="Skip adding setpoint control")
    preprocess_group.add_argument("--no-validation", action="store_true",
                                  help="Skip validation of processed files")

    # Variation configuration
    variation_group = parser.add_argument_group("Variation Configuration")
    variation_group.add_argument("--occupancy-schedules", nargs="+",
                                 default=["standard", "low_occupancy", "high_occupancy"],
                                 help="Occupancy schedules. Options: standard, low_occupancy, high_occupancy, early_shift, late_shift, retail, school, flexible_hybrid, hospital, gym, warehouse, 24_7")
    variation_group.add_argument("--wwr-ratios", nargs="+", type=float,
                                 default=[0.2, 0.4, 0.6, 0.8],
                                 help="Window-to-wall ratios (0.0-1.0)")
    variation_group.add_argument("--thermal-scenarios", nargs="+",
                                 default=["default", "high_performance", "low_performance"],
                                 help="Thermal scenarios. Options: default, high_performance, low_performance")

    # Building filtering options
    filter_group = parser.add_argument_group("Building Filtering Options")
    filter_group.add_argument("--building-type",
                              choices=["office", "retail", "school", "hospital", "warehouse",
                                       "hotel", "apartment", "restaurant", "healthcare"])
    filter_group.add_argument("--climate-zone",
                              help="Filter by climate zone (e.g., 4A, 5A, 6A)")
    filter_group.add_argument("--variation-type",
                              choices=["base", "occupancy", "windows", "thermal", "combined"],
                              help="Filter by variation type")
    filter_group.add_argument("--occupancy-schedule",
                              choices=["standard", "low_occupancy", "high_occupancy",
                                       "early_shift", "late_shift", "retail", "school",
                                       "flexible_hybrid", "hospital", "gym", "warehouse", "24_7"],
                              help="Filter by occupancy schedule")
    filter_group.add_argument("--thermal-scenario",
                              choices=["default", "high_performance", "low_performance"],
                              help="Filter by thermal scenario")
    filter_group.add_argument("--min-floor-area", type=float, help="Minimum floor area (m²)")
    filter_group.add_argument("--max-floor-area", type=float, help="Maximum floor area (m²)")
    filter_group.add_argument("--min-wwr", type=float, help="Minimum window-to-wall ratio")
    filter_group.add_argument("--max-wwr", type=float, help="Maximum window-to-wall ratio")

    # Table configuration
    table_group = parser.add_argument_group("Table Configuration")
    table_group.add_argument("--update-existing", action="store_true",
                             help="Update existing table instead of creating new")

    return parser


def _action_requested(args: argparse.Namespace) -> bool:
    """Return True when at least one action flag/option is set on *args*."""
    return any(getattr(args, flag) for flag in _ACTION_FLAGS)


def _weather_dirs(data_dir) -> list:
    """Return the three EPW source directories under *data_dir*."""
    root = Path(data_dir)
    return [root / "weather/base", root / "weather/expanded", root / "weather/real"]


def _weather_table_path(data_dir) -> Path:
    """Return the path of the combined weather table CSV under *data_dir*."""
    return Path(data_dir) / "weather/tables/all_weather.csv"


def _print_nested_stats(title: str, stats: dict) -> None:
    """Print *title*, then each stats entry, expanding dict values one level."""
    print(title)
    for key, value in stats.items():
        if isinstance(value, dict):
            print(f" {key}:")
            for subkey, subvalue in value.items():
                print(f" {subkey}: {subvalue}")
        else:
            print(f" {key}: {value}")


def _filter_weather(df: pd.DataFrame, args: argparse.Namespace) -> pd.DataFrame:
    """Return the rows of *df* matching the weather filter options in *args*.

    Numeric bounds are compared against ``None`` rather than tested for
    truthiness, so an explicit bound of 0 (equator / prime meridian) is
    honoured instead of being silently dropped.
    """
    if args.country:
        df = df[df['country'] == args.country]
    if args.weather_climate_zone:
        df = df[df['climate_zone_code'] == args.weather_climate_zone]
    if args.data_source:
        df = df[df['data_source'] == args.data_source]
    if args.min_latitude is not None:
        df = df[df['latitude'] >= args.min_latitude]
    if args.max_latitude is not None:
        df = df[df['latitude'] <= args.max_latitude]
    if args.min_longitude is not None:
        df = df[df['longitude'] >= args.min_longitude]
    if args.max_longitude is not None:
        df = df[df['longitude'] <= args.max_longitude]
    return df


def _run_building_steps(args: argparse.Namespace, pipeline) -> None:
    """Run the preprocessing, variation and table steps selected in *args*.

    Exceptions are not caught here; they propagate to the caller.
    """
    if args.all or args.preprocess:
        logger.info("Starting preprocessing...")
        processed, failed = pipeline.preprocess_buildings(
            add_meters=not args.no_meters,
            add_outdoor_vars=not args.no_outdoor_vars,
            timesteps_per_hour=args.timesteps_per_hour,
            add_setpoint_control=not args.no_setpoint_control,
            validate=not args.no_validation
        )
        logger.info(f"Preprocessed {len(processed)} buildings ({len(failed)} failed)")

    if args.all or args.occupancy_vars:
        logger.info("Creating occupancy variations...")
        count, failed = pipeline.create_occupancy_variations(args.occupancy_schedules)
        logger.info(f"Created {count} occupancy variations ({len(failed)} failed)")

    if args.all or args.window_vars:
        logger.info("Creating window variations...")
        count, failed = pipeline.create_window_variations(args.wwr_ratios)
        logger.info(f"Created {count} window variations ({len(failed)} failed)")

    if args.all or args.thermal_vars:
        logger.info("Creating thermal resistance variations...")
        count, failed = pipeline.create_thermal_variations(args.thermal_scenarios)
        logger.info(f"Created {count} thermal variations ({len(failed)} failed)")

    if args.all or args.combined_vars:
        logger.info("Creating combined variations...")
        # Only occupancy x thermal is combined; windows are left out here.
        combinations = [
            {"occupancy": occ, "thermal": thermal}
            for occ in args.occupancy_schedules
            for thermal in args.thermal_scenarios
        ]
        count, failed = pipeline.create_combined_variations(
            variation_types=["occupancy", "thermal"],
            combinations=combinations
        )
        logger.info(f"Created {count} combined variations ({len(failed)} failed)")

    if args.all or args.create_table:
        logger.info("Creating building table...")
        table_file = pipeline.create_building_table(update_existing=args.update_existing)
        logger.info(f"Building table created: {table_file}")


def _create_weather_table(args: argparse.Namespace) -> None:
    """Build the combined weather table CSV; failures are logged, not raised."""
    logger.info("Creating weather table...")
    try:
        from building_gen.database.weather_table import create_weather_table_with_real

        df = create_weather_table_with_real(
            _weather_dirs(args.data_dir), _weather_table_path(args.data_dir)
        )
        logger.info(f"Created weather table with {len(df)} locations")
    except ImportError:
        logger.error("Weather table functionality not implemented yet")
    except Exception as e:
        logger.error(f"Failed to create weather table: {e}")


def _show_weather_stats(args: argparse.Namespace) -> None:
    """Print summary statistics of the weather table; failures are logged."""
    try:
        weather_table_path = _weather_table_path(args.data_dir)
        if not weather_table_path.exists():
            logger.error("Weather table not found. Run --create-weather-table first.")
            return

        df = pd.read_csv(weather_table_path)
        print("\n🌤️ Weather Collection Statistics:")
        print(f" Total locations: {len(df)}")
        print(f" Countries: {df['country'].nunique()}")
        print(f" Data sources: {df['data_source'].value_counts().to_dict()}")

        print("\n Top 10 countries by location count:")
        for country, count in df['country'].value_counts().head(10).items():
            print(f" {country}: {count}")

        if 'climate_zone_code' in df.columns:
            print(f"\n Climate zones represented: {df['climate_zone_code'].nunique()}")
            print(" Climate zone distribution:")
            for zone, count in df['climate_zone_code'].value_counts().head(10).items():
                print(f" {zone}: {count}")
    except Exception as e:
        logger.error(f"Failed to show weather statistics: {e}")


def _query_weather(args: argparse.Namespace) -> None:
    """Filter the weather table, then print and/or export the matching rows.

    Printing happens when ``--query-weather`` is set, exporting when
    ``--export-weather`` is set. Failures are logged, not raised.
    """
    try:
        weather_table_path = _weather_table_path(args.data_dir)
        if not weather_table_path.exists():
            logger.error("Weather table not found. Run --create-weather-table first.")
            return

        df = _filter_weather(pd.read_csv(weather_table_path), args)

        if args.query_weather:
            print(f"\n Found {len(df)} weather locations matching criteria:")
            for _, row in df.head(15).iterrows():
                print(f" {row['place']}, {row['country']}")
                print(f" Coordinates: {row['latitude']:.2f}, {row['longitude']:.2f}")
                if 'climate_zone_code' in row:
                    print(f" Climate zone: {row['climate_zone_code']}")
                print(f" Source: {row['data_source']}")
                print()

            if len(df) > 15:
                print(f" ... and {len(df) - 15} more locations")

        if args.export_weather:
            df.to_csv(args.export_weather, index=False)
            logger.info(f"Exported {len(df)} weather locations to {args.export_weather}")
    except Exception as e:
        logger.error(f"Failed to query weather: {e}")


def _validate_weather(args: argparse.Namespace) -> None:
    """Try to load every ``*.epw`` file with ladybug and count the failures."""
    logger.info("Validating weather files...")
    try:
        from ladybug.epw import EPW

        valid_count = 0
        invalid_count = 0

        for weather_dir in _weather_dirs(args.data_dir):
            for epw_file in weather_dir.glob("*.epw"):
                try:
                    weather = EPW(epw_file)
                    # Basic validation - check if we can read location data
                    _ = weather.location.city
                    _ = weather.location.latitude
                    _ = weather.location.longitude
                    valid_count += 1
                except Exception as e:
                    logger.warning(f"Invalid weather file {epw_file}: {e}")
                    invalid_count += 1

        logger.info(f"Weather validation complete: {valid_count} valid, {invalid_count} invalid")
    except ImportError:
        logger.error("ladybug library not available for weather validation")
    except Exception as e:
        logger.error(f"Weather validation failed: {e}")


def _create_weather_variations(args: argparse.Namespace, pipeline) -> None:
    """Create weather variations via the pipeline; failures are logged."""
    logger.info("Creating weather variations...")
    try:
        count, failed = pipeline.create_weather_variations(args.weather_types)
        logger.info(f"Created {count} weather variations ({len(failed)} failed)")
    except Exception as e:
        logger.error(f"Failed to create weather variations: {e}")


def _show_weather_variation_stats(pipeline) -> None:
    """Print the pipeline's weather variation statistics; failures are logged."""
    try:
        stats = pipeline.get_weather_variation_stats()
        _print_nested_stats("\n🌤️ Weather Variation Statistics:", stats)
    except Exception as e:
        logger.error(f"Failed to show weather variation statistics: {e}")


def _match_buildings_weather(pipeline) -> None:
    """Match buildings to weather files via the pipeline; failures are logged."""
    logger.info("Matching buildings to weather files...")
    try:
        buildings_df = pipeline.match_buildings_to_weather()
        logger.info(f"Successfully matched {len(buildings_df)} buildings to weather files")
    except Exception as e:
        logger.error(f"Failed to match buildings to weather: {e}")


def _create_combinations(args: argparse.Namespace) -> None:
    """Create building-weather combinations and save them as CSV.

    The result is always written to ``tables/building_weather_combinations.csv``
    under the data directory, and additionally to ``--export-combinations``
    when that option is given. Failures are logged, not raised.
    """
    logger.info("Creating building-weather combinations...")
    try:
        buildings_path = Path(args.data_dir) / "tables/buildings.csv"
        weather_path = _weather_table_path(args.data_dir)

        buildings_df = pd.read_csv(buildings_path)
        weather_df = pd.read_csv(weather_path)

        combinations = create_building_weather_combinations(
            buildings_df,
            weather_df,
            weather_df,  # Using same table for base_weather_table - adjust if you have a separate base weather table
            args.simulation_climate_zones
        )

        # Save combinations to CSV
        combinations_df = pd.DataFrame(combinations, columns=['building_id', 'weather_id'])
        combinations_path = Path(args.data_dir) / "tables/building_weather_combinations.csv"
        combinations_df.to_csv(combinations_path, index=False)

        logger.info(f"Created {len(combinations)} building-weather combinations")
        logger.info(f"Combinations saved to: {combinations_path}")

        if args.export_combinations:
            combinations_df.to_csv(args.export_combinations, index=False)
            logger.info(f"Exported combinations to: {args.export_combinations}")
    except Exception as e:
        logger.error(f"Failed to create combinations: {e}")


def _show_simulation_stats(args: argparse.Namespace) -> None:
    """Print building/weather counts and potential combinations per climate zone."""
    try:
        buildings_path = Path(args.data_dir) / "tables/buildings.csv"
        weather_path = _weather_table_path(args.data_dir)
        combinations_path = Path(args.data_dir) / "tables/building_weather_combinations.csv"

        if not (buildings_path.exists() and weather_path.exists()):
            logger.error("Building or weather tables not found. Run --create-table and --create-weather-table first.")
            return

        buildings_df = pd.read_csv(buildings_path)
        weather_df = pd.read_csv(weather_path)

        print("\n🏢 Building-Weather Simulation Statistics:")
        print(f" Total buildings: {len(buildings_df)}")
        print(f" Total weather locations: {len(weather_df)}")

        # Buildings by climate zone
        print("\n Buildings by climate zone:")
        for zone, count in buildings_df['climate_zone'].value_counts().items():
            print(f" {zone}: {count} buildings")

        # Weather files by climate zone
        print("\n Weather files by climate zone:")
        for zone, count in weather_df['climate_zone_code'].value_counts().items():
            print(f" {zone}: {count} weather files")

        # Potential combinations by climate zone; the grand total is
        # accumulated in the same pass instead of recomputing every product.
        print("\n Potential combinations by climate zone:")
        total_potential = 0
        for zone in buildings_df['climate_zone'].unique():
            building_count = len(buildings_df[buildings_df['climate_zone'] == zone])
            weather_count = len(weather_df[weather_df['climate_zone_code'] == zone])
            zone_combinations = building_count * weather_count
            total_potential += zone_combinations
            print(f" {zone}: {building_count} buildings × {weather_count} weather = {zone_combinations} combinations")

        print(f"\n Total potential combinations: {total_potential}")

        # Check if combinations have been created
        if combinations_path.exists():
            combinations_df = pd.read_csv(combinations_path)
            print(f" Created combinations: {len(combinations_df)}")
        else:
            print(" Created combinations: 0 (run --create-combinations)")
    except Exception as e:
        logger.error(f"Failed to show simulation statistics: {e}")


def _run_building_queries(args: argparse.Namespace, pipeline) -> None:
    """Handle ``--query``, ``--export`` and ``--stats`` against the building table.

    Exceptions are not caught here; they propagate to the caller.
    """
    # Build filter dictionary from the categorical options that were given.
    filters = {}
    for field in _BUILDING_FILTER_FIELDS:
        value = getattr(args, field)
        if value:
            filters[field] = value

    # Build WWR range. Bounds are compared with None so that an explicit 0
    # is respected; a missing bound defaults to the full 0.0-1.0 span.
    wwr_range = None
    if args.min_wwr is not None or args.max_wwr is not None:
        wwr_range = (
            args.min_wwr if args.min_wwr is not None else 0.0,
            args.max_wwr if args.max_wwr is not None else 1.0,
        )

    if args.query or args.export:
        buildings = pipeline.get_buildings(
            wwr_range=wwr_range,
            min_floor_area=args.min_floor_area,
            max_floor_area=args.max_floor_area,
            **filters
        )

        if args.query:
            logger.info(f"Found {len(buildings)} buildings matching criteria")

            if buildings:
                print("\nMatching buildings:")
                for i, building in enumerate(buildings[:10], 1):  # Show first 10
                    print(f" {i:2d}. {building['name']}")
                    print(f" Type: {building['building_type']}, Climate: {building['climate_zone']}")
                    print(f" Variation: {building['variation_type']}, Occupancy: {building['occupancy_schedule']}")
                    if 'thermal_scenario' in building:
                        print(f" Thermal: {building['thermal_scenario']}, WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
                    else:
                        print(f" WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
                    print()

                if len(buildings) > 10:
                    print(f" ... and {len(buildings) - 10} more buildings")
            else:
                print("No buildings found matching the criteria")

        if args.export:
            # NOTE(review): only the categorical filters are forwarded here,
            # while the count logged below comes from a query that also
            # applied the WWR / floor-area ranges. Confirm whether
            # export_building_list accepts those range arguments.
            pipeline.export_building_list(args.export, **filters)
            logger.info(f"Exported {len(buildings)} buildings to {args.export}")

    # Show statistics
    if args.stats:
        _print_nested_stats("\n📊 Database Statistics:", pipeline.get_summary_stats())


def main():
    """Parse the command line and run the requested pipeline actions.

    Exits with status 1 when a WWR ratio is outside 0.0-1.0, the pipeline
    cannot be initialised, no action was requested, the run is interrupted,
    or a building step / building query raises. Weather and simulation
    actions log their own failures and do not abort the run.
    """
    args = _build_parser().parse_args()

    configure_logging(args.log_level)

    # Validate arguments
    if args.wwr_ratios:
        for wwr in args.wwr_ratios:
            if not 0.0 <= wwr <= 1.0:
                logger.error(f"WWR ratio must be between 0.0 and 1.0, got {wwr}")
                sys.exit(1)

    # Initialize pipeline
    try:
        pipeline = BuildingPipeline(args.data_dir)
        logger.info(f"Initialized pipeline with data directory: {args.data_dir}")
    except Exception as e:
        logger.error(f"Failed to initialize pipeline: {e}")
        sys.exit(1)

    # Check if any action is requested
    if not _action_requested(args):
        logger.error("No action specified. Use --help for options.")
        sys.exit(1)

    try:
        # Execute pipeline steps
        _run_building_steps(args, pipeline)

        # Weather operations
        if args.create_weather_table:
            _create_weather_table(args)
        if args.weather_stats:
            _show_weather_stats(args)
        if args.query_weather or args.export_weather:
            _query_weather(args)
        if args.validate_weather:
            _validate_weather(args)

        # Weather variations
        if args.weather_vars:
            _create_weather_variations(args, pipeline)
        if args.weather_variation_stats:
            _show_weather_variation_stats(pipeline)

        # Building-Weather Simulation operations
        if args.match_buildings_weather:
            _match_buildings_weather(pipeline)
        if args.create_combinations:
            _create_combinations(args)
        elif args.export_combinations is not None:
            # The export is a by-product of creating combinations; say so
            # instead of silently ignoring the option.
            logger.warning("--export-combinations has no effect without --create-combinations")
        if args.simulation_stats:
            _show_simulation_stats(args)

        # Building query operations
        if args.query or args.stats or args.export:
            _run_building_queries(args, pipeline)

        logger.info("Pipeline execution completed successfully!")

    except KeyboardInterrupt:
        logger.info("Pipeline execution interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Pipeline execution failed: {e}")
        if args.log_level == "DEBUG":
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()