# Repository path: BuildingBench-Dataset / src/streamlit_app.py
# Last commit: "Update src/streamlit_app.py" (dd4ed72, verified)
import argparse
import logging
import sys
from pathlib import Path
import pandas as pd
sys.path.insert(0, str(Path(__file__).parent.parent))
from building_gen.core import BuildingPipeline, create_building_weather_combinations
def configure_logging(level: str = "INFO") -> None:
    """Configure root logging and quieten noisy third-party loggers.

    Args:
        level: Logging level name, matched case-insensitively against the
            ``logging`` module's level constants (e.g. ``"debug"``,
            ``"WARNING"``). A numeric level such as ``logging.DEBUG`` is
            also accepted. Anything that does not resolve to a numeric
            level falls back to ``logging.INFO``.
    """
    if isinstance(level, int) and not isinstance(level, bool):
        # Already a numeric level (e.g. logging.DEBUG); use it as-is.
        log_level = level
    else:
        log_level = getattr(logging, str(level).upper(), logging.INFO)
        # getattr may resolve to a non-level module attribute (for example
        # "BASIC_FORMAT" is a format string); only integers are valid here.
        if not isinstance(log_level, int) or isinstance(log_level, bool):
            log_level = logging.INFO

    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Reduce noise from external libraries
    for noisy_logger in ('googleapiclient', 'google_auth_oauthlib'):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the building processing pipeline."""
    parser = argparse.ArgumentParser(
        description="Building Processing Pipeline - Process and create variations of building energy models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Process everything
python scripts/main.py --all
# Preprocess existing files
python scripts/main.py --preprocess
# Weather management
python scripts/main.py --create-weather-table
python scripts/main.py --weather-stats
python scripts/main.py --query-weather --country USA --weather-climate-zone 6A
# Building-weather simulation
python scripts/main.py --match-buildings-weather
python scripts/main.py --create-combinations --simulation-climate-zones 6A 2A
python scripts/main.py --simulation-stats
# Weather variations
python scripts/main.py --weather-vars
python scripts/main.py --weather-vars --weather-types base real_weather
python scripts/main.py --weather-variation-stats
# Create specific variations
python scripts/main.py --occupancy-vars --occupancy-schedules standard low_occupancy high_occupancy
python scripts/main.py --window-vars --wwr-ratios 0.2 0.4 0.6 0.8
python scripts/main.py --thermal-vars --thermal-scenarios default high_performance low_performance
# Create combined variations with thermal
python scripts/main.py --combined-vars --thermal-scenarios default high_performance
# Create table and query
python scripts/main.py --create-table
python scripts/main.py --query --building-type office --climate-zone 6A
# Query and export
python scripts/main.py --query --building-type office --export results.csv
python scripts/main.py --query --thermal-scenario high_performance --export high_performance_buildings.csv
"""
    )

    # Global configuration
    parser.add_argument("--data-dir", default="data", help="Data directory (default: data)")
    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        default="INFO", help="Logging level (default: INFO)")

    # Step selection arguments
    step_group = parser.add_argument_group("Pipeline Steps")
    step_group.add_argument("--preprocess", action="store_true", help="Preprocess buildings (add meters, setpoints, etc.)")
    step_group.add_argument("--occupancy-vars", action="store_true", help="Create occupancy variations")
    step_group.add_argument("--window-vars", action="store_true", help="Create window variations")
    step_group.add_argument("--thermal-vars", action="store_true", help="Create thermal resistance variations")
    step_group.add_argument("--combined-vars", action="store_true", help="Create combined variations")
    step_group.add_argument("--create-table", action="store_true", help="Create/update building database table")
    step_group.add_argument("--all", action="store_true", help="Run all steps")

    # Weather management arguments
    weather_group = parser.add_argument_group("Weather Management")
    weather_group.add_argument("--create-weather-table", action="store_true",
                               help="Create weather table from all EPW files")
    weather_group.add_argument("--weather-stats", action="store_true",
                               help="Show weather collection statistics")
    weather_group.add_argument("--query-weather", action="store_true",
                               help="Query weather locations")
    weather_group.add_argument("--export-weather", type=Path,
                               help="Export weather query results to CSV file")
    weather_group.add_argument("--validate-weather", action="store_true",
                               help="Validate all EPW files")
    weather_group.add_argument("--weather-vars", action="store_true",
                               help="Create comprehensive weather variations for all buildings")
    weather_group.add_argument("--weather-types", nargs="+",
                               default=["base", "climate_zone_expanded", "real_weather"],
                               choices=["base", "climate_zone_expanded", "real_weather"],
                               help="Weather variation types to create")
    weather_group.add_argument("--weather-variation-stats", action="store_true",
                               help="Show weather variation statistics")

    # Building-Weather Simulation arguments
    simulation_group = parser.add_argument_group("Building-Weather Simulation")
    simulation_group.add_argument("--match-buildings-weather", action="store_true",
                                  help="Match buildings to weather files (add base_weather_id)")
    simulation_group.add_argument("--create-combinations", action="store_true",
                                  help="Create building-weather combinations for simulation")
    simulation_group.add_argument("--simulation-climate-zones", nargs="+",
                                  help="Climate zones to include in simulations (e.g., 6A 2A 4A)")
    simulation_group.add_argument("--simulation-stats", action="store_true",
                                  help="Show building-weather combination statistics")
    simulation_group.add_argument("--export-combinations", type=Path,
                                  help="Export building-weather combinations to CSV")

    # Weather filtering options
    weather_filter_group = parser.add_argument_group("Weather Filtering Options")
    weather_filter_group.add_argument("--country", help="Filter by country code (e.g., USA, CAN, CHN)")
    weather_filter_group.add_argument("--weather-climate-zone", help="Filter weather by climate zone")
    weather_filter_group.add_argument("--data-source", choices=["base", "expanded", "real"],
                                      help="Filter by data source")
    weather_filter_group.add_argument("--min-latitude", type=float, help="Minimum latitude")
    weather_filter_group.add_argument("--max-latitude", type=float, help="Maximum latitude")
    weather_filter_group.add_argument("--min-longitude", type=float, help="Minimum longitude")
    weather_filter_group.add_argument("--max-longitude", type=float, help="Maximum longitude")

    # Query arguments
    query_group = parser.add_argument_group("Building Query Options")
    query_group.add_argument("--query", action="store_true", help="Query buildings from database")
    query_group.add_argument("--export", type=Path, help="Export query results to CSV file")
    query_group.add_argument("--stats", action="store_true", help="Show database statistics")

    # Preprocessing configuration
    preprocess_group = parser.add_argument_group("Preprocessing Configuration")
    preprocess_group.add_argument("--no-meters", action="store_true",
                                  help="Skip adding HVAC meters")
    preprocess_group.add_argument("--no-outdoor-vars", action="store_true",
                                  help="Skip adding outdoor air variables")
    preprocess_group.add_argument("--timesteps-per-hour", type=int, default=4,
                                  help="Simulation timesteps per hour (default: 4)")
    preprocess_group.add_argument("--no-setpoint-control", action="store_true",
                                  help="Skip adding setpoint control")
    preprocess_group.add_argument("--no-validation", action="store_true",
                                  help="Skip validation of processed files")

    # Variation configuration
    variation_group = parser.add_argument_group("Variation Configuration")
    variation_group.add_argument("--occupancy-schedules", nargs="+",
                                 default=["standard", "low_occupancy", "high_occupancy"],
                                 help="Occupancy schedules. Options: standard, low_occupancy, high_occupancy, early_shift, late_shift, retail, school, flexible_hybrid, hospital, gym, warehouse, 24_7")
    variation_group.add_argument("--wwr-ratios", nargs="+", type=float,
                                 default=[0.2, 0.4, 0.6, 0.8],
                                 help="Window-to-wall ratios (0.0-1.0)")
    variation_group.add_argument("--thermal-scenarios", nargs="+",
                                 default=["default", "high_performance", "low_performance"],
                                 help="Thermal scenarios. Options: default, high_performance, low_performance")

    # Building filtering options
    filter_group = parser.add_argument_group("Building Filtering Options")
    filter_group.add_argument("--building-type",
                              choices=["office", "retail", "school", "hospital", "warehouse", "hotel", "apartment", "restaurant", "healthcare"],
                              help="Filter by building type")
    filter_group.add_argument("--climate-zone",
                              help="Filter by climate zone (e.g., 4A, 5A, 6A)")
    filter_group.add_argument("--variation-type", choices=["base", "occupancy", "windows", "thermal", "combined"],
                              help="Filter by variation type")
    filter_group.add_argument("--occupancy-schedule",
                              choices=["standard", "low_occupancy", "high_occupancy", "early_shift",
                                       "late_shift", "retail", "school", "flexible_hybrid",
                                       "hospital", "gym", "warehouse", "24_7"],
                              help="Filter by occupancy schedule")
    filter_group.add_argument("--thermal-scenario",
                              choices=["default", "high_performance", "low_performance"],
                              help="Filter by thermal scenario")
    filter_group.add_argument("--min-floor-area", type=float, help="Minimum floor area (m²)")
    filter_group.add_argument("--max-floor-area", type=float, help="Maximum floor area (m²)")
    filter_group.add_argument("--min-wwr", type=float, help="Minimum window-to-wall ratio")
    filter_group.add_argument("--max-wwr", type=float, help="Maximum window-to-wall ratio")

    # Table configuration
    table_group = parser.add_argument_group("Table Configuration")
    table_group.add_argument("--update-existing", action="store_true",
                             help="Update existing table instead of creating new")
    return parser


def _weather_dirs(data_dir: Path) -> list:
    """Return the three EPW directories (base, expanded, real) under data_dir."""
    return [
        data_dir / "weather/base",
        data_dir / "weather/expanded",
        data_dir / "weather/real"
    ]


def _apply_weather_filters(df: pd.DataFrame, args: argparse.Namespace) -> pd.DataFrame:
    """Return the rows of the weather table that match the CLI weather filters."""
    if args.country:
        df = df[df['country'] == args.country]
    if args.weather_climate_zone:
        df = df[df['climate_zone_code'] == args.weather_climate_zone]
    if args.data_source:
        df = df[df['data_source'] == args.data_source]
    # Compare against None, not truthiness: 0.0 is a legitimate bound
    # (equator / prime meridian) and must not be treated as "not given".
    if args.min_latitude is not None:
        df = df[df['latitude'] >= args.min_latitude]
    if args.max_latitude is not None:
        df = df[df['latitude'] <= args.max_latitude]
    if args.min_longitude is not None:
        df = df[df['longitude'] >= args.min_longitude]
    if args.max_longitude is not None:
        df = df[df['longitude'] <= args.max_longitude]
    return df


def _print_stats(stats: dict) -> None:
    """Print a statistics mapping, expanding nested dicts one level deep."""
    for key, value in stats.items():
        if isinstance(value, dict):
            print(f" {key}:")
            for subkey, subvalue in value.items():
                print(f" {subkey}: {subvalue}")
        else:
            print(f" {key}: {value}")


def main():
    """Parse the command line and run the requested pipeline actions.

    Actions run in a fixed order: pipeline steps (preprocess, variations,
    table), weather operations, weather variations, building-weather
    simulation operations, then building queries/exports/statistics.
    ``--all`` selects the pipeline steps only; weather and simulation
    actions must be requested explicitly.

    Failures inside the weather and simulation actions are logged and the
    run continues. The process exits with status 1 when a WWR ratio is out
    of range, the pipeline cannot be initialized, no action was requested,
    the user interrupts the run, or any other step raises.
    """
    args = _build_parser().parse_args()

    # Configure logging
    configure_logging(args.log_level)
    logger = logging.getLogger(__name__)

    # Validate arguments
    if args.wwr_ratios:
        for wwr in args.wwr_ratios:
            if not 0.0 <= wwr <= 1.0:
                logger.error(f"WWR ratio must be between 0.0 and 1.0, got {wwr}")
                sys.exit(1)

    # Initialize pipeline
    try:
        pipeline = BuildingPipeline(args.data_dir)
        logger.info(f"Initialized pipeline with data directory: {args.data_dir}")
    except Exception as e:
        logger.error(f"Failed to initialize pipeline: {e}")
        sys.exit(1)

    # Check if any action is requested. --export and --export-weather are
    # included because the query sections below run on them alone.
    if not any([args.all, args.preprocess, args.occupancy_vars,
                args.window_vars, args.thermal_vars, args.combined_vars, args.create_table,
                args.query, args.stats, args.export, args.create_weather_table,
                args.weather_stats, args.query_weather, args.export_weather,
                args.validate_weather,
                args.match_buildings_weather, args.create_combinations,
                args.simulation_stats, args.weather_vars, args.weather_variation_stats]):
        logger.error("No action specified. Use --help for options.")
        sys.exit(1)

    # Paths shared by several actions below.
    data_dir = Path(args.data_dir)
    weather_table_path = data_dir / "weather/tables/all_weather.csv"
    buildings_path = data_dir / "tables/buildings.csv"
    combinations_path = data_dir / "tables/building_weather_combinations.csv"

    try:
        # Execute pipeline steps
        if args.all or args.preprocess:
            logger.info("Starting preprocessing...")
            processed, failed = pipeline.preprocess_buildings(
                add_meters=not args.no_meters,
                add_outdoor_vars=not args.no_outdoor_vars,
                timesteps_per_hour=args.timesteps_per_hour,
                add_setpoint_control=not args.no_setpoint_control,
                validate=not args.no_validation
            )
            logger.info(f"Preprocessed {len(processed)} buildings ({len(failed)} failed)")

        if args.all or args.occupancy_vars:
            logger.info("Creating occupancy variations...")
            count, failed = pipeline.create_occupancy_variations(args.occupancy_schedules)
            logger.info(f"Created {count} occupancy variations ({len(failed)} failed)")

        if args.all or args.window_vars:
            logger.info("Creating window variations...")
            count, failed = pipeline.create_window_variations(args.wwr_ratios)
            logger.info(f"Created {count} window variations ({len(failed)} failed)")

        if args.all or args.thermal_vars:
            logger.info("Creating thermal resistance variations...")
            count, failed = pipeline.create_thermal_variations(args.thermal_scenarios)
            logger.info(f"Created {count} thermal variations ({len(failed)} failed)")

        if args.all or args.combined_vars:
            logger.info("Creating combined variations...")
            # Combine occupancy and thermal variations only; window
            # variations are deliberately left out of the combined set.
            combinations = [
                {"occupancy": occ, "thermal": thermal}
                for occ in args.occupancy_schedules
                for thermal in args.thermal_scenarios
            ]
            count, failed = pipeline.create_combined_variations(
                variation_types=["occupancy", "thermal"],
                combinations=combinations
            )
            logger.info(f"Created {count} combined variations ({len(failed)} failed)")

        if args.all or args.create_table:
            logger.info("Creating building table...")
            table_file = pipeline.create_building_table(update_existing=args.update_existing)
            logger.info(f"Building table created: {table_file}")

        # Weather operations
        if args.create_weather_table:
            logger.info("Creating weather table...")
            try:
                from building_gen.database.weather_table import create_weather_table_with_real
                df = create_weather_table_with_real(_weather_dirs(data_dir), weather_table_path)
                logger.info(f"Created weather table with {len(df)} locations")
            except ImportError:
                logger.error("Weather table functionality not implemented yet")
            except Exception as e:
                logger.error(f"Failed to create weather table: {e}")

        if args.weather_stats:
            try:
                if weather_table_path.exists():
                    df = pd.read_csv(weather_table_path)
                    print("\n🌤️ Weather Collection Statistics:")
                    print(f" Total locations: {len(df)}")
                    print(f" Countries: {df['country'].nunique()}")
                    print(f" Data sources: {df['data_source'].value_counts().to_dict()}")
                    print("\n Top 10 countries by location count:")
                    for country, count in df['country'].value_counts().head(10).items():
                        print(f" {country}: {count}")
                    if 'climate_zone_code' in df.columns:
                        print(f"\n Climate zones represented: {df['climate_zone_code'].nunique()}")
                        print(" Climate zone distribution:")
                        for zone, count in df['climate_zone_code'].value_counts().head(10).items():
                            print(f" {zone}: {count}")
                else:
                    logger.error("Weather table not found. Run --create-weather-table first.")
            except Exception as e:
                logger.error(f"Failed to show weather statistics: {e}")

        if args.query_weather or args.export_weather:
            try:
                if weather_table_path.exists():
                    df = _apply_weather_filters(pd.read_csv(weather_table_path), args)
                    if args.query_weather:
                        print(f"\n Found {len(df)} weather locations matching criteria:")
                        # Only the first 15 matches are listed on screen.
                        for _, row in df.head(15).iterrows():
                            print(f" {row['place']}, {row['country']}")
                            print(f" Coordinates: {row['latitude']:.2f}, {row['longitude']:.2f}")
                            if 'climate_zone_code' in row:
                                print(f" Climate zone: {row['climate_zone_code']}")
                            print(f" Source: {row['data_source']}")
                            print()
                        if len(df) > 15:
                            print(f" ... and {len(df) - 15} more locations")
                    if args.export_weather:
                        df.to_csv(args.export_weather, index=False)
                        logger.info(f"Exported {len(df)} weather locations to {args.export_weather}")
                else:
                    logger.error("Weather table not found. Run --create-weather-table first.")
            except Exception as e:
                logger.error(f"Failed to query weather: {e}")

        if args.validate_weather:
            logger.info("Validating weather files...")
            try:
                from ladybug.epw import EPW
                valid_count = 0
                invalid_count = 0
                for weather_dir in _weather_dirs(data_dir):
                    for epw_file in weather_dir.glob("*.epw"):
                        try:
                            weather = EPW(epw_file)
                            # Basic validation - check if we can read location data
                            _ = weather.location.city
                            _ = weather.location.latitude
                            _ = weather.location.longitude
                            valid_count += 1
                        except Exception as e:
                            logger.warning(f"Invalid weather file {epw_file}: {e}")
                            invalid_count += 1
                logger.info(f"Weather validation complete: {valid_count} valid, {invalid_count} invalid")
            except ImportError:
                logger.error("ladybug library not available for weather validation")
            except Exception as e:
                logger.error(f"Weather validation failed: {e}")

        # Weather variations
        if args.weather_vars:
            logger.info("Creating weather variations...")
            try:
                count, failed = pipeline.create_weather_variations(args.weather_types)
                logger.info(f"Created {count} weather variations ({len(failed)} failed)")
            except Exception as e:
                logger.error(f"Failed to create weather variations: {e}")

        if args.weather_variation_stats:
            try:
                stats = pipeline.get_weather_variation_stats()
                print("\n🌤️ Weather Variation Statistics:")
                _print_stats(stats)
            except Exception as e:
                logger.error(f"Failed to show weather variation statistics: {e}")

        # Building-Weather Simulation operations
        if args.match_buildings_weather:
            logger.info("Matching buildings to weather files...")
            try:
                buildings_df = pipeline.match_buildings_to_weather()
                logger.info(f"Successfully matched {len(buildings_df)} buildings to weather files")
            except Exception as e:
                logger.error(f"Failed to match buildings to weather: {e}")

        if args.create_combinations:
            logger.info("Creating building-weather combinations...")
            try:
                buildings_df = pd.read_csv(buildings_path)
                weather_df = pd.read_csv(weather_table_path)
                combinations = create_building_weather_combinations(
                    buildings_df,
                    weather_df,
                    weather_df,  # Using same table for base_weather_table - adjust if you have a separate base weather table
                    args.simulation_climate_zones
                )
                # Save combinations to CSV
                combinations_df = pd.DataFrame(combinations, columns=['building_id', 'weather_id'])
                combinations_df.to_csv(combinations_path, index=False)
                logger.info(f"Created {len(combinations)} building-weather combinations")
                logger.info(f"Combinations saved to: {combinations_path}")
                if args.export_combinations:
                    combinations_df.to_csv(args.export_combinations, index=False)
                    logger.info(f"Exported combinations to: {args.export_combinations}")
            except Exception as e:
                logger.error(f"Failed to create combinations: {e}")

        if args.simulation_stats:
            try:
                if not all([buildings_path.exists(), weather_table_path.exists()]):
                    logger.error("Building or weather tables not found. Run --create-table and --create-weather-table first.")
                else:
                    buildings_df = pd.read_csv(buildings_path)
                    weather_df = pd.read_csv(weather_table_path)
                    print("\n🏢 Building-Weather Simulation Statistics:")
                    print(f" Total buildings: {len(buildings_df)}")
                    print(f" Total weather locations: {len(weather_df)}")
                    # Buildings by climate zone
                    print("\n Buildings by climate zone:")
                    for zone, count in buildings_df['climate_zone'].value_counts().items():
                        print(f" {zone}: {count} buildings")
                    # Weather files by climate zone
                    print("\n Weather files by climate zone:")
                    for zone, count in weather_df['climate_zone_code'].value_counts().items():
                        print(f" {zone}: {count} weather files")
                    # Potential combinations by climate zone; the running
                    # total is accumulated in the same pass.
                    print("\n Potential combinations by climate zone:")
                    total_potential = 0
                    for zone in buildings_df['climate_zone'].unique():
                        building_count = len(buildings_df[buildings_df['climate_zone'] == zone])
                        weather_count = len(weather_df[weather_df['climate_zone_code'] == zone])
                        zone_combinations = building_count * weather_count
                        total_potential += zone_combinations
                        print(f" {zone}: {building_count} buildings × {weather_count} weather = {zone_combinations} combinations")
                    print(f"\n Total potential combinations: {total_potential}")
                    # Check if combinations have been created
                    if combinations_path.exists():
                        combinations_df = pd.read_csv(combinations_path)
                        print(f" Created combinations: {len(combinations_df)}")
                    else:
                        print(" Created combinations: 0 (run --create-combinations)")
            except Exception as e:
                logger.error(f"Failed to show simulation statistics: {e}")

        # Building query operations
        if args.query or args.stats or args.export:
            # Build filter dictionary from the filters that were supplied
            filters = {}
            if args.building_type:
                filters['building_type'] = args.building_type
            if args.climate_zone:
                filters['climate_zone'] = args.climate_zone
            if args.variation_type:
                filters['variation_type'] = args.variation_type
            if args.occupancy_schedule:
                filters['occupancy_schedule'] = args.occupancy_schedule
            if args.thermal_scenario:
                filters['thermal_scenario'] = args.thermal_scenario

            # Build WWR range. Bounds are tested against None so that an
            # explicit 0.0 is honoured; a missing bound defaults to the
            # corresponding end of [0.0, 1.0].
            wwr_range = None
            if args.min_wwr is not None or args.max_wwr is not None:
                wwr_range = (
                    args.min_wwr if args.min_wwr is not None else 0.0,
                    args.max_wwr if args.max_wwr is not None else 1.0,
                )

            # Query buildings
            if args.query or args.export:
                buildings = pipeline.get_buildings(
                    wwr_range=wwr_range,
                    min_floor_area=args.min_floor_area,
                    max_floor_area=args.max_floor_area,
                    **filters
                )
                if args.query:
                    logger.info(f"Found {len(buildings)} buildings matching criteria")
                    if buildings:
                        print("\nMatching buildings:")
                        for i, building in enumerate(buildings[:10], 1):  # Show first 10
                            print(f" {i:2d}. {building['name']}")
                            print(f" Type: {building['building_type']}, Climate: {building['climate_zone']}")
                            print(f" Variation: {building['variation_type']}, Occupancy: {building['occupancy_schedule']}")
                            if 'thermal_scenario' in building:
                                print(f" Thermal: {building['thermal_scenario']}, WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
                            else:
                                print(f" WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
                            print()
                        if len(buildings) > 10:
                            print(f" ... and {len(buildings) - 10} more buildings")
                    else:
                        print("No buildings found matching the criteria")
                if args.export:
                    # NOTE(review): export_building_list receives only
                    # **filters; the WWR and floor-area limits used for
                    # `buildings` above are not passed, so the exported rows
                    # may not match the count logged below - confirm against
                    # export_building_list.
                    pipeline.export_building_list(args.export, **filters)
                    logger.info(f"Exported {len(buildings)} buildings to {args.export}")

            # Show statistics
            if args.stats:
                stats = pipeline.get_summary_stats()
                print("\n📊 Database Statistics:")
                _print_stats(stats)

        logger.info("Pipeline execution completed successfully!")
    except KeyboardInterrupt:
        logger.info("Pipeline execution interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Pipeline execution failed: {e}")
        if args.log_level == "DEBUG":
            # Full traceback only when the user asked for debug output.
            import traceback
            traceback.print_exc()
        sys.exit(1)
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()