Spaces:

BuildingBench
/

BuildingBench-Dataset

Sleeping

App Files Files Community

BuildingBench-Dataset / src /streamlit_app.py

BuildingBench

Update src/streamlit_app.py

dd4ed72 verified 8 months ago

raw

history blame contribute delete

30.7 kB

	import argparse
	import logging
	import sys
	from pathlib import Path
	import pandas as pd
	sys.path.insert(0, str(Path(__file__).parent.parent))
	from building_gen.core import BuildingPipeline, create_building_weather_combinations


	def configure_logging(level: str = "INFO"):
	    """Configure root logging and quieten chatty Google client loggers.

	    Args:
	        level: Name of a ``logging`` level; it is upper-cased before lookup.
	            A name that is not an attribute of ``logging`` falls back to
	            ``logging.INFO``.
	    """
	    resolved_level = getattr(logging, level.upper(), logging.INFO)

	    logging.basicConfig(
	        datefmt='%Y-%m-%d %H:%M:%S',
	        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	        level=resolved_level,
	    )

	    # External libraries only report warnings and above.
	    for noisy_logger in ('googleapiclient', 'google_auth_oauthlib'):
	        logging.getLogger(noisy_logger).setLevel(logging.WARNING)



	def _build_parser() -> argparse.ArgumentParser:
	    """Create the argument parser with every pipeline, weather and query option."""
	    parser = argparse.ArgumentParser(
	        description="Building Processing Pipeline - Process and create variations of building energy models",
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog="""
	Examples:
	# Process everything
	python scripts/main.py --all

	# Preprocess existing files
	python scripts/main.py --preprocess

	# Weather management
	python scripts/main.py --create-weather-table
	python scripts/main.py --weather-stats
	python scripts/main.py --query-weather --country USA --weather-climate-zone 6A

	# Building-weather simulation
	python scripts/main.py --match-buildings-weather
	python scripts/main.py --create-combinations --simulation-climate-zones 6A 2A
	python scripts/main.py --simulation-stats

	# Weather variations
	python scripts/main.py --weather-vars
	python scripts/main.py --weather-vars --weather-types base real_weather
	python scripts/main.py --weather-variation-stats

	# Create specific variations
	python scripts/main.py --occupancy-vars --occupancy-schedules standard low_occupancy high_occupancy
	python scripts/main.py --window-vars --wwr-ratios 0.2 0.4 0.6 0.8
	python scripts/main.py --thermal-vars --thermal-scenarios default high_performance low_performance

	# Create combined variations with thermal
	python scripts/main.py --combined-vars --thermal-scenarios default high_performance

	# Create table and query
	python scripts/main.py --create-table
	python scripts/main.py --query --building-type office --climate-zone 6A

	# Query and export
	python scripts/main.py --query --building-type office --export results.csv
	python scripts/main.py --query --thermal-scenario high_performance --export high_performance_buildings.csv
	"""
	    )

	    # Global configuration
	    parser.add_argument("--data-dir", default="data", help="Data directory (default: data)")
	    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
	                        default="INFO", help="Logging level (default: INFO)")

	    # Step selection arguments
	    step_group = parser.add_argument_group("Pipeline Steps")
	    step_group.add_argument("--preprocess", action="store_true", help="Preprocess buildings (add meters, setpoints, etc.)")
	    step_group.add_argument("--occupancy-vars", action="store_true", help="Create occupancy variations")
	    step_group.add_argument("--window-vars", action="store_true", help="Create window variations")
	    step_group.add_argument("--thermal-vars", action="store_true", help="Create thermal resistance variations")
	    step_group.add_argument("--combined-vars", action="store_true", help="Create combined variations")
	    step_group.add_argument("--create-table", action="store_true", help="Create/update building database table")
	    step_group.add_argument("--all", action="store_true", help="Run all steps")

	    # Weather management arguments
	    weather_group = parser.add_argument_group("Weather Management")
	    weather_group.add_argument("--create-weather-table", action="store_true",
	                               help="Create weather table from all EPW files")
	    weather_group.add_argument("--weather-stats", action="store_true",
	                               help="Show weather collection statistics")
	    weather_group.add_argument("--query-weather", action="store_true",
	                               help="Query weather locations")
	    weather_group.add_argument("--export-weather", type=Path,
	                               help="Export weather query results to CSV file")
	    weather_group.add_argument("--validate-weather", action="store_true",
	                               help="Validate all EPW files")
	    weather_group.add_argument("--weather-vars", action="store_true",
	                               help="Create comprehensive weather variations for all buildings")
	    weather_group.add_argument("--weather-types", nargs="+",
	                               default=["base", "climate_zone_expanded", "real_weather"],
	                               choices=["base", "climate_zone_expanded", "real_weather"],
	                               help="Weather variation types to create")
	    weather_group.add_argument("--weather-variation-stats", action="store_true",
	                               help="Show weather variation statistics")

	    # Building-Weather Simulation arguments
	    simulation_group = parser.add_argument_group("Building-Weather Simulation")
	    simulation_group.add_argument("--match-buildings-weather", action="store_true",
	                                  help="Match buildings to weather files (add base_weather_id)")
	    simulation_group.add_argument("--create-combinations", action="store_true",
	                                  help="Create building-weather combinations for simulation")
	    simulation_group.add_argument("--simulation-climate-zones", nargs="+",
	                                  help="Climate zones to include in simulations (e.g., 6A 2A 4A)")
	    simulation_group.add_argument("--simulation-stats", action="store_true",
	                                  help="Show building-weather combination statistics")
	    simulation_group.add_argument("--export-combinations", type=Path,
	                                  help="Export building-weather combinations to CSV")

	    # Weather filtering options
	    weather_filter_group = parser.add_argument_group("Weather Filtering Options")
	    weather_filter_group.add_argument("--country", help="Filter by country code (e.g., USA, CAN, CHN)")
	    weather_filter_group.add_argument("--weather-climate-zone", help="Filter weather by climate zone")
	    weather_filter_group.add_argument("--data-source", choices=["base", "expanded", "real"],
	                                      help="Filter by data source")
	    weather_filter_group.add_argument("--min-latitude", type=float, help="Minimum latitude")
	    weather_filter_group.add_argument("--max-latitude", type=float, help="Maximum latitude")
	    weather_filter_group.add_argument("--min-longitude", type=float, help="Minimum longitude")
	    weather_filter_group.add_argument("--max-longitude", type=float, help="Maximum longitude")

	    # Query arguments
	    query_group = parser.add_argument_group("Building Query Options")
	    query_group.add_argument("--query", action="store_true", help="Query buildings from database")
	    query_group.add_argument("--export", type=Path, help="Export query results to CSV file")
	    query_group.add_argument("--stats", action="store_true", help="Show database statistics")

	    # Preprocessing configuration
	    preprocess_group = parser.add_argument_group("Preprocessing Configuration")
	    preprocess_group.add_argument("--no-meters", action="store_true",
	                                  help="Skip adding HVAC meters")
	    preprocess_group.add_argument("--no-outdoor-vars", action="store_true",
	                                  help="Skip adding outdoor air variables")
	    preprocess_group.add_argument("--timesteps-per-hour", type=int, default=4,
	                                  help="Simulation timesteps per hour (default: 4)")
	    preprocess_group.add_argument("--no-setpoint-control", action="store_true",
	                                  help="Skip adding setpoint control")
	    preprocess_group.add_argument("--no-validation", action="store_true",
	                                  help="Skip validation of processed files")

	    # Variation configuration
	    variation_group = parser.add_argument_group("Variation Configuration")
	    variation_group.add_argument("--occupancy-schedules", nargs="+",
	                                 default=["standard", "low_occupancy", "high_occupancy"],
	                                 help="Occupancy schedules. Options: standard, low_occupancy, high_occupancy, early_shift, late_shift, retail, school, flexible_hybrid, hospital, gym, warehouse, 24_7")
	    variation_group.add_argument("--wwr-ratios", nargs="+", type=float,
	                                 default=[0.2, 0.4, 0.6, 0.8],
	                                 help="Window-to-wall ratios (0.0-1.0)")
	    variation_group.add_argument("--thermal-scenarios", nargs="+",
	                                 default=["default", "high_performance", "low_performance"],
	                                 help="Thermal scenarios. Options: default, high_performance, low_performance")

	    # Building filtering options
	    filter_group = parser.add_argument_group("Building Filtering Options")
	    filter_group.add_argument("--building-type",
	                              choices=["office", "retail", "school", "hospital", "warehouse", "hotel", "apartment", "restaurant", "healthcare"])
	    filter_group.add_argument("--climate-zone",
	                              help="Filter by climate zone (e.g., 4A, 5A, 6A)")
	    filter_group.add_argument("--variation-type", choices=["base", "occupancy", "windows", "thermal", "combined"],
	                              help="Filter by variation type")
	    filter_group.add_argument("--occupancy-schedule",
	                              choices=["standard", "low_occupancy", "high_occupancy", "early_shift",
	                                       "late_shift", "retail", "school", "flexible_hybrid",
	                                       "hospital", "gym", "warehouse", "24_7"],
	                              help="Filter by occupancy schedule")
	    filter_group.add_argument("--thermal-scenario",
	                              choices=["default", "high_performance", "low_performance"],
	                              help="Filter by thermal scenario")
	    filter_group.add_argument("--min-floor-area", type=float, help="Minimum floor area (m²)")
	    filter_group.add_argument("--max-floor-area", type=float, help="Maximum floor area (m²)")
	    filter_group.add_argument("--min-wwr", type=float, help="Minimum window-to-wall ratio")
	    filter_group.add_argument("--max-wwr", type=float, help="Maximum window-to-wall ratio")

	    # Table configuration
	    table_group = parser.add_argument_group("Table Configuration")
	    table_group.add_argument("--update-existing", action="store_true",
	                             help="Update existing table instead of creating new")

	    return parser


	def _filter_weather_table(df, args):
	    """Return the rows of weather table ``df`` that satisfy the weather filter options in ``args``."""
	    equality_filters = (
	        ('country', args.country),
	        ('climate_zone_code', args.weather_climate_zone),
	        ('data_source', args.data_source),
	    )
	    for column, wanted in equality_filters:
	        if wanted:
	            df = df[df[column] == wanted]

	    # Compare against None rather than truthiness: 0.0 is a legitimate bound
	    # (equator / prime meridian) and must not be treated as "not given".
	    if args.min_latitude is not None:
	        df = df[df['latitude'] >= args.min_latitude]
	    if args.max_latitude is not None:
	        df = df[df['latitude'] <= args.max_latitude]
	    if args.min_longitude is not None:
	        df = df[df['longitude'] >= args.min_longitude]
	    if args.max_longitude is not None:
	        df = df[df['longitude'] <= args.max_longitude]
	    return df


	def _print_stats(title, stats):
	    """Print ``title`` followed by the entries of ``stats``, expanding one level of nested dicts."""
	    print(title)
	    for key, value in stats.items():
	        if isinstance(value, dict):
	            print(f" {key}:")
	            for subkey, subvalue in value.items():
	                print(f" {subkey}: {subvalue}")
	        else:
	            print(f" {key}: {value}")


	def main() -> None:
	    """Parse the command line and run every requested pipeline, weather and query step.

	    Steps run in a fixed order: preprocessing and variation creation, table
	    creation, weather operations, building-weather simulation operations and
	    finally building queries/statistics. Weather and simulation steps log
	    their own failures and let the remaining steps continue.

	    Exits with status 1 when a WWR ratio is outside [0.0, 1.0], when no action
	    was requested, when the pipeline cannot be initialised, on Ctrl-C, or when
	    a step raises an otherwise unhandled exception.
	    """
	    args = _build_parser().parse_args()

	    # Configure logging
	    configure_logging(args.log_level)
	    logger = logging.getLogger(__name__)

	    # Validate arguments
	    if args.wwr_ratios:
	        for wwr in args.wwr_ratios:
	            if not 0.0 <= wwr <= 1.0:
	                logger.error(f"WWR ratio must be between 0.0 and 1.0, got {wwr}")
	                sys.exit(1)

	    # Check that an action was requested before doing any pipeline work.
	    # --export and --export-weather trigger work further down, so they count
	    # as actions in their own right.
	    requested_actions = [
	        args.all, args.preprocess, args.occupancy_vars,
	        args.window_vars, args.thermal_vars, args.combined_vars, args.create_table,
	        args.query, args.stats, args.create_weather_table,
	        args.weather_stats, args.query_weather, args.validate_weather,
	        args.match_buildings_weather, args.create_combinations,
	        args.simulation_stats, args.weather_vars, args.weather_variation_stats,
	        args.export is not None, args.export_weather is not None,
	    ]
	    if not any(requested_actions):
	        logger.error("No action specified. Use --help for options.")
	        sys.exit(1)

	    # Initialize pipeline
	    try:
	        pipeline = BuildingPipeline(args.data_dir)
	        logger.info(f"Initialized pipeline with data directory: {args.data_dir}")
	    except Exception as e:
	        logger.error(f"Failed to initialize pipeline: {e}")
	        sys.exit(1)

	    # Locations shared by several steps below.
	    data_dir = Path(args.data_dir)
	    weather_dirs = [
	        data_dir / "weather/base",
	        data_dir / "weather/expanded",
	        data_dir / "weather/real"
	    ]
	    weather_table_path = data_dir / "weather/tables/all_weather.csv"
	    buildings_path = data_dir / "tables/buildings.csv"
	    combinations_path = data_dir / "tables/building_weather_combinations.csv"

	    try:
	        # Execute pipeline steps
	        if args.all or args.preprocess:
	            logger.info("Starting preprocessing...")
	            processed, failed = pipeline.preprocess_buildings(
	                add_meters=not args.no_meters,
	                add_outdoor_vars=not args.no_outdoor_vars,
	                timesteps_per_hour=args.timesteps_per_hour,
	                add_setpoint_control=not args.no_setpoint_control,
	                validate=not args.no_validation
	            )
	            logger.info(f"Preprocessed {len(processed)} buildings ({len(failed)} failed)")

	        if args.all or args.occupancy_vars:
	            logger.info("Creating occupancy variations...")
	            count, failed = pipeline.create_occupancy_variations(args.occupancy_schedules)
	            logger.info(f"Created {count} occupancy variations ({len(failed)} failed)")

	        if args.all or args.window_vars:
	            logger.info("Creating window variations...")
	            count, failed = pipeline.create_window_variations(args.wwr_ratios)
	            logger.info(f"Created {count} window variations ({len(failed)} failed)")

	        if args.all or args.thermal_vars:
	            logger.info("Creating thermal resistance variations...")
	            count, failed = pipeline.create_thermal_variations(args.thermal_scenarios)
	            logger.info(f"Created {count} thermal variations ({len(failed)} failed)")

	        if args.all or args.combined_vars:
	            logger.info("Creating combined variations...")
	            # Combine occupancy and thermal variations only; windows are
	            # deliberately left out of the combined set.
	            combinations = [
	                {"occupancy": occ, "thermal": thermal}
	                for occ in args.occupancy_schedules
	                for thermal in args.thermal_scenarios
	            ]
	            count, failed = pipeline.create_combined_variations(
	                variation_types=["occupancy", "thermal"],
	                combinations=combinations
	            )
	            logger.info(f"Created {count} combined variations ({len(failed)} failed)")

	        if args.all or args.create_table:
	            logger.info("Creating building table...")
	            table_file = pipeline.create_building_table(update_existing=args.update_existing)
	            logger.info(f"Building table created: {table_file}")

	        # Weather operations
	        if args.create_weather_table:
	            logger.info("Creating weather table...")
	            try:
	                from building_gen.database.weather_table import create_weather_table_with_real
	                df = create_weather_table_with_real(weather_dirs, weather_table_path)
	                logger.info(f"Created weather table with {len(df)} locations")
	            except ImportError:
	                logger.error("Weather table functionality not implemented yet")
	            except Exception as e:
	                logger.error(f"Failed to create weather table: {e}")

	        if args.weather_stats:
	            try:
	                if weather_table_path.exists():
	                    df = pd.read_csv(weather_table_path)
	                    print("\n🌤️ Weather Collection Statistics:")
	                    print(f" Total locations: {len(df)}")
	                    print(f" Countries: {df['country'].nunique()}")
	                    print(f" Data sources: {df['data_source'].value_counts().to_dict()}")
	                    print("\n Top 10 countries by location count:")
	                    for country, count in df['country'].value_counts().head(10).items():
	                        print(f" {country}: {count}")

	                    if 'climate_zone_code' in df.columns:
	                        print(f"\n Climate zones represented: {df['climate_zone_code'].nunique()}")
	                        print(" Climate zone distribution:")
	                        for zone, count in df['climate_zone_code'].value_counts().head(10).items():
	                            print(f" {zone}: {count}")
	                else:
	                    logger.error("Weather table not found. Run --create-weather-table first.")
	            except Exception as e:
	                logger.error(f"Failed to show weather statistics: {e}")

	        if args.query_weather or args.export_weather:
	            try:
	                if weather_table_path.exists():
	                    df = _filter_weather_table(pd.read_csv(weather_table_path), args)

	                    if args.query_weather:
	                        print(f"\n Found {len(df)} weather locations matching criteria:")
	                        for _, row in df.head(15).iterrows():
	                            print(f" {row['place']}, {row['country']}")
	                            print(f" Coordinates: {row['latitude']:.2f}, {row['longitude']:.2f}")
	                            if 'climate_zone_code' in row:
	                                print(f" Climate zone: {row['climate_zone_code']}")
	                            print(f" Source: {row['data_source']}")
	                            print()

	                        if len(df) > 15:
	                            print(f" ... and {len(df) - 15} more locations")

	                    if args.export_weather:
	                        df.to_csv(args.export_weather, index=False)
	                        logger.info(f"Exported {len(df)} weather locations to {args.export_weather}")
	                else:
	                    logger.error("Weather table not found. Run --create-weather-table first.")
	            except Exception as e:
	                logger.error(f"Failed to query weather: {e}")

	        if args.validate_weather:
	            logger.info("Validating weather files...")
	            try:
	                from ladybug.epw import EPW

	                valid_count = 0
	                invalid_count = 0

	                for weather_dir in weather_dirs:
	                    for epw_file in weather_dir.glob("*.epw"):
	                        try:
	                            weather = EPW(epw_file)
	                            # Basic validation - check if we can read location data
	                            _ = weather.location.city
	                            _ = weather.location.latitude
	                            _ = weather.location.longitude
	                            valid_count += 1
	                        except Exception as e:
	                            logger.warning(f"Invalid weather file {epw_file}: {e}")
	                            invalid_count += 1

	                logger.info(f"Weather validation complete: {valid_count} valid, {invalid_count} invalid")
	            except ImportError:
	                logger.error("ladybug library not available for weather validation")
	            except Exception as e:
	                logger.error(f"Weather validation failed: {e}")

	        # Weather variations
	        if args.weather_vars:
	            logger.info("Creating weather variations...")
	            try:
	                count, failed = pipeline.create_weather_variations(args.weather_types)
	                logger.info(f"Created {count} weather variations ({len(failed)} failed)")
	            except Exception as e:
	                logger.error(f"Failed to create weather variations: {e}")

	        if args.weather_variation_stats:
	            try:
	                stats = pipeline.get_weather_variation_stats()
	                _print_stats("\n🌤️ Weather Variation Statistics:", stats)
	            except Exception as e:
	                logger.error(f"Failed to show weather variation statistics: {e}")

	        # Building-Weather Simulation operations
	        if args.match_buildings_weather:
	            logger.info("Matching buildings to weather files...")
	            try:
	                buildings_df = pipeline.match_buildings_to_weather()
	                logger.info(f"Successfully matched {len(buildings_df)} buildings to weather files")
	            except Exception as e:
	                logger.error(f"Failed to match buildings to weather: {e}")

	        if args.create_combinations:
	            logger.info("Creating building-weather combinations...")
	            try:
	                buildings_df = pd.read_csv(buildings_path)
	                weather_df = pd.read_csv(weather_path := weather_table_path)

	                combinations = create_building_weather_combinations(
	                    buildings_df,
	                    weather_df,
	                    weather_df,  # Using same table for base_weather_table - adjust if you have a separate base weather table
	                    args.simulation_climate_zones
	                )

	                # Save combinations to CSV
	                combinations_df = pd.DataFrame(combinations, columns=['building_id', 'weather_id'])
	                combinations_df.to_csv(combinations_path, index=False)

	                logger.info(f"Created {len(combinations)} building-weather combinations")
	                logger.info(f"Combinations saved to: {combinations_path}")

	                if args.export_combinations:
	                    combinations_df.to_csv(args.export_combinations, index=False)
	                    logger.info(f"Exported combinations to: {args.export_combinations}")

	            except Exception as e:
	                logger.error(f"Failed to create combinations: {e}")

	        if args.simulation_stats:
	            try:
	                if not all([buildings_path.exists(), weather_table_path.exists()]):
	                    logger.error("Building or weather tables not found. Run --create-table and --create-weather-table first.")
	                else:
	                    buildings_df = pd.read_csv(buildings_path)
	                    weather_df = pd.read_csv(weather_table_path)

	                    print("\n🏢 Building-Weather Simulation Statistics:")
	                    print(f" Total buildings: {len(buildings_df)}")
	                    print(f" Total weather locations: {len(weather_df)}")

	                    # Buildings by climate zone
	                    print("\n Buildings by climate zone:")
	                    for zone, count in buildings_df['climate_zone'].value_counts().items():
	                        print(f" {zone}: {count} buildings")

	                    # Weather files by climate zone
	                    print("\n Weather files by climate zone:")
	                    for zone, count in weather_df['climate_zone_code'].value_counts().items():
	                        print(f" {zone}: {count} weather files")

	                    # Potential combinations by climate zone; the grand total is
	                    # accumulated in the same pass instead of re-filtering both tables.
	                    print("\n Potential combinations by climate zone:")
	                    total_potential = 0
	                    for zone in buildings_df['climate_zone'].unique():
	                        building_count = len(buildings_df[buildings_df['climate_zone'] == zone])
	                        weather_count = len(weather_df[weather_df['climate_zone_code'] == zone])
	                        zone_total = building_count * weather_count
	                        total_potential += zone_total
	                        print(f" {zone}: {building_count} buildings × {weather_count} weather = {zone_total} combinations")

	                    print(f"\n Total potential combinations: {total_potential}")

	                    # Check if combinations have been created
	                    if combinations_path.exists():
	                        combinations_df = pd.read_csv(combinations_path)
	                        print(f" Created combinations: {len(combinations_df)}")
	                    else:
	                        print(" Created combinations: 0 (run --create-combinations)")

	            except Exception as e:
	                logger.error(f"Failed to show simulation statistics: {e}")

	        # Building query operations
	        if args.query or args.stats or args.export:
	            # Build filter dictionary from the options that were actually given
	            candidate_filters = {
	                'building_type': args.building_type,
	                'climate_zone': args.climate_zone,
	                'variation_type': args.variation_type,
	                'occupancy_schedule': args.occupancy_schedule,
	                'thermal_scenario': args.thermal_scenario,
	            }
	            filters = {name: value for name, value in candidate_filters.items() if value}

	            # Build WWR range; an explicit 0 is a real bound, so test for None
	            # instead of relying on truthiness.
	            wwr_range = None
	            if args.min_wwr is not None or args.max_wwr is not None:
	                wwr_range = (
	                    args.min_wwr if args.min_wwr is not None else 0.0,
	                    args.max_wwr if args.max_wwr is not None else 1.0,
	                )

	            # Query buildings
	            if args.query or args.export:
	                buildings = pipeline.get_buildings(
	                    wwr_range=wwr_range,
	                    min_floor_area=args.min_floor_area,
	                    max_floor_area=args.max_floor_area,
	                    **filters
	                )

	                if args.query:
	                    logger.info(f"Found {len(buildings)} buildings matching criteria")
	                    if buildings:
	                        print("\nMatching buildings:")
	                        for i, building in enumerate(buildings[:10], 1):  # Show first 10
	                            print(f" {i:2d}. {building['name']}")
	                            print(f" Type: {building['building_type']}, Climate: {building['climate_zone']}")
	                            print(f" Variation: {building['variation_type']}, Occupancy: {building['occupancy_schedule']}")
	                            if 'thermal_scenario' in building:
	                                print(f" Thermal: {building['thermal_scenario']}, WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
	                            else:
	                                print(f" WWR: {building['window_wall_ratio']:.0%}, Floor area: {building['floor_area']:.0f} m²")
	                            print()

	                        if len(buildings) > 10:
	                            print(f" ... and {len(buildings) - 10} more buildings")
	                    else:
	                        print("No buildings found matching the criteria")

	                if args.export:
	                    # NOTE(review): the export receives only the equality filters,
	                    # not the WWR / floor-area bounds used for the query above, so
	                    # the logged count may differ from the rows written - confirm
	                    # against export_building_list's signature.
	                    pipeline.export_building_list(args.export, **filters)
	                    logger.info(f"Exported {len(buildings)} buildings to {args.export}")

	            # Show statistics
	            if args.stats:
	                stats = pipeline.get_summary_stats()
	                _print_stats("\n📊 Database Statistics:", stats)

	        logger.info("Pipeline execution completed successfully!")

	    except KeyboardInterrupt:
	        logger.info("Pipeline execution interrupted by user")
	        sys.exit(1)
	    except Exception as e:
	        logger.error(f"Pipeline execution failed: {e}")
	        if args.log_level == "DEBUG":
	            # Full traceback only when the user asked for debug output.
	            import traceback
	            traceback.print_exc()
	        sys.exit(1)

	# Run the command-line interface only when this file is executed as a
	# script; importing the module must not trigger the pipeline.
	if __name__ == "__main__":
	    main()