| """ |
| Advanced 3D Reconstruction from Single Images with Responsible AI Features |
| |
| """ |
|
|
| import gradio as gr |
| import numpy as np |
| import torch |
| from PIL import Image |
| from transformers import GLPNForDepthEstimation, GLPNImageProcessor |
| import open3d as o3d |
| import plotly.graph_objects as go |
| import matplotlib.pyplot as plt |
| import io |
| import json |
| import time |
| from pathlib import Path |
| import tempfile |
| import zipfile |
| import hashlib |
| from datetime import datetime |
|
|
| |
| |
| |
|
|
| RESPONSIBLE_AI_NOTICE = """ |
| ## ⚠️ Responsible Use Guidelines |
| |
| ### Privacy & Consent |
| - **Do not upload images containing identifiable people without their explicit consent** |
| - **Do not use for surveillance, tracking, or monitoring individuals** |
| - Facial features may be reconstructed in 3D - consider privacy implications |
| - Remove metadata (EXIF) that may contain location or personal information |
| |
| ### Ethical Use |
| - This tool is for **educational, research, and creative purposes only** |
| - **Prohibited uses:** |
| - Creating deepfakes or misleading 3D content |
| - Unauthorized documentation of private property |
| - Circumventing security systems |
| - Generating 3D models for harassment or stalking |
| - Commercial use without proper rights to source images |
| |
| ### Limitations & Bias |
| - Models trained primarily on indoor Western architecture |
| - May perform poorly on non-Western architectural styles |
| - Scale is relative, not absolute - not suitable for precision measurements |
| - Single viewpoint limitations - occluded areas are inferred, not captured |
| |
| ### Data Usage |
| - Images are processed locally during your session |
| - No images are stored or transmitted to external servers |
| - Processing logs contain only technical metrics, no image content |
| - You retain all rights to your uploaded images and generated 3D models |
| |
| |
| **By using this tool, you agree to these responsible use guidelines.** |
| """ |
|
|
| |
| |
| |
|
|
def check_image_safety(image):
    """Run advisory privacy/size checks on an uploaded image.

    The checks never reject an image; they only collect human-readable
    warnings about pixel count, aspect ratio and embedded GPS metadata.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``getexif()`` method (a PIL image in this application).

    Returns:
        tuple: ``(True, warnings)`` where ``warnings`` is a newline-joined
        string of warning messages, or ``None`` when nothing was flagged.
    """
    gps_info_tag = 0x8825  # EXIF "GPSInfo" IFD pointer (decimal 34853)
    warnings = []

    width, height = image.size
    if width * height > 10_000_000:
        warnings.append("⚠️ Very large image - consider resizing to improve processing speed")

    # A zero-length side makes the ratio undefined; skip the check instead of
    # raising ZeroDivisionError.
    shorter_side = min(width, height)
    if shorter_side > 0 and max(width, height) / shorter_side > 3:
        warnings.append("⚠️ Unusual aspect ratio detected - ensure image doesn't contain unintended content")

    # EXIF inspection is best-effort: malformed metadata must not block the
    # upload, but KeyboardInterrupt/SystemExit should still propagate.
    try:
        exif = image.getexif()
        has_gps = bool(exif) and any(tag == gps_info_tag for tag in exif.keys())
    except Exception:
        has_gps = False

    if has_gps:
        warnings.append("⚠️ GPS location data detected in image - consider removing EXIF data for privacy")

    return True, "\n".join(warnings) if warnings else None
|
|
def generate_session_id():
    """Return a random, anonymous session identifier for logging.

    The identifier is drawn from the operating system's CSPRNG, so it cannot
    be derived from the request time and does not collide when two requests
    arrive within the same clock tick.

    Returns:
        str: 16 lowercase hexadecimal characters.
    """
    import secrets

    return secrets.token_hex(8)
|
|
def content_policy_check(image):
    """Verify that an image meets the minimum size accepted for processing.

    Only the pixel dimensions are inspected; the picture content itself is
    not analysed.

    Args:
        image: Object exposing ``size`` as ``(width, height)``.

    Returns:
        tuple: ``(True, None)`` when both sides are at least 100 pixels,
        otherwise ``(False, message)`` explaining the rejection.
    """
    width, height = image.size
    large_enough = width >= 100 and height >= 100

    if large_enough:
        return True, None

    return False, "Image too small - minimum 100x100 pixels required for meaningful reconstruction"
|
|
| |
| |
| |
|
|
# Load the default GLPN depth model once, at import time. Any failure is
# reported on stdout and leaves both globals set to None; process_image
# checks glpn_model for None before doing any work.
print("Loading GLPN model (lightweight)...")
_GLPN_CHECKPOINT = "vinvino02/glpn-nyu"
try:
    glpn_processor, glpn_model = (
        GLPNImageProcessor.from_pretrained(_GLPN_CHECKPOINT),
        GLPNForDepthEstimation.from_pretrained(_GLPN_CHECKPOINT),
    )
    print("✓ GLPN model loaded successfully!")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    glpn_processor = glpn_model = None


# The optional DPT model starts unloaded; process_image fills these in the
# first time the DPT option is selected.
dpt_processor = None
dpt_model = None
|
|
| |
| |
| |
|
|
def _generate_quality_assessment(metrics):
    """Summarize reconstruction quality as a Markdown bullet list.

    Args:
        metrics: Mapping holding ``initial_points``, ``outliers_removed``,
            ``is_edge_manifold``, ``is_vertex_manifold`` and ``is_watertight``.

    Returns:
        str: One ``- item`` line per finding, joined with newlines.
    """
    initial_points = metrics['initial_points']
    # Guard the ratio so an empty point cloud cannot raise ZeroDivisionError.
    outlier_pct = (metrics['outliers_removed'] / initial_points) * 100 if initial_points else 0.0

    if outlier_pct < 5:
        noise = "Very clean depth estimation"
    elif outlier_pct < 15:
        noise = "Good depth quality"
    else:
        noise = "High noise in depth estimation"

    if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']:
        topology = "Excellent topology"
    elif metrics['is_vertex_manifold']:
        topology = "Good local topology"
    else:
        topology = "Topology issues present"

    if metrics['is_watertight']:
        printability = "Watertight mesh - ready for 3D printing!"
    else:
        printability = "Not watertight - needs repair for 3D printing"

    return "\n".join(f"- {item}" for item in (noise, topology, printability))


def _rgb_strings(colors):
    """Convert rows of 0-1 float RGB triples into Plotly ``rgb(r,g,b)`` strings."""
    return [f"rgb({int(r * 255)},{int(g * 255)},{int(b * 255)})" for r, g, b in colors]


def _render_depth_comparison(image, depth):
    """Render the input image next to its depth map and return it as a PIL image."""
    fig, axes = plt.subplots(1, 2, figsize=(14, 7))
    try:
        axes[0].imshow(image)
        axes[0].set_title('Original Image', fontsize=14, fontweight='bold')
        axes[0].axis('off')

        depth_artist = axes[1].imshow(depth, cmap='plasma')
        axes[1].set_title('Estimated Depth Map', fontsize=14, fontweight='bold')
        axes[1].axis('off')
        fig.colorbar(depth_artist, ax=axes[1], fraction=0.046, pad=0.04)
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
    finally:
        # Close this specific figure (not pyplot's "current" one) so that
        # concurrent requests cannot close each other's figures, and so the
        # figure is released even when rendering fails.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)


def _build_plotly_figure(pcd, mesh, visualization_type):
    """Build the interactive Plotly view: the point cloud if requested, else the mesh."""
    if visualization_type == "point_cloud":
        points = np.asarray(pcd.points)
        trace = go.Scatter3d(
            x=points[:, 0], y=points[:, 1], z=points[:, 2],
            mode='markers',
            marker=dict(size=2, color=_rgb_strings(np.asarray(pcd.colors))),
            name='Point Cloud',
        )
        title = "Point Cloud"
    else:
        vertices = np.asarray(mesh.vertices)
        triangles = np.asarray(mesh.triangles)
        if mesh.has_vertex_colors():
            appearance = dict(
                vertexcolor=_rgb_strings(np.asarray(mesh.vertex_colors)),
                opacity=0.95,
            )
        else:
            appearance = dict(color='lightblue', opacity=0.9)
        trace = go.Mesh3d(
            x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
            i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
            **appearance,
        )
        title = "3D Mesh"

    figure = go.Figure(data=[trace])
    figure.update_layout(
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(visible=False),
            aspectmode='data',
        ),
        height=700,
        title=title,
    )
    return figure


def _export_reconstruction(pcd, mesh, metrics):
    """Write the geometry and metrics files, bundle them into a ZIP and return its path."""
    # Only the ZIP must outlive this call (it is handed to the download
    # widget); the individual files live in a scratch directory that is
    # removed as soon as they have been archived.
    output_dir = Path(tempfile.mkdtemp(prefix="reconstruction_"))
    zip_path = output_dir / "reconstruction_complete.zip"

    with tempfile.TemporaryDirectory() as scratch:
        scratch_dir = Path(scratch)
        members = []

        pcd_path = scratch_dir / "point_cloud.ply"
        if not o3d.io.write_point_cloud(str(pcd_path), pcd):
            raise RuntimeError(f"Failed to write {pcd_path.name}")
        members.append(pcd_path)

        for mesh_name in ("mesh.ply", "mesh.obj", "mesh.stl"):
            mesh_path = scratch_dir / mesh_name
            if not o3d.io.write_triangle_mesh(str(mesh_path), mesh):
                raise RuntimeError(f"Failed to write {mesh_name}")
            members.append(mesh_path)

        metrics_path = scratch_dir / "metrics.json"
        with open(metrics_path, 'w') as f:
            json.dump(metrics, f, indent=2, default=str)
        members.append(metrics_path)

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
            for member in members:
                archive.write(member, member.name)

    return str(zip_path)


def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
    """Reconstruct a coloured 3D point cloud and mesh from a single image.

    Pipeline: resize -> monocular depth estimation -> depth visualisation ->
    RGB-D back-projection -> statistical outlier removal -> normal estimation
    -> Poisson meshing -> metrics -> Plotly figure -> file export.

    Args:
        image: PIL image to reconstruct (any mode; converted to RGB).
        model_choice: ``"GLPN (Recommended)"`` selects the preloaded GLPN
            model; any other value selects DPT, which is downloaded and
            cached in module globals on first use.
        visualization_type: ``"point_cloud"`` for a scatter plot; any other
            value renders the mesh.

    Returns:
        tuple: ``(depth_viz, plotly_fig, zip_path, report, metrics_json)``.
        On failure the first three items and the last are ``None`` and
        ``report`` holds the error message (with traceback). Exceptions
        raised inside the pipeline are not propagated.
    """
    global dpt_model, dpt_processor

    if glpn_model is None:
        return None, None, None, "❌ Model failed to load. Please refresh the page.", None

    try:
        print("Starting reconstruction...")

        pad = 16
        # Smallest side that still leaves one 32-px tile after cropping the
        # padding from both edges.
        min_side = 2 * pad + 32

        # RGBA / grayscale / palette inputs would otherwise produce arrays
        # that the RGB-D conversion below cannot consume.
        image = image.convert("RGB")

        # Resize so both sides are multiples of 32 and height is at most 480.
        new_height = min(image.height, 480)
        new_height -= new_height % 32
        if new_height < min_side:
            raise ValueError("Image is too small for reconstruction after resizing")
        new_width = int(new_height * image.width / image.height)
        diff = new_width % 32
        new_width = new_width - diff if diff < 16 else new_width + (32 - diff)
        if new_width < min_side:
            raise ValueError("Image is too narrow for reconstruction after resizing")
        image = image.resize((new_width, new_height), Image.LANCZOS)

        if model_choice == "GLPN (Recommended)":
            processor = glpn_processor
            model = glpn_model
        else:
            if dpt_model is None:
                print("Loading DPT model (first time only)...")
                from transformers import DPTForDepthEstimation, DPTImageProcessor
                dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
                dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
                print("✓ DPT model loaded!")
            processor = dpt_processor
            model = dpt_model
            # NOTE(review): Intel/dpt-large appears to be a MiDaS-style model
            # predicting relative *inverse* depth - confirm whether this
            # branch should invert the map before back-projection.

        inputs = processor(images=image, return_tensors="pt")

        start_time = time.perf_counter()
        with torch.no_grad():
            predicted_depth = model(**inputs).predicted_depth
        depth_time = time.perf_counter() - start_time

        # Bring the prediction to the image resolution BEFORE cropping so the
        # same pixel border is removed from both; cropping first misaligns
        # them whenever the model works at a different internal resolution.
        target_size = (image.height, image.width)
        if tuple(predicted_depth.shape[-2:]) != target_size:
            predicted_depth = torch.nn.functional.interpolate(
                predicted_depth.unsqueeze(1),
                size=target_size,
                mode="bilinear",
                align_corners=False,
            ).squeeze(1)

        output = predicted_depth.squeeze().cpu().numpy() * 1000.0
        output = output[pad:-pad, pad:-pad]
        image = image.crop((pad, pad, image.width - pad, image.height - pad))

        depth_viz = _render_depth_comparison(image, output)

        width, height = image.size

        max_depth = float(np.max(output))
        if not np.isfinite(max_depth) or max_depth <= 0:
            raise ValueError("Depth estimation produced no positive depth values")
        # Clip so stray negative values cannot wrap around in uint8.
        depth_image = np.clip(output * 255 / max_depth, 0, 255).astype(np.uint8)
        image_array = np.array(image)

        depth_o3d = o3d.geometry.Image(depth_image)
        image_o3d = o3d.geometry.Image(image_array)
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            image_o3d, depth_o3d, convert_rgb_to_intensity=False
        )

        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
        camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2)

        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
        initial_points = len(pcd.points)
        if initial_points == 0:
            raise ValueError("Depth map produced an empty point cloud")

        pcd, _kept_indices = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
        outliers_removed = initial_points - len(pcd.points)

        pcd.estimate_normals()
        pcd.orient_normals_to_align_with_direction()

        mesh_start = time.perf_counter()
        mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=9, n_threads=1
        )[0]

        # Colour every mesh vertex with the colour of its nearest cloud point.
        pcd_tree = o3d.geometry.KDTreeFlann(pcd)
        pcd_colors = np.asarray(pcd.colors)
        nearest = np.asarray(
            [pcd_tree.search_knn_vector_3d(vertex, 1)[1][0] for vertex in np.asarray(mesh.vertices)],
            dtype=np.int64,
        )
        mesh.vertex_colors = o3d.utility.Vector3dVector(pcd_colors[nearest])

        # Flip about the X axis so the mesh is upright in the viewer.
        rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
        mesh.rotate(rotation, center=(0, 0, 0))
        mesh_time = time.perf_counter() - mesh_start

        mesh.compute_vertex_normals()

        is_watertight = mesh.is_watertight()
        metrics = {
            'model_used': model_choice,
            'depth_estimation_time': f"{depth_time:.2f}s",
            'mesh_reconstruction_time': f"{mesh_time:.2f}s",
            'total_time': f"{depth_time + mesh_time:.2f}s",
            'initial_points': initial_points,
            'outliers_removed': outliers_removed,
            'final_points': len(pcd.points),
            'vertices': len(mesh.vertices),
            'triangles': len(mesh.triangles),
            'is_edge_manifold': mesh.is_edge_manifold(),
            'is_vertex_manifold': mesh.is_vertex_manifold(),
            'is_watertight': is_watertight,
        }

        try:
            surface_area = mesh.get_surface_area()
            if surface_area > 0:
                metrics['surface_area'] = float(surface_area)
            else:
                # Fall back to summing triangle areas by hand.
                vertices = np.asarray(mesh.vertices)
                triangles = np.asarray(mesh.triangles)
                v0 = vertices[triangles[:, 0]]
                v1 = vertices[triangles[:, 1]]
                v2 = vertices[triangles[:, 2]]
                areas = 0.5 * np.linalg.norm(np.cross(v1 - v0, v2 - v0), axis=1)
                metrics['surface_area'] = float(np.sum(areas))
        except Exception:
            metrics['surface_area'] = "Unable to compute"

        # Volume is only reported for closed (watertight) meshes.
        try:
            metrics['volume'] = float(mesh.get_volume()) if is_watertight else None
        except Exception:
            metrics['volume'] = None

        plotly_fig = _build_plotly_figure(pcd, mesh, visualization_type)
        zip_path = _export_reconstruction(pcd, mesh, metrics)
        assessment = _generate_quality_assessment(metrics)

        report = "\n".join([
            "",
            "## Reconstruction Complete!",
            "",
            "### Performance",
            f"- **Processing Time**: {metrics['total_time']}",
            f"- **Points**: {metrics['final_points']:,}",
            f"- **Triangles**: {metrics['triangles']:,}",
            "",
            "### Quality",
            f"- **Topology**: {'Good' if metrics['is_vertex_manifold'] else 'Issues'}",
            f"- **Watertight**: {'Yes' if metrics['is_watertight'] else 'No'}",
            "",
            "### Assessment",
            assessment,
            "",
            "**Download the complete package below!**",
            "",
        ])

        return depth_viz, plotly_fig, zip_path, report, json.dumps(metrics, indent=2, default=str)

    except Exception as e:
        import traceback
        return None, None, None, f"Error: {str(e)}\n\n{traceback.format_exc()}", None
|
|
def process_image_with_safeguards(image, model_choice="GLPN (Recommended)", visualization_type="mesh", consent_given=False):
    """Run ``process_image`` behind the consent, upload and policy checks.

    Args:
        image: Uploaded PIL image, or ``None`` when nothing was uploaded.
        model_choice: Depth model label forwarded to ``process_image``.
        visualization_type: Viewer mode forwarded to ``process_image``.
        consent_given: Whether the user ticked the responsible-use checkbox.

    Returns:
        tuple: ``(depth_viz, plotly_fig, zip_path, report, metrics_json)``.
        When a guard rejects the request, or reconstruction fails, the
        first three items and ``metrics_json`` are ``None`` and ``report``
        carries the reason. On success ``metrics_json`` additionally holds
        a ``responsible_ai`` entry (session id, timestamp, consent flag).
    """
    if not consent_given:
        return None, None, None, "**You must agree to the Responsible Use Guidelines first.**", None

    if image is None:
        return None, None, None, "Please upload an image first.", None

    # The safety check is advisory only; just its warning text is used.
    _, safety_warning = check_image_safety(image)
    passes_policy, policy_message = content_policy_check(image)

    if not passes_policy:
        return None, None, None, f"{policy_message}", None

    try:
        depth_viz, plotly_fig, zip_path, report, json_metrics = process_image(
            image, model_choice, visualization_type
        )

        if safety_warning:
            report = f"**Privacy Notice:**\n{safety_warning}\n\n{report}"

        # process_image signals failure with json_metrics=None and puts the
        # reason in the report. Pass that through unchanged: feeding None to
        # json.loads would replace the real error with a TypeError message.
        if json_metrics is None:
            return depth_viz, plotly_fig, zip_path, report, None

        metrics = json.loads(json_metrics)
        metrics['responsible_ai'] = {
            'session_id': generate_session_id(),
            'timestamp': datetime.now().isoformat(),
            'consent_given': True
        }

        return depth_viz, plotly_fig, zip_path, report, json.dumps(metrics, indent=2)

    except Exception as e:
        return None, None, None, f"Error: {str(e)}", None
|
|
| |
| |
| |
|
|
| with gr.Blocks(title="Responsible AI 3D Reconstruction", theme=gr.themes.Soft()) as demo: |
| |
| gr.Markdown(""" |
| # 🏗️ 3D Reconstruction from Single Images |
| |
| |
| Transform 2D photographs into 3D spatial models |
| |
| <div style="background-color: #fff3cd; border: 2px solid #ffc107; padding: 15px; border-radius: 5px; margin: 10px 0;"> |
| <h3 style="color: #856404; margin-top: 0;">⚠️ Responsible Use Required</h3> |
| <p style="color: #856404; margin-bottom: 0;">This tool must be used ethically and legally. Review the guidelines in the <b>first tab</b>.</p> |
| </div> |
| """) |
| |
| with gr.Tabs(): |
| |
| with gr.Tab("⚠️ Responsible Use (READ FIRST)"): |
| gr.Markdown(RESPONSIBLE_AI_NOTICE) |
| gr.Markdown(""" |
| ### Known Limitations & Biases |
| - Trained primarily on Western indoor architecture |
| - May underperform on non-Western styles |
| - Scale is relative, not absolute |
| - Single viewpoint captures only visible surfaces |
| """) |
| |
| with gr.Tab("Reconstruction"): |
| consent_checkbox = gr.Checkbox( |
| label="**I have read and agree to the Responsible Use Guidelines**", |
| value=False |
| ) |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| input_image = gr.Image( |
| type="pil", |
| label="Upload Image", |
| sources=["upload", "clipboard"] |
| ) |
| |
| model_choice = gr.Radio( |
| choices=["GLPN (Recommended)", "DPT (High Quality)"], |
| value="GLPN (Recommended)", |
| label="Depth Estimation Model" |
| ) |
| |
| visualization_type = gr.Radio( |
| choices=["mesh", "point_cloud"], |
| value="mesh", |
| label="Visualization Type" |
| ) |
| |
| reconstruct_btn = gr.Button("Start Reconstruction", variant="primary", size="lg") |
| |
| with gr.Column(scale=2): |
| depth_output = gr.Image(label="Depth Map") |
| viewer_3d = gr.Plot(label="Interactive 3D Viewer") |
| |
| with gr.Row(): |
| with gr.Column(): |
| metrics_output = gr.Markdown(label="Report") |
| with gr.Column(): |
| json_output = gr.Textbox(label="Metrics (JSON)", lines=8) |
| |
| download_output = gr.File(label="Download Package (ZIP)") |
| |
| reconstruct_btn.click( |
| fn=process_image_with_safeguards, |
| inputs=[input_image, model_choice, visualization_type, consent_checkbox], |
| outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output] |
| ) |
| |
| with gr.Tab("Theory & Background"): |
| gr.Markdown(""" |
| ## About This Tool |
| |
| This application demonstrates how artificial intelligence can convert single 2D photographs |
| into interactive 3D models automatically. |
| |
| ### What Makes This Special |
| |
| **Traditional Approach:** |
| - Need special equipment (3D scanner, multiple cameras) |
| - Requires technical expertise |
| - Time-consuming process |
| - Expensive |
| |
| **This AI Approach:** |
| - Works with any single photograph |
| - No special equipment needed |
| - Automatic processing |
| - Free and accessible |
| |
| |
| |
| ## The Technology |
| |
| ### AI Model Used: GLPN |
| |
| **GLPN (Global-Local Path Networks)** |
| - Paper: Kim et al., arXiv preprint arXiv:2201.07436 (2022) |
| - Optimized for: Indoor/outdoor architectural scenes |
| - Training: NYU Depth V2 (urban indoor environments) |
| - Best for: Building interiors, street-level views |
| - Speed: Fast (~0.3-2.5s) |
| |
| ### How It Works (Simplified) |
| |
| 1. **AI analyzes photo** → Recognizes objects, patterns, perspective |
| 2. **Estimates distance** → Figures out what's close, what's far |
| 3. **Creates 3D points** → Places colored dots in 3D space |
| 4. **Builds surface** → Connects dots into smooth shape |
| |
| ### Spatial Data Pipeline |
| |
| **1. Monocular Depth Estimation** |
| - Challenge: Extracting 3D spatial information from 2D photographs |
| - Application: Similar to photogrammetry but from single images |
| - Output: Relative depth maps for spatial analysis |
| |
| **2. Point Cloud Generation** |
| - Creates 3D coordinate system (X, Y, Z) from pixels |
| - Each point: Spatial location + RGB color information |
| - Compatible with: GIS software, CAD tools, spatial databases |
| |
| **3. 3D Mesh Generation** |
| - Creates continuous surface from discrete points |
| - Similar to: Digital terrain models (DTMs) for buildings |
| - Output formats: Compatible with ArcGIS, QGIS, SketchUp |
| |
| ### Quality Metrics Explained |
| |
| - **Point Cloud Density**: More points = finer spatial resolution |
| - **Geometric Accuracy**: Manifold checks ensure valid topology |
| - **Surface Continuity**: Watertight meshes = complete volume calculations |
| - **Data Fidelity**: Triangle count indicates level of detail |
| |
| ### Limitations for Geographic Applications |
| |
| 1. **Scale Ambiguity**: Requires ground control points for absolute measurements |
| 2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces |
| 3. **No Georeferencing**: Outputs in local coordinates, not global (lat/lon) |
| 4. **Weather Dependent**: Best results with clear, well-lit conditions |
| |
| ### Comparison with Traditional Methods |
| |
| **vs. Terrestrial Laser Scanning (TLS):** |
| - Much cheaper, faster, more accessible |
| - Lower accuracy, no absolute scale |
| |
| **vs. Photogrammetry (Structure-from-Motion):** |
| - Works with single image, faster processing |
| - Less accurate, cannot resolve scale |
| |
| **vs. LiDAR:** |
| - Much lower cost, consumer cameras sufficient |
| - Lower precision, no absolute measurements |
| |
| |
| |
| ## Reconstruction Pipeline (10 Steps) |
| |
| 1. **Image Preprocessing**: Resize to model requirements |
| 2. **Depth Estimation**: Neural network inference |
| 3. **Depth Visualization**: Create comparison images |
| 4. **Point Cloud Generation**: Back-project using camera model |
| 5. **Outlier Removal**: Statistical filtering |
| 6. **Normal Estimation**: Surface orientation calculation |
| 7. **Mesh Reconstruction**: Poisson surface reconstruction |
| 8. **Quality Metrics**: Compute geometric measures |
| 9. **3D Visualization**: Create interactive viewer |
| 10. **File Export**: Generate multiple formats |
| |
| ### Key References |
| |
| 1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation |
| with Vertical CutDepth." *arXiv preprint arXiv:2201.07436* |
| 2. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." |
| *Eurographics Symposium on Geometry Processing* |
| """) |
| |
| with gr.Tab("Usage Guide"): |
| gr.Markdown(""" |
| ## How to Use This Application |
| |
| ### Step 1: Read Responsible Use Guidelines |
| - **REQUIRED**: Review the "Responsible Use" tab first |
| - Understand privacy implications |
| - Acknowledge model limitations and biases |
| - Ensure you have rights to use source images |
| |
| ### Step 2: Prepare Your Image |
| |
| **Best Practices:** |
| - Remove EXIF metadata (GPS, timestamps) for privacy |
| - Ensure you have consent if image contains people |
| - Use well-lit, clear photographs |
| - Recommended resolution: 512-1024 pixels |
| - Indoor scenes work best |
| |
| **Privacy Checklist:** |
| - [ ] No identifiable people (or consent obtained) |
| - [ ] No sensitive/private locations |
| - [ ] EXIF data removed |
| - [ ] You own rights to the image |
| |
| ### Step 3: Upload Image |
| - Click "Upload Image" area |
| - Select JPG, PNG, or BMP file |
| - **Note:** Webcam option removed for privacy protection |
| - You can also paste from clipboard |
| |
| ### Step 4: Check Consent Box |
| - Check "I have read and agree to Responsible Use Guidelines" |
| - This confirms you've reviewed ethical guidelines |
| - Processing won't start without consent |
| |
| ### Step 5: Choose Visualization |
| - **Mesh**: Solid 3D surface (recommended) |
| - **Point Cloud**: Individual 3D points with colors |
| |
| ### Step 6: Start Reconstruction |
| - Click "Start Reconstruction" |
| - Processing takes 10-60 seconds |
| - All processing is local (no cloud upload) |
| |
| ### Step 7: Explore Results |
| |
| **Depth Map:** |
| - Yellow/Red = Farther objects |
| - Purple/Blue = Closer objects |
| - Shows AI's depth understanding |
| |
| **3D Viewer:** |
| - Rotate: Click and drag |
| - Zoom: Scroll wheel |
| - Pan: Right-click and drag |
| - Reset: Double-click |
| |
| **Metrics Report:** |
| - Processing performance |
| - Quality indicators |
| - Topology validation |
| |
| ### Step 8: Download Files |
| - ZIP package contains: |
| - Point cloud (PLY) |
| - Mesh (PLY, OBJ, STL) |
| - Quality metrics (JSON) |
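| - Responsible AI session metadata appears only in the on-screen "Metrics (JSON)" panel; the files in the ZIP contain geometry and technical metrics only |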
| |
| |
| |
| ## Viewing Downloaded 3D Files |
| |
| ### Free Software Options: |
| |
| **MeshLab** (Recommended for beginners) |
| - Download: https://www.meshlab.net/ |
| - Open PLY, OBJ, STL files |
| - Great for viewing and basic editing |
| |
| **Blender** (For advanced users) |
| - Download: https://www.blender.org/ |
| - Import → Wavefront (.obj) or PLY |
| - Full 3D modeling and rendering capabilities |
| |
| **CloudCompare** (For point clouds) |
| - Download: https://www.cloudcompare.org/ |
| - Best for analyzing point cloud data |
| - Measurement and analysis tools |
| |
| **Online Viewers** (No installation) |
| - https://3dviewer.net/ |
| - https://www.creators3d.com/online-viewer |
| - Just drag and drop your OBJ/PLY file |
| |
| |
| |
| |
| ## Tips for Best Results |
| |
| ### DO: |
| - Use well-lit images |
| - Include depth cues (corners, edges) |
| - Indoor scenes work best |
| - Medium resolution (512-1024px) |
| - Remove personal metadata |
| - Obtain consent for people in images |
| |
| ### AVOID: |
| - Motion blur or low resolution |
| - Reflective surfaces (mirrors, glass) |
| - Images without consent |
| - Private property without permission |
| - Surveillance or monitoring purposes |
| - Heavy shadows or darkness |
| |
| |
| ## Understanding the Metrics |
| |
| ### Point Cloud Statistics: |
| - **Initial Points**: Raw points generated from depth |
| - **Outliers Removed**: Noisy points filtered out (typically 5-15%) |
| - **Final Points**: Clean points used for mesh generation |
| |
| ### Mesh Quality Indicators: |
| - **Edge Manifold**: Each edge connects exactly 2 faces (good topology) |
| - **Vertex Manifold**: Clean vertex connections |
| - **Watertight**: No holes, ready for 3D printing |
| - **Failed checks**: Indicate potential issues (still usable, may need repair) |
| |
| ### Processing Times: |
| - **Depth Estimation**: 0.3-2.5s (GLPN model) |
| - **Mesh Reconstruction**: 2-10s (depends on point cloud size) |
| - **Total Time**: Usually 10-60 seconds |
| |
| --- |
| |
| ## Troubleshooting |
| |
| **Problem: No output appears** |
| - Check browser console for errors |
| - Try refreshing the page |
| - Try a smaller/simpler image first |
| - Check that image uploaded successfully |
| |
| **Problem: Mesh has holes or artifacts** |
| - This is normal for single-view reconstruction |
| - Hidden surfaces cannot be reconstructed |
| - Use mesh repair tools in MeshLab if needed |
| |
| **Problem: Colors look wrong on mesh** |
| - Vertex color interpolation is approximate |
| - This is expected behavior |
| - Colors on point cloud are more accurate |
| |
| **Problem: Processing is very slow** |
| - Use smaller images |
| - This is normal on CPU (GPU is much faster) |
| |
| **Problem: "Not watertight" in metrics** |
| - Common for complex scenes |
| - Still usable for visualization |
| - For 3D printing: use mesh repair in MeshLab |
| """) |
| |
| with gr.Tab(" Ethics & Impact"): |
| gr.Markdown(""" |
| |
| ## Algorithmic Bias & Fairness |
| |
| ### Training Data Representation |
| |
| **Geographic Bias:** |
| - Heavy representation: North America, Europe |
| - Underrepresented: Africa, South Asia, Pacific Islands |
| - Impact: Lower accuracy for non-Western architecture |
| |
| **Architectural Style Bias:** |
| - Well-represented: Modern interiors, Western buildings |
| - Underrepresented: Traditional, vernacular, indigenous structures |
| - Impact: May misinterpret non-standard spatial layouts |
| |
| **Socioeconomic Bias:** |
| - Training data skewed toward middle/upper-class interiors |
| - Limited representation of informal settlements |
| - May not generalize well to all socioeconomic contexts |
| |
| |
| |
| |
| |
| ### Potential Harms |
| |
| **Privacy Violations:** |
| - Unauthorized 3D reconstruction of private spaces |
| - Creating models of individuals without consent |
| - Surveillance and tracking applications |
| |
| **Misinformation:** |
| - Generating fake 3D evidence |
| - Manipulating spatial understanding |
| - Creating misleading visualizations |
| |
| **Property Rights:** |
| - Unauthorized documentation of copyrighted designs |
| - Intellectual property theft |
| - Commercial exploitation without permission |
| |
| ### Harm Prevention |
| |
| 1. **Mandatory consent**: Require user acknowledgment |
| 2. **Use case restriction**: Prohibit surveillance and deceptive uses |
| 3. **Privacy protection**: Disable webcam, encourage EXIF removal |
| 4. **Transparency**: Clear documentation of limitations |
| |
| |
| |
| ## Accountability & Governance |
| |
| ### User Responsibilities |
| |
| As a user, you are responsible for: |
| - Ensuring lawful use of source images |
| - Obtaining necessary consents and permissions |
| - Respecting privacy and intellectual property |
| - Using outputs ethically and transparently |
| - Understanding and accounting for model biases |
| |
| ### Developer Responsibilities |
| |
| This tool implements: |
| - Clear responsible use guidelines |
| - Privacy-protective design (no webcam, local processing) |
| - Bias documentation and transparency |
| - Prohibited use cases explicitly stated |
| |
| |
| ## Future Directions |
| |
| ### Improving Fairness |
| - Train on more diverse geographic datasets |
| - Include underrepresented architectural styles |
| - Develop bias mitigation techniques |
| - Community-driven model evaluation |
| |
| ### Enhancing Privacy |
| - Face/person detection and redaction |
| - Automatic EXIF stripping |
| - Differential privacy techniques |
| """) |
| |
| with gr.Tab(" Citation"): |
| gr.Markdown(""" |
| ## Academic Citation |
| |
| ### For GLPN Model: |
| ```bibtex |
| @article{kim2022global, |
| title={Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth}, |
| author={Kim, Doyeon and Ga, Woonghyun and Ahn, Pyungwhan and Joo, Donggyu and Chun, Sehwan and Kim, Junmo}, |
| journal={arXiv preprint arXiv:2201.07436}, |
| year={2022} |
| } |
| ``` |
| |
| ### For Poisson Surface Reconstruction: |
| ```bibtex |
| @inproceedings{kazhdan2006poisson, |
| title={Poisson Surface Reconstruction}, |
| author={Kazhdan, Michael and Bolitho, Matthew and Hoppe, Hugues}, |
| booktitle={Symposium on Geometry Processing}, |
| year={2006} |
| } |
| ``` |
| |
| ## Open Source Components |
| |
| This application is built with: |
| |
| - **Transformers** (Hugging Face): Model inference framework |
| - **Open3D**: Point cloud and mesh processing |
| - **PyTorch**: Deep learning framework |
| - **Plotly**: Interactive 3D visualization |
| - **Gradio**: Web interface framework |
| - **NumPy** & **SciPy**: Numerical computing |
| - **Matplotlib**: Data visualization |
| - **Pillow (PIL)**: Image processing |
| |
| ## Model Credits |
| |
| **GLPN Model:** |
| - Developed by: KAIST (Korea Advanced Institute of Science and Technology) |
| - Hosted by: Hugging Face (vinvino02/glpn-nyu) |
| - License: Apache 2.0 |
| |
| ## Responsible AI Features |
| |
| This implementation includes: |
| - Privacy-protective design (no webcam option) |
| - Mandatory consent acknowledgment |
| - Bias documentation and transparency |
| - Ethical use guidelines |
| |
| |
| |
| """) |
| |
| gr.Markdown(""" |
| --- |
| |
| **Version:** 2.0 (Responsible AI Edition - Optimized) |
| **Last Updated:** 2025 |
| **License:** Educational and Research Use |
| |
| """) |
|
|
if __name__ == "__main__":
    import os

    # A Gradio share link relays every request (including uploaded images)
    # through Gradio's public proxy servers, which contradicts the app's
    # "local processing / nothing transmitted to external servers" notice.
    # Public sharing is therefore opt-in via the GRADIO_SHARE variable.
    share_publicly = os.environ.get("GRADIO_SHARE", "").strip().lower() in {"1", "true", "yes"}

    print("="*60)
    print("RESPONSIBLE AI 3D RECONSTRUCTION")
    print("="*60)
    # DPT is selectable in the UI and loaded on first use, so the banner must
    # not claim that only GLPN is available.
    print("✓ Lightweight default model (GLPN); DPT loads on demand")
    print("✓ No webcam option")
    if share_publicly:
        print("⚠ Public share link enabled - traffic is relayed through Gradio's servers")
    else:
        print("✓ Local processing")
    print("✓ Consent required")
    print("="*60)
    demo.launch(share=share_publicly)