import io import os import sys import laspy import numpy as np import rasterio from rasterio.transform import from_origin from rasterio.crs import CRS from skimage.morphology import skeletonize, binary_closing, disk from scipy.spatial import cKDTree from tqdm import tqdm import multiprocessing from concurrent.futures import ProcessPoolExecutor, as_completed import networkx as nx # GIS Imports for Shapefiles try: import shapely from shapely.geometry import LineString, Point import geopandas as gpd HAS_GIS = True except ImportError: HAS_GIS = False print("WARNING: 'geopandas' or 'shapely' not found. Shapefile export will be skipped.") print("Install them via: pip install geopandas shapely") # Ensure UTF-8 output sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') def build_graph_from_skeleton(skeleton_grid): """ Converts a skeleton binary grid into a NetworkX graph. Nodes = (row, col) tuples. Edges = connections between 8-neighbor pixels. """ rows, cols = np.where(skeleton_grid) # Using a set for O(1) lookups is faster than querying the array pixel_set = set(zip(rows, cols)) G = nx.Graph() for r, c in pixel_set: G.add_node((r, c)) # Check neighbors (Right, Down, Diagonals) to avoid duplicate edges neighbors = [ (r, c+1), (r+1, c), (r+1, c+1), (r+1, c-1) ] for nr, nc in neighbors: if (nr, nc) in pixel_set: G.add_edge((r, c), (nr, nc)) return G def save_ply_with_edges(path, points_xyz, edges_indices): """ Saves a PLY file with vertices and edges (lines) for 3D viewing. """ with open(path, 'w') as f: f.write("ply\n") f.write("format ascii 1.0\n") f.write(f"element vertex {len(points_xyz)}\n") f.write("property float x\n") f.write("property float y\n") f.write("property float z\n") f.write(f"element edge {len(edges_indices)}\n") f.write("property int vertex1\n") f.write("property int vertex2\n") f.write("end_header\n") for p in points_xyz: f.write(f"{p[0]:.3f} {p[1]:.3f} {p[2]:.3f}\n") for e in edges_indices: f.write(f"{e[0]} {e[1]}\n") def extract_centerline_worker(args): """ Worker function for parallel processing. """ input_path, out_laz, out_tif, out_ply, out_shp, resolution, closing_radius, epsg = args filename = os.path.basename(input_path) result = {"success": False, "filename": filename, "message": ""} try: # --- 1. Load Data --- las = laspy.read(input_path) if len(las.points) == 0: result["message"] = "Skipped: File is empty." return result points_xyz = np.vstack((las.x, las.y, las.z)).transpose() # --- 2. Rasterization --- min_x, min_y = np.min(points_xyz[:, 0]), np.min(points_xyz[:, 1]) max_x, max_y = np.max(points_xyz[:, 0]), np.max(points_xyz[:, 1]) width = int(np.ceil((max_x - min_x) / resolution)) height = int(np.ceil((max_y - min_y) / resolution)) # Create grid idx_x = ((points_xyz[:, 0] - min_x) / resolution).astype(int) idx_y = ((max_y - points_xyz[:, 1]) / resolution).astype(int) idx_x = np.clip(idx_x, 0, width - 1) idx_y = np.clip(idx_y, 0, height - 1) grid = np.zeros((height, width), dtype=np.uint8) grid[idx_y, idx_x] = 1 # --- 3. Morphology & Skeletonization --- binary_grid = grid > 0 closed_grid = binary_closing(binary_grid, disk(closing_radius)) skeleton = skeletonize(closed_grid) skel_y_idx, skel_x_idx = np.where(skeleton) if len(skel_x_idx) == 0: result["message"] = "Skipped: No centerline found." return result # --- 4. 3D Recovery --- # Map Grid -> Real Coords skel_x = min_x + (skel_x_idx * resolution) + (resolution / 2) skel_y = max_y - (skel_y_idx * resolution) - (resolution / 2) # Z Recovery (Nearest Neighbor) tree = cKDTree(points_xyz[:, :2]) skel_xy = np.vstack((skel_x, skel_y)).transpose() dists, indices = tree.query(skel_xy, k=1) skel_z = points_xyz[indices, 2] # Array of final 3D points [x, y, z] final_points_xyz = np.vstack((skel_x, skel_y, skel_z)).transpose() # --- 5. Export Basic Formats (LAZ, TIF) --- # Export TIF transform = from_origin(min_x, max_y, resolution, resolution) crs_obj = CRS.from_epsg(epsg) with rasterio.open( out_tif, 'w', driver='GTiff', height=skeleton.shape[0], width=skeleton.shape[1], count=1, dtype=rasterio.uint8, crs=crs_obj, transform=transform, compress='lzw', nodata=0 ) as dst: dst.write(skeleton.astype(rasterio.uint8), 1) # Export LAZ header = laspy.LasHeader(point_format=3, version="1.2") header.scales = [0.001, 0.001, 0.001] header.offsets = [min_x, min_y, np.min(skel_z)] las_out = laspy.LasData(header) las_out.x = skel_x las_out.y = skel_y las_out.z = skel_z las_out.write(out_laz) # --- 6. Build Graph for Vectors --- G = build_graph_from_skeleton(skeleton) # Map (row, col) pixels to the index in `final_points_xyz` array # This allows us to look up the 3D coordinate for any graph node coord_map = { (r, c): i for i, (r, c) in enumerate(zip(skel_y_idx, skel_x_idx)) } # --- 7. Export PLY (3D Wireframe) --- edges_indices = [] for u, v in G.edges(): if u in coord_map and v in coord_map: edges_indices.append((coord_map[u], coord_map[v])) save_ply_with_edges(out_ply, final_points_xyz, edges_indices) # --- 8. Export SHP (Shapefile) --- if HAS_GIS: lines = [] # We treat every edge in the graph as a 3D line segment. # Merging them into long polylines is complex due to branching, # so keeping them as segments is the most robust method for raw extraction. for u, v in G.edges(): if u in coord_map and v in coord_map: idx_u = coord_map[u] idx_v = coord_map[v] p1 = final_points_xyz[idx_u] # (x, y, z) p2 = final_points_xyz[idx_v] # (x, y, z) # Create 3D LineString line = LineString([p1, p2]) lines.append(line) if lines: # Create GeoDataFrame gdf = gpd.GeoDataFrame(geometry=lines, crs=f"EPSG:{epsg}") # Save to Shapefile gdf.to_file(out_shp, driver="ESRI Shapefile") result["success"] = True return result except Exception as e: result["message"] = f"Error: {str(e)}" return result def batch_process_parallel(input_folder, output_folder, resolution, closing_radius, epsg): # 1. Setup Folders os.makedirs(output_folder, exist_ok=True) subfolders = { "tif": os.path.join(output_folder, "tif"), "laz": os.path.join(output_folder, "laz"), "ply": os.path.join(output_folder, "ply"), "shp": os.path.join(output_folder, "shp") } for path in subfolders.values(): os.makedirs(path, exist_ok=True) log_file = os.path.join(output_folder, "processing_log.txt") with open(log_file, 'w') as f: f.write(f"Batch Processing Log\n{'='*40}\n") # 2. Collect Files files = [f for f in os.listdir(input_folder) if f.lower().endswith(('.laz', '.las'))] if not files: print("No .laz files found.") return # 3. CPU Config (90%) total_cores = os.cpu_count() or 1 max_workers = max(1, int(total_cores * 0.90)) print(f"\n{'='*60}") print(f"High-Performance Vector Extraction") print(f"Files: {len(files)} | Cores: {max_workers}") print(f"Shapefile Export: {'ENABLED' if HAS_GIS else 'DISABLED (install geopandas)'}") print(f"{'='*60}\n") # 4. Prepare Tasks tasks = [] for file in files: in_path = os.path.join(input_folder, file) base = os.path.splitext(file)[0] args = ( in_path, os.path.join(subfolders["laz"], f"{base}.laz"), os.path.join(subfolders["tif"], f"{base}.tif"), os.path.join(subfolders["ply"], f"{base}.ply"), os.path.join(subfolders["shp"], f"{base}.shp"), resolution, closing_radius, epsg ) tasks.append(args) # 5. Execute success_count = 0 fail_count = 0 with ProcessPoolExecutor(max_workers=max_workers) as executor: future_to_file = {executor.submit(extract_centerline_worker, t): t[0] for t in tasks} with tqdm(total=len(files), unit="file") as pbar: for future in as_completed(future_to_file): res = future.result() if res["success"]: success_count += 1 else: fail_count += 1 with open(log_file, 'a') as f: f.write(f"{res['filename']}: {res['message']}\n") pbar.update(1) print(f"\n{'='*60}") print(f"Done. Success: {success_count} | Failed: {fail_count}") print(f"Shapefiles saved in: {subfolders['shp']}") print(f"{'='*60}") # === Main Configuration === if __name__ == "__main__": # --- Input / Output --- input_directory = "0_Trail_Points" output_directory = "1_Vector_Results" # --- Parameters --- # Coordinate System (EPSG:28992 = Amersfoort / RD New) epsg_code = 28992 # Resolution (meters): 0.5 is good for trails resolution = 0.1 # Closing Radius (pixels): # Increase to 4-6 if trails are very broken/dotted. closing_radius = 4 # --- Run --- batch_process_parallel( input_directory, output_directory, resolution, closing_radius, epsg_code )