[ ]:
from IPython.display import Markdown, display
import inspect
import pathlib
import ast
def display_task_source(task_func, title):
    """Render a task function's source code as Markdown, without its docstring.

    The function is looked up by name in the source file of the module that
    defines ``task_func``. The snippet runs from the function's own
    ``@flowtask`` decorator (or, if it has none, its first decorator or the
    ``def`` line) through the last line of the function. Docstring lines are
    left out.

    Args:
        task_func: Function object whose defining module has a source file.
        title: Accepted for call-site readability; the rendered heading is
            always built from ``task_func.__name__``.

    Returns:
        None. The snippet, or an error notice, is shown with ``display``.
    """
    module = inspect.getmodule(task_func)
    func_name = task_func.__name__

    # Modules without a backing file (notebook-defined functions, builtins,
    # namespace packages with __file__ = None) cannot be read from disk.
    if not module or not getattr(module, '__file__', None):
        display(Markdown(f"❌ Could not find source file for `{func_name}`"))
        return

    file_content = pathlib.Path(module.__file__).read_text(encoding='utf-8')
    tree = ast.parse(file_content)
    # Split on "\n" only so indices line up with ast line numbers
    # (str.splitlines would also break on form feeds and similar characters).
    lines = file_content.split('\n')

    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    node = next(
        (
            candidate
            for candidate in ast.walk(tree)
            if isinstance(candidate, function_types) and candidate.name == func_name
        ),
        None,
    )
    if node is None:
        display(Markdown(f"❌ Could not parse function `{func_name}` from source file"))
        return

    # Choose the first displayed line (1-indexed) from this function's OWN
    # decorators, so a decorator belonging to an earlier function is never used.
    decorator_lines = sorted(decorator.lineno for decorator in node.decorator_list)
    first_line = decorator_lines[0] if decorator_lines else node.lineno
    for lineno in decorator_lines:
        if lines[lineno - 1].lstrip().startswith('@flowtask'):
            first_line = lineno
            break

    # The docstring is the first body statement when it is a bare string
    # constant. It is only dropped when it sits below the `def` line, so a
    # one-line function keeps its header.
    docstring_lines = range(0)
    first_stmt = node.body[0] if node.body else None
    if (
        isinstance(first_stmt, ast.Expr)
        and isinstance(first_stmt.value, ast.Constant)
        and isinstance(first_stmt.value.value, str)
        and first_stmt.lineno > node.lineno
    ):
        docstring_lines = range(first_stmt.lineno, first_stmt.end_lineno + 1)

    source_lines = [
        lines[lineno - 1]
        for lineno in range(first_line, node.end_lineno + 1)
        if lineno not in docstring_lines
    ]
    source_code = '\n'.join(source_lines)

    display(Markdown(f"\n### `{func_name}`\n```python\n{source_code}\n```\n"))
Fabwave - Part Classification using HOOPS AI
[2]:
import hoops_ai
import os
# Register the test license returned by hoops_ai.use_test_license().
# NOTE(review): validate=False presumably skips license validation — confirm against the hoops_ai docs.
hoops_ai.set_license(hoops_ai.use_test_license(), validate=False)
ℹ️ Using TEST LICENSE (expires December 8, 2025 - 37 days remaining)
For production use, obtain your own license from Tech Soft 3D
[3]:
# Take the flow name from the task module instead of hard-coding it in the notebook.
# NOTE(review): an earlier remark here mentioned a "10-file test"; the recorded run below processed 4572 items — confirm which is intended.
from cad_tasks_fabwave import get_flow_name
flow_name = get_flow_name()
print(f"Flow name: {flow_name}")
Flow name: ETL_Fabwave_training
[4]:
import os
import pathlib
from typing import Tuple, List
# Import the flow builder framework from the library
import hoops_ai
from hoops_ai.flowmanager import flowtask
from hoops_ai.cadaccess import HOOPSLoader, HOOPSTools
from hoops_ai.cadencoder import BrepEncoder
from hoops_ai.dataset import DatasetExplorer
from hoops_ai.storage import DataStorage, CADFileRetriever, LocalStorageProvider
from hoops_ai.storage.datasetstorage.schema_builder import SchemaBuilder
Configuration and Setup
[5]:
# Paths: flow outputs are written under "<current working directory>/out"
nb_dir = pathlib.Path.cwd()
flows_outputdir = nb_dir / "out"
# Import task functions from external module for ProcessPoolExecutor compatibility
from cad_tasks_fabwave import gather_fabwave_files, encode_data_for_ml_training, my_workflow_for_fabewave, get_flow_name
ETL Data pipeline
[6]:
# Data source folders handed to the flow as 'cad_datasources'.
# The active entry is the Fabwave root; the commented entries narrow the run to single class folders.
datasources_dir = [
    str(nb_dir.parent / "packages" / "cadfiles" / "fabwave"),
    #str(nb_dir.parent.joinpath("packages","cadfiles","fabwave", "CAD_1_15_Classes", "Bolts")),
    #str(nb_dir.parent.joinpath("packages","cadfiles","fabwave", "CAD_1_15_Classes", "Bushing_Damping_Liners")),
    #str(nb_dir.parent.joinpath("packages","cadfiles","fabwave", "CAD25-45_TOTAL1000", "Sleeve Washers"))
]
[7]:
# Show the source of the extract task (imported from cad_tasks_fabwave) inline for the reader.
display_task_source(gather_fabwave_files, "gather_fabwave_files")
gather_fabwave_files
@flowtask.extract(
    name="gather fabwave files",
    inputs=["cad_datasources"],
    outputs=["cad_dataset"],
    parallel_execution=True
)
def gather_fabwave_files(source: str) -> List[str]:
    # Collect the CAD files found under `source` and return all of them in a
    # reproducible shuffled order.
    #   source:  directory passed to LocalStorageProvider as `directory_path`.
    #   returns: shuffled copy of the list from CADFileRetriever.get_file_list().
    import random

    retriever = CADFileRetriever(
        storage_provider=LocalStorageProvider(directory_path=source),
        formats=[".stp", ".step", ".iges", ".igs"],
        #filter_pattern="*5*"  # Only files with "5" in name
    )
    source_files = retriever.get_file_list()

    # Shuffle a copy so files are not processed in listing order. A private
    # generator seeded with 42 yields the same order as random.seed(42) followed
    # by random.shuffle, without resetting the process-wide RNG state.
    shuffled_files = source_files.copy()
    random.Random(42).shuffle(shuffled_files)
    return shuffled_files
[8]:
# Fabwave class names; the position in this tuple is the integer label code.
_FABWAVE_CLASS_NAMES = (
    "Bearings",
    "Bolts",
    "Brackets",
    "Bushing",
    "Bushing_Damping_Liners",
    "Collets",
    "Gasket",
    "Grommets",
    "HeadlessScrews",
    "Hex_Head_Screws",
    "Keyway_Shaft",
    "Machine_Key",
    "Nuts",
    "O_Rings",
    "Thumb_Screws",
    "Pipe_Fittings",
    "Pipe_Joints",
    "Pipes",
    "Rollers",
    "Rotary_Shaft",
    "Shaft_Collar",
    "Slotted_Flat_Head_Screws",
    "Socket_Head_Screws",
    "Washers",
    "Boxes",
    "Cotter_Pin",
    "External Retaining Rings",
    "Eyesbolts With Shoulders",
    "Fixed Cap Flange",
    "Gear Rod Stock",
    "Gears",
    "Holebolts With Shoulders",
    "Idler Sprocket",
    "Miter Gear Set Screw",
    "Miter Gears",
    "Rectangular Gear Rack",
    "Routing EyeBolts Bent Closed Eye",
    "Sleeve Washers",
    "Socket-Connect Flanges",
    "Sprocket Taper-Lock Bushing",
    "Strut Channel Floor Mount",
    "Strut Channel Side-Side",
    "Tag Holder",
    "Webbing Guide",
    "Wide Grip External Retaining Ring",
)

# label code -> {"name", "description"}; every class shares the same description text
labels_description = {
    code: {"name": class_name, "description": " fabewave dataset sample "}
    for code, class_name in enumerate(_FABWAVE_CLASS_NAMES)
}

# Reverse lookup: class name -> label code
description_to_code = {
    entry["name"]: code for code, entry in labels_description.items()
}
Data Transformation: Encoding data for use as ML input
[9]:
# Show the source of the transform task (imported from cad_tasks_fabwave) inline for the reader.
display_task_source(encode_data_for_ml_training, "encode_data_for_ml_training")
encode_data_for_ml_training
@flowtask.transform(
    name="Preparing data for Exploring and ML training",
    inputs=["cad_dataset"],
    outputs=["cad_files_encoded"],
    parallel_execution=True
)
def encode_data_for_ml_training(cad_file: str, cad_loader : HOOPSLoader, storage : DataStorage) -> str:
    # Encode one CAD file, attach its class label, export a graph file and
    # return the storage path.
    #   cad_file:   path of the CAD file; its class is the name of the folder
    #               two levels up (<class>/<subfolder>/<file>).
    #   cad_loader: loader used to open the file and passed on to the encoder.
    #   storage:    per-file store that receives the schema, data and metadata.
    #   returns:    storage.get_file_path("").
    #   raises:     ValueError when the class folder is not in description_to_code.
    import numpy as np

    # The returned model is not used below.
    # NOTE(review): presumably the call loads the file into cad_loader for the encoding step — confirm.
    cad_model = cad_loader.create_from_file(cad_file)

    storage.set_schema(cad_schema)

    facecount, edgecount = my_workflow_for_fabewave.encode_cad_data(cad_file, cad_loader, storage)

    # Add label data. `.name` keeps the full folder name; `.stem` would cut a
    # folder name at its last dot.
    folder_with_name = pathlib.Path(cad_file).parent.parent.name
    label_code = description_to_code.get(folder_with_name, None)

    # Validate label_code - an unknown category fails this item with an error
    if label_code is None:
        raise ValueError(f"Unknown category '{folder_with_name}' for file {cad_file}. Category not found in labels_description.")

    # Save label data in the schema-defined group for dataset analytics
    storage.save_data("Labels/part_label", np.array([label_code]))
    storage.save_metadata("part_label_description", folder_with_name)

    # ALSO save label using the key expected by GraphClassification.convert_encoded_data_to_graph
    # This is required for the DGL graph files to have the correct labels
    storage.save_data(LabelStorage.GRAPH_CADENTITY, np.array([label_code]))

    #my_workflow_for_fabewave.encode_label_data()
    dgl_storage = DGLGraphStoreHandler()

    # DGL graph Bin file, named after an id derived from the file path without its suffix
    item_no_suffix = pathlib.Path(cad_file).with_suffix("")  # Remove the suffix to get the base name
    hash_id = generate_unique_id_from_path(str(item_no_suffix))
    dgl_output_path = pathlib.Path(flows_outputdir).joinpath("flows", flow_name, "dgl", f"{hash_id}.ml")
    dgl_output_path.parent.mkdir(parents=True, exist_ok=True)
    my_workflow_for_fabewave.convert_encoded_data_to_graph(storage, dgl_storage, str(dgl_output_path))

    # Save file-level metadata (will be routed to .infoset)
    storage.save_metadata("Item", str(cad_file))
    storage.save_metadata("source", "FABWAVE")

    # Compress the storage into a .data file
    storage.compress_store()

    # Return the base storage path
    return storage.get_file_path("")
Pipeline execution
[10]:
# Create and run the Data Flow
flow_name = get_flow_name()
cad_flow = hoops_ai.create_flow(
    name=flow_name,
    tasks=[gather_fabwave_files, encode_data_for_ml_training],
    max_workers=50,
    flows_outputdir=str(flows_outputdir),
    ml_task="Part Classification",
    auto_dataset_export=True,  # Enable automatic dataset merging
    export_visualization=True  # Enable visualization export
)

# Run the flow to process all files
print("Starting flow execution with parallel processing...")
flow_output, output_dict, flow_file = cad_flow.process(inputs={'cad_datasources': datasources_dir})

# Display results
# NOTE(review): this banner is printed unconditionally; the flow log reports
# per-item failures separately (see error_summary.json in the flow directory).
print("\n" + "="*70)
print("FLOW EXECUTION COMPLETED SUCCESSFULLY")
print("="*70)
print("\nDataset files created:")
print(f" Main dataset: {output_dict.get('flow_data', 'N/A')}")
print(f" Info dataset: {output_dict.get('flow_info', 'N/A')}")
print(f" Attributes: {output_dict.get('flow_attributes', 'N/A')}")
print(f" Flow file: {flow_file}")
print(f"\nTotal processing time: {output_dict.get('Duration [seconds]', {}).get('total', 0):.2f} seconds")
print(f"Files processed: {output_dict.get('file_count', 0)}")
Starting flow execution with parallel processing...
|INFO| FLOW | ######### Flow 'ETL_Fabwave_training' start #######
|WARNING| FLOW | Cleaning up existing flow directory: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training
|WARNING| FLOW | Removing all previous outputs for flow 'ETL_Fabwave_training' to avoid build conflicts.
|INFO| FLOW | Flow directory successfully cleaned and recreated: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training
|INFO| FLOW |
Flow Execution Summary
|INFO| FLOW | ==================================================
|INFO| FLOW | Task 1: gather fabwave files
|INFO| FLOW | Inputs : cad_datasources
|INFO| FLOW | Outputs: cad_dataset
|INFO| FLOW | Task 2: Preparing data for Exploring and ML training
|INFO| FLOW | Inputs : cad_dataset
|INFO| FLOW | Outputs: cad_files_encoded
|INFO| FLOW | Task 3: AutoDatasetExportTask
|INFO| FLOW | Inputs : cad_files_encoded
|INFO| FLOW | Outputs: encoded_dataset, encoded_dataset_info, encoded_dataset_attribs
|INFO| FLOW |
Task Dependencies:
|INFO| FLOW | gather fabwave files has no dependencies.
|INFO| FLOW | gather fabwave files --> Preparing data for Exploring and ML training
|INFO| FLOW | Preparing data for Exploring and ML training --> AutoDatasetExportTask
|INFO| FLOW | ==================================================
|INFO| FLOW | Executing ParallelTask 'gather fabwave files' with 1 items.
|INFO| FLOW | Executing ParallelTask 'Preparing data for Exploring and ML training' with 4572 items.
|WARNING| FLOW | Total number of items with errors: 26 (0.57%)
|WARNING| FLOW | Corrupted items are listed in 'C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\error_summary.json'.
|INFO| FLOW | Executing SequentialTask 'AutoDatasetExportTask'.
[DatasetMerger] Saved schema with 4 groups to metadata.json
|INFO| FLOW | Auto dataset export completed in 10976.49 seconds
Sequential Task end=====================
|INFO| FLOW | Time taken: 17058.31 seconds
|INFO| FLOW | ######### Flow 'ETL_Fabwave_training' end ######
======================================================================
FLOW EXECUTION COMPLETED SUCCESSFULLY
======================================================================
Dataset files created:
Main dataset: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ETL_Fabwave_training.dataset
Info dataset: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ETL_Fabwave_training.infoset
Attributes: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ETL_Fabwave_training.attribset
Flow file: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out/flows/ETL_Fabwave_training/ETL_Fabwave_training.flow
Total processing time: 17058.31 seconds
Files processed: 4572
Data Serving: Analytics
[11]:
# Explore the generated dataset
# `flow_file` is the .flow path returned by cad_flow.process() in the execution cell.
# To explore an earlier run without re-processing, set flow_file to an existing .flow file instead, e.g.:
# flow_file = str(pathlib.Path("c:/Users/LuisSalazar/Documents/MAIN/MLProject/repo/ML-Initiative/notebooks/out/flows/ETL_Fabwave_training/ETL_Fabwave_training.flow"))
explorer = DatasetExplorer(flow_output_file=str(flow_file))
# Print the groups/arrays of the dataset and the file_info table
explorer.print_table_of_contents()
[DatasetExplorer] Default local cluster started: <Client: 'tcp://127.0.0.1:58622' processes=1 threads=16, memory=7.45 GiB>
Dataset Table of Contents
LABELS_GROUP:
FILE_ID_CODE_LABELS_DATA: Shape: (4546,), Dims: ('Labels_part_label_dim_0',), Size: 4546
PART_LABEL_DATA: Shape: (4546,), Dims: ('Labels_part_label_dim_0',), Size: 4546
EDGES_GROUP:
EDGE_CONVEXITIES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_DIHEDRAL_ANGLES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_INDICES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_LENGTHS_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_TYPES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_U_GRIDS_DATA: Shape: (337065, 10, 6), Dims: ('edge', 'u', 'component'), Size: 20223900
FILE_ID_CODE_EDGES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
FACES_GROUP:
FACE_AREAS_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FACE_DISCRETIZATION_DATA: Shape: (130923, 100, 7), Dims: ('face', 'sample', 'component'), Size: 91646100
FACE_INDICES_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FACE_LOOPS_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FACE_TYPES_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FILE_ID_CODE_FACES_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
GRAPH_GROUP:
EDGES_DESTINATION_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGES_SOURCE_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
FILE_ID_CODE_GRAPH_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
NUM_NODES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
==================================
Columns in file_info:
name id description stream_cache_png stream_cache_3d subset table_name
0 000757c971d4af379cff2bf219566f76 0 ...25a-d5c6-4a99-a7f1-efc5c122e442.stp ...00757c971d4af379cff2bf219566f76.png ...00757c971d4af379cff2bf219566f76.scs N/A file_info
1 000d2096f1b75775aee0cca3869062ac 1 ...376-4e32-4a62-8636-833a596c3a24.stp ...00d2096f1b75775aee0cca3869062ac.png ...00d2096f1b75775aee0cca3869062ac.scs N/A file_info
2 0018d103885051d4463bfbdf97642644 2 ...8a0-14fc-4bcc-94f5-7e206c7ab2eb.stp ...018d103885051d4463bfbdf97642644.png ...018d103885051d4463bfbdf97642644.scs N/A file_info
3 001ec9bc5e47900ff95873b20a8cf97e 3 ...s\STEP\sshettigarsleevewasher30.stp ...01ec9bc5e47900ff95873b20a8cf97e.png ...01ec9bc5e47900ff95873b20a8cf97e.scs N/A file_info
4 002b9953b2669f0dbf7e95a97d47e759 4 ...b39-9f03-490e-b962-55ec3761f456.stp ...02b9953b2669f0dbf7e95a97d47e759.png ...02b9953b2669f0dbf7e95a97d47e759.scs N/A file_info
5 0043ebf2efcc3849c5b61c45f4190369 5 ...aae-e8c6-40df-a29b-5770a2d5a73c.stp ...043ebf2efcc3849c5b61c45f4190369.png ...043ebf2efcc3849c5b61c45f4190369.scs N/A file_info
6 0045b381a0be8fbc8d49688a6479a076 6 ...ef6-87ef-45b6-8c9c-0414903668e3.stp ...045b381a0be8fbc8d49688a6479a076.png ...045b381a0be8fbc8d49688a6479a076.scs N/A file_info
7 005479bbb0a5df52f6d9cc2ad0f8253c 7 ...s\STEP\sshettigarsleevewasher44.stp ...05479bbb0a5df52f6d9cc2ad0f8253c.png ...05479bbb0a5df52f6d9cc2ad0f8253c.scs N/A file_info
8 0054a7cfbadf02dce98531f4dd7508a5 8 ...800-c9bd-4dd6-b425-30bf848fcd41.stp ...054a7cfbadf02dce98531f4dd7508a5.png ...054a7cfbadf02dce98531f4dd7508a5.scs N/A file_info
9 00a5e9cd26129c2757fe6a08407df0f9 9 ...dbe-97b8-46d4-b550-aa40dab589cd.stp ...0a5e9cd26129c2757fe6a08407df0f9.png ...0a5e9cd26129c2757fe6a08407df0f9.scs N/A file_info
... ... ... ... ... ... ... ...
4536 ff3d884bd5a5e96e491a4ea638d583e0 4536 ...5ae-1468-46f5-bce0-c959e3cbf870.stp ...f3d884bd5a5e96e491a4ea638d583e0.png ...f3d884bd5a5e96e491a4ea638d583e0.scs N/A file_info
4537 ff3fcef336d538c95a1f04ccf01339f1 4537 ...02-e51d-43d8-bd4b-171c3589485b.step ...f3fcef336d538c95a1f04ccf01339f1.png ...f3fcef336d538c95a1f04ccf01339f1.scs N/A file_info
4538 ff69cdbe147e5b2b231a48c668268d3b 4538 ...c3a-a37f-416c-8ddb-e0683a1a8926.stp ...f69cdbe147e5b2b231a48c668268d3b.png ...f69cdbe147e5b2b231a48c668268d3b.scs N/A file_info
4539 ff860352bbd8c7af1b596835829288c1 4539 ...543-8b94-4dee-a8e5-fdc362a80359.stp ...f860352bbd8c7af1b596835829288c1.png ...f860352bbd8c7af1b596835829288c1.scs N/A file_info
4540 ff94b5f50c009fcc19e231c8dc4fef18 4540 ...lasses\Brackets\STEP\bracket217.stp ...f94b5f50c009fcc19e231c8dc4fef18.png ...f94b5f50c009fcc19e231c8dc4fef18.scs N/A file_info
4541 ff96da3be8fa50b0c3516392e7c57770 4541 ...cdb-e812-42b6-a29f-5973b8b41042.stp ...f96da3be8fa50b0c3516392e7c57770.png ...f96da3be8fa50b0c3516392e7c57770.scs N/A file_info
4542 ffb399838ecd3d81f1e05edd13002887 4542 ...106-6aba-43ff-b412-4b97010388b2.stp ...fb399838ecd3d81f1e05edd13002887.png ...fb399838ecd3d81f1e05edd13002887.scs N/A file_info
4543 ffc34e21ca8c3b0aefc40057489b1db0 4543 ...30-ad62-4695-9e60-317db1586e81.step ...fc34e21ca8c3b0aefc40057489b1db0.png ...fc34e21ca8c3b0aefc40057489b1db0.scs N/A file_info
4544 ffd6cacefde868ff77c8057d55add378 4544 ...83d-2422-433d-88d1-0193c2c1471c.stp ...fd6cacefde868ff77c8057d55add378.png ...fd6cacefde868ff77c8057d55add378.scs N/A file_info
4545 ffd8d4beab2e24522adc38308def99ec 4545 ...200-0d37-4719-b72d-b58833bef79a.stp ...fd8d4beab2e24522adc38308def99ec.png ...fd8d4beab2e24522adc38308def99ec.scs N/A file_info
ML-Ready Dataset Preparation
The DatasetLoader provides tools for preparing the merged dataset for machine learning:
Key Capabilities:
Stratified Splitting: Create train/validation/test splits while preserving class distributions
Subset Tracking: Records file assignments in the dataset metadata
[12]:
# List the group names the explorer exposes for this dataset
print(explorer.available_groups())
{'graph', 'faces', 'edges', 'Labels'}
[13]:
# List the arrays stored in the 'Labels' group
print(explorer.available_arrays('Labels'))
{'part_label', 'file_id_code_Labels'}
[14]:
# Visualization libraries
import matplotlib.pyplot as plt
def print_distribution_info(dist, title="Distribution"):
    """Draw a bar chart of the number of files in each bin of a distribution.

    Reads ``dist['file_id_codes_in_bins']`` and ``dist['bin_edges']``, writes the
    per-bin file counts back to ``dist['file_count']`` and shows the figure.
    """
    # Per-bin file counts; they are also stored on the input dict
    counts = [bin_files.size for bin_files in dist['file_id_codes_in_bins']]
    dist['file_count'] = counts

    fig, ax = plt.subplots(figsize=(12, 4))

    # Bars sit at the bin midpoints; the first bin's width is used for every bar
    edges = dist['bin_edges']
    bin_centers = 0.5 * (edges[1:] + edges[:-1])
    bar_width = edges[1] - edges[0]
    ax.bar(
        bin_centers,
        counts,
        width=bar_width,
        alpha=0.7,
        color='steelblue',
        edgecolor='black',
        linewidth=1,
    )

    # Write the count above every non-empty bin
    for idx, count in enumerate(counts):
        if count <= 0:
            continue
        ax.text(bin_centers[idx], count + 0.5, f"{count}",
                ha='center', va='bottom', fontsize=8)

    ax.set_xlabel('Value')
    ax.set_ylabel('Count')
    ax.set_title(f'{title} Histogram')
    ax.grid(True, linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.show()
[15]:
import time
# Time how long building the distribution takes
start_time = time.time()
# NOTE(review): despite the `face_dist` name and the "Material"/"Materials" wording below,
# this is the distribution of `part_label` from the "Labels" group.
face_dist = explorer.create_distribution(key="part_label", bins=None, group="Labels")
print(f"Material distribution created in {(time.time() - start_time):.2f} seconds\n")
# Plot the number of files per bin
print_distribution_info(face_dist, title="Materials")
Material distribution created in 2.21 seconds
Dataset Visualization with DatasetViewer
The DatasetViewer is a powerful visualization tool that bridges dataset queries and visual analysis. It enables you to quickly visualize query results in two ways:
Image Grids: Generate collages of PNG previews for rapid visual scanning
Interactive 3D Views: Open inline 3D viewers for detailed model inspection
[16]:
# Import the DatasetViewer from the insights module
from hoops_ai.insights import DatasetViewer
# Create a DatasetViewer using the convenience method from_explorer
# This method queries the explorer and builds the file ID to visualization path mappings
# The same viewer instance is reused by the preview cells further down.
dataset_viewer = DatasetViewer.from_explorer(explorer)
2025-11-01 01:47:54 | INFO | hoops_ai.insights.dataset_viewer | Built file mapping for 4546 files
[17]:
start_time = time.time()

# Label code to filter on, resolved by class name instead of hard-coding 23
WASHERS_LABEL = description_to_code["Washers"]


def material_is_frequent(ds):
    """Return the result of comparing ``ds['part_label']`` with the Washers label code."""
    return ds['part_label'] == WASHERS_LABEL


# Query the files whose label matches the condition
filelist = explorer.get_file_list(group="Labels", where=material_is_frequent)
print(f"Filtering completed in {(time.time() - start_time):.2f} seconds")
print(len(filelist))
Filtering completed in 0.11 seconds
722
Example 1: Visualize Query Results as Image Grid
Now let’s use the query results we obtained earlier and visualize them as a grid of images. This is perfect for quickly scanning through many files to understand patterns or identify specific cases.
[ ]:
# Visualize the filtered files as an image grid (8 columns) with file IDs as labels
fig = dataset_viewer.show_preview_as_image(
filelist,
k=25, # Show up to 25 files
grid_cols=8, # 8 columns
label_format='id', # Show file IDs as labels
figsize=(15, 5) # Larger figure size
)
plt.show()
Machine Learning Training
[ ]:
# Load and split dataset for machine learning
from hoops_ai.dataset import DatasetLoader
# The .dataset and .infoset files are looked up next to the .flow file, using the same file stem
flow_path = pathlib.Path(flow_file)
loader = DatasetLoader(
merged_store_path=str(flow_path.parent / f"{flow_path.stem}.dataset"),
parquet_file_path=str(flow_path.parent / f"{flow_path.stem}.infoset")
)
# Split dataset by part label (key "part_label" in the "Labels" group), 60/20/20, with a fixed random_state
train_size, val_size, test_size = loader.split(
key="part_label",
group="Labels", # Explicitly specify the group for clarity
train=0.6,
validation=0.2,
test=0.2,
random_state=42
)
print(f"Dataset split: Train={train_size}, Validation={val_size}, Test={test_size}")
# Access training dataset
train_dataset = loader.get_dataset("train")
print(f"Training dataset ready with {len(train_dataset)} samples")
[ ]:
from hoops_ai.ml.EXPERIMENTAL import FlowTrainer
# Root folder of this flow's outputs, built from the configured output directory
# and flow name rather than repeating "out" and the flow name as literals.
flow_root_dir = flows_outputdir.joinpath("flows", flow_name)
[ ]:
# Configure training: the model workflow and the split dataset loader defined above,
# results written under flow_root_dir, CPU only, 30 epochs with batch size 64.
flow_trainer = FlowTrainer(
flowmodel = my_workflow_for_fabewave,
datasetLoader = loader,
experiment_name = "HOOPS_AI_train",
result_dir = flow_root_dir,
accelerator = 'cpu',
devices = 1,
max_epochs = 30,
batch_size = 64
)
[22]:
# Run training; the returned value is the checkpoint path reused by the test step
trained_model_path = flow_trainer.train()
print(f"Training finished. Model checkpoint saved in {trained_model_path}")
IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.
Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)
`Trainer.fit` stopped: `max_epochs=30` reached.
Training finished. Model checkpoint saved in C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ml_output\HOOPS_AI_train\1101\014808\best.ckpt
[23]:
## Testing phase
# Evaluate the checkpoint produced by the training cell
flow_trainer.test(trained_model_path)
print("Testing finished")
Restoring states from the checkpoint path at C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ml_output\HOOPS_AI_train\1101\014808\best.ckpt
-----------------------------------------------------------------------------------
GRAPH CLASSIFICATION MODEL - TESTING STEP
-----------------------------------------------------------------------------------
The trained model: C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ml_output\HOOPS_AI_train\1101\014808\best.ckpt
Test set contains 917 training samples
-----------------------------------------------------------------------------------
Loaded model weights from checkpoint at C:\Users\LuisSalazar\Documents\MAIN\MLProject\repo\HOOPS-AI-tutorials\notebooks\out\flows\ETL_Fabwave_training\ml_output\HOOPS_AI_train\1101\014808\best.ckpt
Number of classes: 45
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Test metric DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
IoU 0.9566790352504638
overall_accuracy 0.9921875
per_class_accuracy 0.9844567179679871
test_acc 0.9921875
test_loss 0.03970249742269516
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Testing finished
[24]:
# Re-open the dataset after the split: the `subset` column of file_info now shows
# train/validation/test where the earlier listing showed N/A.
explorer = DatasetExplorer(flow_output_file=str(flow_file))
explorer.print_table_of_contents()
[DatasetExplorer] Default local cluster started: <Client: 'tcp://127.0.0.1:60191' processes=1 threads=16, memory=7.45 GiB>
[DatasetExplorer] All resources closed.
Dataset Table of Contents
LABELS_GROUP:
FILE_ID_CODE_LABELS_DATA: Shape: (4546,), Dims: ('Labels_part_label_dim_0',), Size: 4546
PART_LABEL_DATA: Shape: (4546,), Dims: ('Labels_part_label_dim_0',), Size: 4546
EDGES_GROUP:
EDGE_CONVEXITIES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_DIHEDRAL_ANGLES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_INDICES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_LENGTHS_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_TYPES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGE_U_GRIDS_DATA: Shape: (337065, 10, 6), Dims: ('edge', 'u', 'component'), Size: 20223900
FILE_ID_CODE_EDGES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
FACES_GROUP:
FACE_AREAS_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FACE_DISCRETIZATION_DATA: Shape: (130923, 100, 7), Dims: ('face', 'sample', 'component'), Size: 91646100
FACE_INDICES_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FACE_LOOPS_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FACE_TYPES_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
FILE_ID_CODE_FACES_DATA: Shape: (130923,), Dims: ('face',), Size: 130923
GRAPH_GROUP:
EDGES_DESTINATION_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
EDGES_SOURCE_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
FILE_ID_CODE_GRAPH_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
NUM_NODES_DATA: Shape: (337065,), Dims: ('edge',), Size: 337065
==================================
Columns in file_info:
name id description stream_cache_png stream_cache_3d subset table_name
0 000757c971d4af379cff2bf219566f76 0 ...25a-d5c6-4a99-a7f1-efc5c122e442.stp ...00757c971d4af379cff2bf219566f76.png ...00757c971d4af379cff2bf219566f76.scs test file_info
1 000d2096f1b75775aee0cca3869062ac 1 ...376-4e32-4a62-8636-833a596c3a24.stp ...00d2096f1b75775aee0cca3869062ac.png ...00d2096f1b75775aee0cca3869062ac.scs test file_info
2 0018d103885051d4463bfbdf97642644 2 ...8a0-14fc-4bcc-94f5-7e206c7ab2eb.stp ...018d103885051d4463bfbdf97642644.png ...018d103885051d4463bfbdf97642644.scs test file_info
3 001ec9bc5e47900ff95873b20a8cf97e 3 ...s\STEP\sshettigarsleevewasher30.stp ...01ec9bc5e47900ff95873b20a8cf97e.png ...01ec9bc5e47900ff95873b20a8cf97e.scs test file_info
4 002b9953b2669f0dbf7e95a97d47e759 4 ...b39-9f03-490e-b962-55ec3761f456.stp ...02b9953b2669f0dbf7e95a97d47e759.png ...02b9953b2669f0dbf7e95a97d47e759.scs train file_info
5 0043ebf2efcc3849c5b61c45f4190369 5 ...aae-e8c6-40df-a29b-5770a2d5a73c.stp ...043ebf2efcc3849c5b61c45f4190369.png ...043ebf2efcc3849c5b61c45f4190369.scs train file_info
6 0045b381a0be8fbc8d49688a6479a076 6 ...ef6-87ef-45b6-8c9c-0414903668e3.stp ...045b381a0be8fbc8d49688a6479a076.png ...045b381a0be8fbc8d49688a6479a076.scs train file_info
7 005479bbb0a5df52f6d9cc2ad0f8253c 7 ...s\STEP\sshettigarsleevewasher44.stp ...05479bbb0a5df52f6d9cc2ad0f8253c.png ...05479bbb0a5df52f6d9cc2ad0f8253c.scs train file_info
8 0054a7cfbadf02dce98531f4dd7508a5 8 ...800-c9bd-4dd6-b425-30bf848fcd41.stp ...054a7cfbadf02dce98531f4dd7508a5.png ...054a7cfbadf02dce98531f4dd7508a5.scs train file_info
9 00a5e9cd26129c2757fe6a08407df0f9 9 ...dbe-97b8-46d4-b550-aa40dab589cd.stp ...0a5e9cd26129c2757fe6a08407df0f9.png ...0a5e9cd26129c2757fe6a08407df0f9.scs train file_info
... ... ... ... ... ... ... ...
4536 ff3d884bd5a5e96e491a4ea638d583e0 4536 ...5ae-1468-46f5-bce0-c959e3cbf870.stp ...f3d884bd5a5e96e491a4ea638d583e0.png ...f3d884bd5a5e96e491a4ea638d583e0.scs train file_info
4537 ff3fcef336d538c95a1f04ccf01339f1 4537 ...02-e51d-43d8-bd4b-171c3589485b.step ...f3fcef336d538c95a1f04ccf01339f1.png ...f3fcef336d538c95a1f04ccf01339f1.scs train file_info
4538 ff69cdbe147e5b2b231a48c668268d3b 4538 ...c3a-a37f-416c-8ddb-e0683a1a8926.stp ...f69cdbe147e5b2b231a48c668268d3b.png ...f69cdbe147e5b2b231a48c668268d3b.scs validation file_info
4539 ff860352bbd8c7af1b596835829288c1 4539 ...543-8b94-4dee-a8e5-fdc362a80359.stp ...f860352bbd8c7af1b596835829288c1.png ...f860352bbd8c7af1b596835829288c1.scs train file_info
4540 ff94b5f50c009fcc19e231c8dc4fef18 4540 ...lasses\Brackets\STEP\bracket217.stp ...f94b5f50c009fcc19e231c8dc4fef18.png ...f94b5f50c009fcc19e231c8dc4fef18.scs validation file_info
4541 ff96da3be8fa50b0c3516392e7c57770 4541 ...cdb-e812-42b6-a29f-5973b8b41042.stp ...f96da3be8fa50b0c3516392e7c57770.png ...f96da3be8fa50b0c3516392e7c57770.scs train file_info
4542 ffb399838ecd3d81f1e05edd13002887 4542 ...106-6aba-43ff-b412-4b97010388b2.stp ...fb399838ecd3d81f1e05edd13002887.png ...fb399838ecd3d81f1e05edd13002887.scs train file_info
4543 ffc34e21ca8c3b0aefc40057489b1db0 4543 ...30-ad62-4695-9e60-317db1586e81.step ...fc34e21ca8c3b0aefc40057489b1db0.png ...fc34e21ca8c3b0aefc40057489b1db0.scs test file_info
4544 ffd6cacefde868ff77c8057d55add378 4544 ...83d-2422-433d-88d1-0193c2c1471c.stp ...fd6cacefde868ff77c8057d55add378.png ...fd6cacefde868ff77c8057d55add378.scs validation file_info
4545 ffd8d4beab2e24522adc38308def99ec 4545 ...200-0d37-4719-b72d-b58833bef79a.stp ...fd8d4beab2e24522adc38308def99ec.png ...fd8d4beab2e24522adc38308def99ec.scs train file_info
[25]:
# Take the first 50 entries of the test subset's `indices` for a visual check
test_dataset = loader.get_dataset("test")
file_list = test_dataset.indices[:50]
print(file_list)
[ 0 1 2 3 13 20 21 22 25 40 46 49 52 57 59 63 67 68
74 78 82 86 99 105 108 110 111 117 124 126 128 133 136 142 146 147
155 158 173 178 194 201 204 210 226 229 233 237 245 248]
[26]:
# Visualize the selected test files as an image grid (8 columns)
fig = dataset_viewer.show_preview_as_image(
file_list,
k=len(file_list), # Show every file in file_list
grid_cols=8, # 8 columns
figsize=(15, 15) # Larger figure size
)
plt.show()