
Gather files that satisfy a given condition
[41]:
# Label value used for the "Pipe Fittings" class in the 'part_label' variable.
PIPE_FITTINGS_LABEL = 15


def label_is_pipefittings(ds):
    """Return the comparison of ``ds['part_label']`` with the Pipe Fittings label."""
    return ds['part_label'] == PIPE_FITTINGS_LABEL


# perf_counter() is monotonic and high resolution, so it is the right clock for
# measuring a short elapsed interval (time.time() can jump with wall-clock changes).
start_time = time.perf_counter()
filelist = explorer.get_file_list(group="Labels", where=label_is_pipefittings)
elapsed = time.perf_counter() - start_time

print(f"Filtering completed in {elapsed:.2f} seconds")
# NOTE(review): the message says "file_labels" while the filter reads 'part_label';
# wording kept as-is so the recorded output still matches — confirm intended name.
print(f"Found {len(filelist)} files with file_labels == {PIPE_FITTINGS_LABEL} (Pipe Fittings)\n")
print(filelist)
Filtering completed in 0.06 seconds
Found 107 files with file_labels == 15 (Pipe Fittings)
[ 8 32 114 193 254 267 344 347 352 367 402 405 411 461
492 544 561 571 572 673 675 710 794 799 856 862 879 890
946 961 978 1017 1031 1051 1053 1085 1091 1146 1205 1223 1320 1404
1449 1479 1497 1508 1560 1565 1595 1658 1712 1722 1874 1952 1965 2012
2036 2040 2042 2172 2203 2209 2273 2344 2380 2460 2462 2466 2543 2875
3012 3016 3032 3040 3098 3129 3157 3194 3221 3230 3252 3332 3379 3387
3472 3636 3661 3677 3725 3745 3769 3810 3872 3890 3962 3991 4046 4078
4111 4112 4191 4263 4276 4303 4308 4334 4473]
Query data for a single file
[42]:
def demo_query_single_file(explorer, file_id):
    """Show how to access and query dataset details for a single file.

    Prints the parquet info returned for ``file_id``, lists every variable of
    the "faces", "Labels", "edges" and "graph" dataset groups, then times the
    retrieval of one entry of the ``face_areas`` variable.

    Args:
        explorer: Object providing ``get_parquet_info_by_code(file_id)`` and
            ``file_dataset(file_id_code=..., group=...)``.
        file_id: Identifier of the file to inspect.

    Returns:
        None. All results are reported through ``print``.
    """
    import time  # local import keeps this demo cell runnable on its own

    print("=== Single File Dataset Access ===")

    # Get and print parquet info: one header line per column, then its values.
    df_info = explorer.get_parquet_info_by_code(file_id)
    print("File info:")
    for column in df_info.columns:
        print(f"Column: {column}")
        for value in df_info[column]:
            print(f" {value}")
    print()

    # Access various dataset groups and list the variables each one exposes.
    groups = ["faces", "Labels", "edges", "graph"]
    datasets = {grp: explorer.file_dataset(file_id_code=file_id, group=grp) for grp in groups}
    print(f"Datasets for file ID '{file_id}':")
    for grp, ds in datasets.items():
        for name, da in ds.data_vars.items():
            print(f" [{grp}] VARIABLE: {name}, Shape: {da.shape}, Dims: {da.dims}, Size: {da.size}")
    print()

    # Query the 'face_areas' entry of a single face and time the access.
    # perf_counter() is monotonic, unlike time.time(), so the interval is reliable.
    start_time = time.perf_counter()
    face_da = datasets["faces"]["face_areas"]
    num_faces = face_da.sizes["face"]
    if num_faces == 0:
        # Without this guard the index below would be -1 and isel() would fail
        # on an empty 'face' dimension.
        print("No faces available for this file; skipping face query.\n")
        return

    # Prefer face 2, but fall back to the last face when there are fewer than three.
    face_index = min(2, num_faces - 1)
    face_entry = face_da.isel(face=face_index)
    print(f"face grid data for face index {face_index}:")
    # Materialise the value so the timing covers the actual data retrieval.
    # Assumes the underlying array exposes .compute() (e.g. dask-backed) — TODO confirm.
    face_entry.data.compute()
    print(f"Query took {(time.perf_counter() - start_time):.2f} seconds\n")
[43]:
# Run the single-file demo for the file whose ID is 4500.
demo_query_single_file(explorer, file_id=4500)
=== Single File Dataset Access ===
File info:
Column: name
fc2c360dbd9f8ab6968702bc468647ac_0
Column: id
4500
Column: description
/home/maxime.marechal/Projects/HAI-repo/ML-Initiative/test_packages/cadfiles/fabwave/CAD_1_15_Classes/O_Rings/STEP/4be34a5d-20fd-47b9-9ee9-02fa697ceb83.stp
Column: subset
N/A
Column: table_name
file_info
Datasets for file ID '4500':
[faces] VARIABLE: face_areas, Shape: (1,), Dims: ('face',), Size: 1
[faces] VARIABLE: face_centroids, Shape: (1, 3), Dims: ('face', 'dim'), Size: 3
[faces] VARIABLE: face_discretization, Shape: (1, 100, 7), Dims: ('face', 'sample', 'component'), Size: 700
[faces] VARIABLE: face_indices, Shape: (1,), Dims: ('face',), Size: 1
[faces] VARIABLE: face_loops, Shape: (1,), Dims: ('face',), Size: 1
[faces] VARIABLE: face_types, Shape: (1,), Dims: ('face',), Size: 1
[faces] VARIABLE: file_id_code_faces, Shape: (1,), Dims: ('face',), Size: 1
[Labels] VARIABLE: file_id_code_Labels, Shape: (1,), Dims: ('part',), Size: 1
[Labels] VARIABLE: part_label, Shape: (1,), Dims: ('part',), Size: 1
[edges] VARIABLE: edge_convexities, Shape: (1,), Dims: ('edge',), Size: 1
[edges] VARIABLE: edge_dihedral_angles, Shape: (1,), Dims: ('edge',), Size: 1
[edges] VARIABLE: edge_indices, Shape: (1,), Dims: ('edge',), Size: 1
[edges] VARIABLE: edge_lengths, Shape: (1,), Dims: ('edge',), Size: 1
[edges] VARIABLE: edge_types, Shape: (1,), Dims: ('edge',), Size: 1
[edges] VARIABLE: edge_u_grids, Shape: (1, 10, 6), Dims: ('edge', 'u', 'component'), Size: 60
[edges] VARIABLE: file_id_code_edges, Shape: (1,), Dims: ('edge',), Size: 1
[graph] VARIABLE: edges_destination, Shape: (1,), Dims: ('edge',), Size: 1
[graph] VARIABLE: edges_source, Shape: (1,), Dims: ('edge',), Size: 1
[graph] VARIABLE: file_id_code_graph, Shape: (1,), Dims: ('edge',), Size: 1
[graph] VARIABLE: num_nodes, Shape: (1,), Dims: ('edge',), Size: 1
face grid data for face index 0:
Query took 0.03 seconds