Skip to content

Commit 2751991

Browse files
committed
Skip concepts that trigger the Pellet (Openllet) bug; add a script to run all evals
1 parent 280b073 commit 2751991

2 files changed

Lines changed: 336 additions & 10 deletions

File tree

ddp_reasoning_eval.py

Lines changed: 49 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,10 @@ def concept_reducer_properties(concepts, properties, cls, cardinality=None):
199199

200200

201201
def execute(args):
202+
# Suppress noisy Ray worker error traces for exceptions we catch in the driver
203+
# (e.g., Openllet ArrayIndexOutOfBoundsException propagated via RayTaskError)
204+
os.environ.setdefault("RAY_IGNORE_UNHANDLED_ERRORS", "1")
205+
202206
# (1) Initialize Ray
203207
if not ray.is_initialized():
204208
if args.auto_ray:
@@ -406,26 +410,27 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
406410
random.shuffle(concepts)
407411

408412
data = []
413+
skipped_expressions = [] # CEs skipped due to reasoner bugs
409414

410415
# Iterate over OWL Class Expressions
411416
for expression in (tqdm_bar := tqdm(concepts, position=0, leave=True)):
412417
try:
413418
dl_str = owl_expression_to_dl(expression)
414-
419+
415420
if args.verbose:
416421
print(f"\n>>> [{type(expression).__name__}] {dl_str}", flush=True)
417422
print(f" GT ...", end="", flush=True)
418-
423+
419424
# Retrieve ground truth results
420425
retrieval_y, runtime_y = concept_retrieval(symbolic_kb, expression)
421-
426+
422427
if args.verbose:
423428
print(f" {len(retrieval_y)} instances in {runtime_y:.3f}s", flush=True)
424429
print(f" DIST...", end="", flush=True)
425-
430+
426431
# Retrieve distributed reasoner results
427432
retrieval_distributed_y, runtime_distributed_y = concept_retrieval(distributed_reasoner, expression)
428-
433+
429434
if args.verbose:
430435
print(f" {len(retrieval_distributed_y)} instances in {runtime_distributed_y:.3f}s", flush=True)
431436

@@ -452,7 +457,18 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
452457
)
453458

454459
except Exception as e:
455-
print(f"\nError processing expression {dl_str}: {e}")
460+
err_str = str(e)
461+
# Detect Openllet ArrayIndexOutOfBoundsException bug
462+
# (known issue: https://github.com/Galigator/openllet/issues/57)
463+
if "ArrayIndexOutOfBoundsException" in err_str:
464+
skipped_expressions.append({
465+
"Expression": dl_str,
466+
"Type": type(expression).__name__,
467+
"Error": err_str.strip(),
468+
})
469+
tqdm_bar.write(f"[SKIPPED] Openllet bug (ArrayIndexOutOfBounds) for: {dl_str}")
470+
else:
471+
print(f"\nError processing expression {dl_str}: {e}")
456472
continue
457473

458474
# Build dataframe from collected results and write CSV once (avoids header/append race)
@@ -499,12 +515,17 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
499515
latex_table_str.append(r"\begin{table}[htbp]")
500516
latex_table_str.append(r"\centering")
501517
latex_table_str.append(r"\small")
502-
latex_table_str.append(r"\begin{tabular}{l r r r r}")
518+
latex_table_str.append(r"\begin{tabular}{l r r r r r r}")
503519
latex_table_str.append(r"\toprule")
504-
latex_table_str.append(r"\textbf{Type} & \textbf{Count} & \textbf{Jaccard} & \textbf{F1} & \textbf{RT Benefits} \\")
520+
latex_table_str.append(r"\textbf{Type} & \textbf{Count} & \textbf{Jaccard} & \textbf{F1} & \textbf{RT GT (s)} & \textbf{RT Dist (s)} & \textbf{Speedup} \\")
505521
latex_table_str.append(r"\midrule")
506522
for idx, row in latex_df.iterrows():
507-
latex_table_str.append(f"{idx:<23} & {int(row['Count']):<3} & {row['Jaccard']:.4f} & {row['F1']:.4f} & {row['RT Benefits']:.4f} \\\\")
523+
rt_gt = row['Runtime Ground Truth']
524+
rt_dist = row['Runtime Distributed']
525+
speedup = rt_gt / max(rt_dist, 1e-6)
526+
latex_table_str.append(
527+
f"{idx:<23} & {int(row['Count']):<3} & {row['Jaccard']:.4f} & {row['F1']:.4f} & {rt_gt:.4f} & {rt_dist:.4f} & {speedup:.2f}$\\times$ \\\\"
528+
)
508529
latex_table_str.append(r"\bottomrule")
509530
latex_table_str.append(r"\end{tabular}")
510531
latex_table_str.append(r"\caption{Comparison of OWL Runtime Metrics}")
@@ -519,13 +540,31 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
519540
f.write(latex_output)
520541
print(f"\nLaTeX table saved to {latex_filename}")
521542

543+
# Report skipped expressions due to reasoner bugs
544+
if skipped_expressions:
545+
print("\n" + "=" * 70)
546+
print(f"SKIPPED EXPRESSIONS (Openllet bug): {len(skipped_expressions)}")
547+
print("=" * 70)
548+
skipped_df = pd.DataFrame(skipped_expressions)
549+
print(f"\nBy Type:")
550+
print(skipped_df["Type"].value_counts().to_string())
551+
print(f"\nAll skipped CEs:")
552+
for entry in skipped_expressions:
553+
print(f" [{entry['Type']}] {entry['Expression']}")
554+
# Save skipped expressions alongside the main report
555+
skipped_path = args.path_report.replace(".csv", "_skipped.csv")
556+
skipped_df.to_csv(skipped_path, index=False)
557+
print(f"\nSkipped expressions saved to {skipped_path}")
558+
else:
559+
print("\nNo expressions were skipped due to reasoner bugs.")
560+
522561
# Assert correctness threshold
523562
mean_jaccard = df["Jaccard Similarity"].mean()
524563
if mean_jaccard >= args.min_jaccard_similarity:
525564
print(f"\n✓ Correctness check PASSED: Mean Jaccard ({mean_jaccard:.4f}) >= threshold ({args.min_jaccard_similarity})")
526565
else:
527566
print(f"\n✗ Correctness check FAILED: Mean Jaccard ({mean_jaccard:.4f}) < threshold ({args.min_jaccard_similarity})")
528-
567+
529568
return mean_jaccard, df["F1"].mean()
530569

531570

0 commit comments

Comments
 (0)