diff --git a/experimental/algorithm/LAGraph_CFL_reachability.c b/experimental/algorithm/LAGraph_CFL_reachability.c new file mode 100644 index 0000000000..868ad810c7 --- /dev/null +++ b/experimental/algorithm/LAGraph_CFL_reachability.c @@ -0,0 +1,365 @@ +//------------------------------------------------------------------------------ +// LAGraph_CFL_reachability.c: Context-Free Language Reachability Matrix-Based Algorithm +//------------------------------------------------------------------------------ +// +// LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause + +// Contributed by Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State University. + +//------------------------------------------------------------------------------ + +// Code is based on the "A matrix-based CFPQ algorithm" described in the following paper: +// * Rustam Azimov, Semyon Grigorev, "Context-Free Path Querying Using Linear Algebra" +// * URL: https://disser.spbu.ru/files/2022/disser_azimov.pdf + +#define LG_FREE_WORK \ + { \ + free(nnzs); \ + GrB_free(&true_scalar); \ + GrB_free(&identity_matrix); \ + free(T); \ + free(indexes); \ + } + +#define LG_FREE_ALL \ + { \ + for (size_t i = 0; i < nonterms_count; i++) { \ + GrB_free(&T[i]); \ + } \ + \ + LG_FREE_WORK; \ + } + +#include "LG_internal.h" +#include + +#define ERROR_RULE(msg) \ + { \ + LG_ASSERT_MSGF(false, GrB_INVALID_VALUE, "Rule with index %ld is invalid. " msg, \ + i); \ + } + +#define ADD_TO_MSG(...) \ + { \ + if (msg_len == 0) { \ + msg_len += \ + snprintf(msg, LAGRAPH_MSG_LEN, \ + "LAGraph failure (file %s, line %d): ", __FILE__, __LINE__); \ + } \ + if (msg_len < LAGRAPH_MSG_LEN) { \ + msg_len += snprintf(msg + msg_len, LAGRAPH_MSG_LEN - msg_len, __VA_ARGS__); \ + } \ + } + +#define ADD_INDEX_TO_ERROR_RULE(rule, i) \ + { \ + rule.len_indexes_str += snprintf(rule.indexes_str + rule.len_indexes_str, \ + LAGRAPH_MSG_LEN - rule.len_indexes_str, \ + rule.count == 0 ? 
"%ld" : ", %ld", i); \ + rule.count++; \ + } + + + +// LAGraph_CFL_reachability: Context-Free Language Reachability Matrix-Based Algorithm +// +// This function determines the set of vertex pairs (u, v) in a graph (represented by +// adjacency matrices) such that there is a path from u to v, where the edge labels form a +// word from the language generated by the context-free grammar (represented by `rules`). +// +// Terminals and non-terminals are enumerated by integers starting from zero. +// The start non-terminal is the non-terminal with index 0. +// +// Example: +// +// Graph: +// ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ +// │ 0 ├───► 1 ├───► 2 ├───► 3 ├───► 4 │ +// └───┘ a └─┬─┘ a └─▲─┘ b └───┘ b └───┘ +// │ │ +// │ ┌───┐ │ +// a└─► 5 ├─┘b +// └───┘ +// +// Grammar: S -> aSb | ab +// +// There are paths from node [1] to node [3] and from node [1] to node [2] that form the +// word "ab" ([1]-a->[2]-b->[3] and [1]-a->[5]-b->[2]). The word "ab" is in the language +// generated by our context-free grammar, so the pairs (1, 3) and (1, 2) will be included +// in the result. +// +// Note: It doesn't matter how many paths exist from node [A] to node [B] that form a word +// in the language. If at least one path exists, the pair ([A], [B]) will be included in +// the result. +// +// In contrast, the path from node [1] to node [4] forms the word "abb" +// ([1]-a->[2]-b->[3]-b->[4]) and the word "abbb" ([1]-a->[5]-b->[2]-b->[3]-b->[4]). +// The words "aab" and "abbb" are not in the language, so the pair (1, 4) will not be +// included in the result. 
+// +// With this graph and grammar, we obtain the following results: +// (0, 4) - because there exists a path (0-1-2-3-4) that forms the word "aabb" +// (1, 3) - because there exists a path (1-2-3) that forms "ab" +// (1, 2) - because there exists a path (1-5-2) that forms the word "ab" +// (0, 3) - because there exists a path (0-1-5-2-3) that forms the word "aabb" +GrB_Info LAGraph_CFL_reachability +( + // Output + GrB_Matrix *outputs, // Array of matrices containing results. + // The size of the array must be equal to nonterms_count. + // + // outputs[k]: (i, j) = true if and only if there is a path + // from node i to node j whose edge labels form a word + // derivable from the non-terminal 'k' of the specified CFG. + // Input + const GrB_Matrix *adj_matrices, // Array of adjacency matrices representing the graph. + // The length of this array is equal to the count of + // terminals (terms_count). + // + // adj_matrices[t]: (i, j) == 1 if and only if there + // is an edge between nodes i and j with the label of + // the terminal corresponding to index 't' (where t is + // in the range [0, terms_count - 1]). + int32_t terms_count, // The total number of terminal symbols in the CFG. + int32_t nonterms_count, // The total number of non-terminal symbols in the CFG. + const LAGraph_rule_WCNF *rules, // The rules of the CFG. + size_t rules_count, // The total number of rules in the CFG. + char *msg // Message string for error reporting. 
+) +{ + // Declare workspace and clear the msg string, if not NULL + GrB_Matrix *T; + bool t_empty_flags[nonterms_count]; // t_empty_flags[i] == true <=> T[i] is empty + GrB_Matrix identity_matrix = NULL; + uint64_t *nnzs = NULL; + LG_CLEAR_MSG; + size_t msg_len = 0; // For error formatting + bool iso_flag = false; + GrB_Index *indexes = NULL; + + + GrB_Scalar true_scalar; + GrB_Scalar_new(&true_scalar, GrB_BOOL); + GrB_Scalar_setElement_BOOL(true_scalar, true); + + LG_TRY(LAGraph_Calloc((void **) &T, nonterms_count, sizeof(GrB_Matrix), msg)); + + LG_ASSERT_MSG(terms_count > 0, GrB_INVALID_VALUE, + "The number of terminals must be greater than zero."); + LG_ASSERT_MSG(nonterms_count > 0, GrB_INVALID_VALUE, + "The number of non-terminals must be greater than zero."); + LG_ASSERT_MSG(rules_count > 0, GrB_INVALID_VALUE, + "The number of rules must be greater than zero."); + LG_ASSERT_MSG(outputs != NULL, GrB_NULL_POINTER, "The outputs array cannot be null."); + LG_ASSERT_MSG(rules != NULL, GrB_NULL_POINTER, "The rules array cannot be null."); + LG_ASSERT_MSG(adj_matrices != NULL, GrB_NULL_POINTER, + "The adjacency matrices array cannot be null."); + + // Find null adjacency matrices + bool found_null = false; + for (int32_t i = 0; i < terms_count; i++) { + if (adj_matrices[i] != NULL) + continue; + + if (!found_null) { + ADD_TO_MSG("Adjacency matrices with these indexes are null: "); + ADD_TO_MSG("%d", i); + } else { + ADD_TO_MSG(", %d", i); + } + + found_null = true; + } + + if (found_null) { + LG_FREE_ALL; + return GrB_NULL_POINTER; + } + + GrB_Index n; + GRB_TRY(GrB_Matrix_ncols(&n, adj_matrices[0])); + + // Create nonterms matrices + for (int32_t i = 0; i < nonterms_count; i++) { + GRB_TRY(GrB_Matrix_new(&T[i], GrB_BOOL, n, n)); + t_empty_flags[i] = true; + } + + // Arrays for processing rules + size_t eps_rules[rules_count], eps_rules_count = 0; // [Variable -> eps] + size_t term_rules[rules_count], term_rules_count = 0; // [Variable -> term] + size_t 
bin_rules[rules_count], bin_rules_count = 0; // [Variable -> AB] + + // Process rules + typedef struct { + size_t count; + size_t len_indexes_str; + char indexes_str[LAGRAPH_MSG_LEN]; + } rule_error_s; + rule_error_s term_err = {0}; + rule_error_s nonterm_err = {0}; + rule_error_s invalid_err = {0}; + for (size_t i = 0; i < rules_count; i++) { + LAGraph_rule_WCNF rule = rules[i]; + + bool is_rule_eps = rule.prod_A == -1 && rule.prod_B == -1; + bool is_rule_term = rule.prod_A != -1 && rule.prod_B == -1; + bool is_rule_bin = rule.prod_A != -1 && rule.prod_B != -1; + + // Check that all rules are well-formed + if (rule.nonterm < 0 || rule.nonterm >= nonterms_count) { + ADD_INDEX_TO_ERROR_RULE(nonterm_err, i); + } + + // [Variable -> eps] + if (is_rule_eps) { + eps_rules[eps_rules_count++] = i; + + continue; + } + + // [Variable -> term] + if (is_rule_term) { + term_rules[term_rules_count++] = i; + + if (rule.prod_A < -1 || rule.prod_A >= terms_count) { + ADD_INDEX_TO_ERROR_RULE(term_err, i); + } + + continue; + } + + // [Variable -> A B] + if (is_rule_bin) { + bin_rules[bin_rules_count++] = i; + + if (rule.prod_A < -1 || rule.prod_A >= nonterms_count || rule.prod_B < -1 || + rule.prod_B >= nonterms_count) { + ADD_INDEX_TO_ERROR_RULE(nonterm_err, i); + } + + continue; + } + + // [Variable -> _ B] + ADD_INDEX_TO_ERROR_RULE(invalid_err, i); + } + + if (term_err.count + nonterm_err.count + invalid_err.count > 0) { + ADD_TO_MSG("Count of invalid rules: %ld.\n", + term_err.count + nonterm_err.count + invalid_err.count); + + if (nonterm_err.count > 0) { + ADD_TO_MSG("Non-terminals must be in range [0, nonterms_count). "); + ADD_TO_MSG("Indexes of invalid rules: %s\n", nonterm_err.indexes_str) + } + if (term_err.count > 0) { + ADD_TO_MSG("Terminals must be in range [-1, nonterms_count). "); + ADD_TO_MSG("Indexes of invalid rules: %s\n", term_err.indexes_str) + } + if (invalid_err.count > 0) { + ADD_TO_MSG("[Variable -> _ B] type of rule is not acceptable. 
"); + ADD_TO_MSG("Indexes of invalid rules: %s\n", invalid_err.indexes_str) + } + + LG_FREE_ALL; + return GrB_INVALID_VALUE; + } + + // Rule [Variable -> term] + for (size_t i = 0; i < term_rules_count; i++) { + LAGraph_rule_WCNF term_rule = rules[term_rules[i]]; + GrB_Index adj_matrix_nnz = 0; + GRB_TRY(GrB_Matrix_nvals(&adj_matrix_nnz, adj_matrices[term_rule.prod_A])); + + if (adj_matrix_nnz == 0) { + continue; + } + + GxB_eWiseUnion( + T[term_rule.nonterm], GrB_NULL, GrB_NULL, GxB_PAIR_BOOL, + T[term_rule.nonterm], true_scalar, adj_matrices[term_rule.prod_A], true_scalar, GrB_NULL + ); + + t_empty_flags[term_rule.nonterm] = false; + + #ifdef DEBUG_CFL_REACHBILITY + GxB_Matrix_iso(&iso_flag, T[term_rule.nonterm]); + printf("[TERM] eWiseUnion: NONTERM: %d (ISO: %d)\n", term_rule.nonterm, iso_flag); + #endif + } + + GrB_Vector v_diag; + GRB_TRY(GrB_Vector_new(&v_diag, GrB_BOOL, n)); + GRB_TRY(GrB_Vector_assign_BOOL(v_diag, GrB_NULL, GrB_NULL, true, GrB_ALL, n, NULL)); + GRB_TRY(GrB_Matrix_diag(&identity_matrix, v_diag, 0)); + GRB_TRY(GrB_free(&v_diag)); + + // Rule [Variable -> eps] + for (size_t i = 0; i < eps_rules_count; i++) { + LAGraph_rule_WCNF eps_rule = rules[eps_rules[i]]; + + GxB_eWiseUnion ( + T[eps_rule.nonterm],GrB_NULL,GxB_PAIR_BOOL,GxB_PAIR_BOOL, + T[eps_rule.nonterm],true_scalar,identity_matrix,true_scalar,GrB_NULL + ); + + t_empty_flags[eps_rule.nonterm] = false; + + #ifdef DEBUG_CFL_REACHBILITY + GxB_Matrix_iso(&iso_flag, T[eps_rule.nonterm]); + printf("[EPS] eWiseUnion: NONTERM: %d (ISO: %d)\n", + eps_rule.nonterm, iso_flag); + #endif + } + + // Rule [Variable -> Variable1 Variable2] + LG_TRY(LAGraph_Calloc((void **) &nnzs, nonterms_count, sizeof(uint64_t), msg)); + bool changed = true; + while (changed) { + changed = false; + for (size_t i = 0; i < bin_rules_count; i++) { + LAGraph_rule_WCNF bin_rule = rules[bin_rules[i]]; + + // If one of matrices is empty then their product will be empty + if (t_empty_flags[bin_rule.prod_A] || 
t_empty_flags[bin_rule.prod_B]) { + continue; + } + + GrB_BinaryOp acc_op = t_empty_flags[bin_rule.nonterm] ? GrB_NULL : GxB_ANY_BOOL; + GRB_TRY(GrB_mxm(T[bin_rule.nonterm], GrB_NULL, acc_op, + GxB_ANY_PAIR_BOOL, T[bin_rule.prod_A], T[bin_rule.prod_B], + GrB_NULL)) + + GrB_Index new_nnz; + GRB_TRY(GrB_Matrix_nvals(&new_nnz, T[bin_rule.nonterm])); + if (new_nnz != 0) t_empty_flags[bin_rule.nonterm] = false; + + changed = changed || (nnzs[bin_rule.nonterm] != new_nnz); + nnzs[bin_rule.nonterm] = new_nnz; + + #ifdef DEBUG_CFL_REACHBILITY + GxB_Matrix_iso(&iso_flag, T[bin_rule.nonterm]); + printf("[TERM1 TERM2] MULTIPLY, S: %d, A: %d, B: %d, " + "I: %ld (ISO: %d)\n", + bin_rule.nonterm, bin_rule.prod_A, bin_rule.prod_B, i, iso_flag); + #endif + + } + } + + #ifdef DEBUG_CFL_REACHBILITY + for (int32_t i = 0; i < nonterms_count; i++) { + printf("MATRIX WITH INDEX %d:\n", i); + GxB_print(T[i], GxB_SUMMARY); + } + #endif + + for (int32_t i = 0; i < nonterms_count; i++) { + outputs[i] = T[i]; + } + + LG_FREE_WORK; + return GrB_SUCCESS; +} diff --git a/experimental/test/test_CFL_reachability.c b/experimental/test/test_CFL_reachability.c new file mode 100644 index 0000000000..084daf7f8a --- /dev/null +++ b/experimental/test/test_CFL_reachability.c @@ -0,0 +1,583 @@ +//------------------------------------------------------------------------------ +// LAGraph/experimental/test/LAGraph_CFL_reachability.c: test cases for Context-Free +// Language Reachability Matrix-Based Algorithm +//------------------------------------------------------------------------------ +// +// LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause + +// Contributed by Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State University. 
+ +//------------------------------------------------------------------------------ + +#include +#include +#include +#include +#include +#include + +#define run_algorithm() \ + LAGraph_CFL_reachability(outputs, adj_matrices, grammar.terms_count, \ + grammar.nonterms_count, grammar.rules, grammar.rules_count, \ + msg) + +#define check_error(error) \ + { \ + retval = run_algorithm(); \ + TEST_CHECK(retval == error); \ + TEST_MSG("retval = %d (%s)", retval, msg); \ + } + +#define check_result(result) \ + { \ + char *expected = output_to_str(0); \ + TEST_CHECK(strcmp(result, expected) == 0); \ + TEST_MSG("Wrong result. Actual: %s", expected); \ + } + +typedef struct { + size_t nonterms_count; + size_t terms_count; + size_t rules_count; + LAGraph_rule_WCNF *rules; +} grammar_t; + +GrB_Matrix *adj_matrices = NULL; +GrB_Matrix *outputs = NULL; +grammar_t grammar = {0, 0, 0, NULL}; +char msg[LAGRAPH_MSG_LEN]; + +void setup() { LAGraph_Init(msg); } + +void teardown(void) { LAGraph_Finalize(msg); } + +void init_outputs() { outputs = calloc(grammar.nonterms_count, sizeof(GrB_Matrix)); } + +char *output_to_str(size_t nonterm) { + GrB_Index nnz = 0; + OK(GrB_Matrix_nvals(&nnz, outputs[nonterm])); + GrB_Index *row = malloc(nnz * sizeof(GrB_Index)); + GrB_Index *col = malloc(nnz * sizeof(GrB_Index)); + bool *val = malloc(nnz * sizeof(GrB_Index)); + OK(GrB_Matrix_extractTuples(row, col, val, &nnz, outputs[nonterm])); + + // 11 - size of " (%ld, %ld)" + char *result_str = malloc(11 * nnz * sizeof(char)); + result_str[0] = '\0'; + for (size_t i = 0; i < nnz; i++) { + sprintf(result_str + strlen(result_str), i == 0 ? 
"(%ld, %ld)" : " (%ld, %ld)", + row[i], col[i]); + } + + free(row); + free(col); + free(val); + + return result_str; +} + +void free_workspace() { + + for (size_t i = 0; i < grammar.terms_count; i++) { + if (adj_matrices == NULL) + break; + + if (adj_matrices[i] == NULL) + continue; + + GrB_free(&adj_matrices[i]); + } + free(adj_matrices); + adj_matrices = NULL; + + for (size_t i = 0; i < grammar.nonterms_count; i++) { + if (outputs == NULL) + break; + + if (outputs[i] == NULL) + continue; + + GrB_free(&outputs[i]); + } + free(outputs); + outputs = NULL; + + free(grammar.rules); + grammar = (grammar_t){0, 0, 0, NULL}; +} + +//==================== +// Grammars +//==================== + +// S -> aSb | ab in WCNF +// +// Terms: [0 a] [1 b] +// Nonterms: [0 S] [1 A] [2 B] [3 C] +// S -> AB [0 1 2 0] +// S -> AC [0 1 3 0] +// C -> SB [3 0 2 0] +// A -> a [1 0 -1 0] +// B -> b [2 1 -1 0] +void init_grammar_aSb() { + LAGraph_rule_WCNF *rules = calloc(5, sizeof(LAGraph_rule_WCNF)); + rules[0] = (LAGraph_rule_WCNF){0, 1, 2, 0}; + rules[1] = (LAGraph_rule_WCNF){0, 1, 3, 0}; + rules[2] = (LAGraph_rule_WCNF){3, 0, 2, 0}; + rules[3] = (LAGraph_rule_WCNF){1, 0, -1, 0}; + rules[4] = (LAGraph_rule_WCNF){2, 1, -1, 0}; + + grammar = (grammar_t){ + .nonterms_count = 4, .terms_count = 2, .rules_count = 5, .rules = rules}; +} + +// S -> aS | a in WCNF +// +// Terms: [0 a] +// Nonterms: [0 S] +// S -> SS [0 0 0 0] +// S -> a [0 0 -1 0] +void init_grammar_aS() { + LAGraph_rule_WCNF *rules = calloc(2, sizeof(LAGraph_rule_WCNF)); + rules[0] = (LAGraph_rule_WCNF){0, 0, 0, 0}; + rules[1] = (LAGraph_rule_WCNF){0, 0, -1, 0}; + + grammar = (grammar_t){ + .nonterms_count = 1, .terms_count = 1, .rules_count = 2, .rules = rules}; +} + +// Complex grammar +// aaaabbbb or aaabbb +// +// Terms: [0 a] [1 b] +// Nonterms: [0 S] [n Sn] +// S -> S1 S2 [0 1 2 0] +// S -> S15 S16 [0 15 16 0] +// S1 -> S3 S4 [1 3 4 0] +// S2 -> S5 S6 [2 5 6 0] +// S3 -> S7 S8 [3 7 8 0] +// S4 -> S9 S10 [4 9 10 0] +// S5 -> 
S11 S12 [5 11 12 0] +// S6 -> S13 S14 [6 13 14 0] +// S16 -> S17 S18 [16 17 18 0] +// S17 -> S19 S20 [17 19 20 0] +// S18 -> S21 S22 [18 21 22 0] +// S22 -> S23 S24 [22 23 24 0] +// S7 -> a [7 0 -1 0] +// S8 -> a [8 0 -1 0] +// S9 -> a [9 0 -1 0] +// S10 -> a [10 0 -1 0] +// S11 -> b [11 1 -1 0] +// S12 -> b [12 1 -1 0] +// S13 -> b [13 1 -1 0] +// S14 -> b [14 1 -1 0] +// S15 -> a [15 0 -1 0] +// S19 -> a [19 0 -1 0] +// S20 -> a [20 0 -1 0] +// S21 -> b [21 1 -1 0] +// S23 -> b [23 1 -1 0] +// S24 -> b [24 1 -1 0] +void init_grammar_complex() { + LAGraph_rule_WCNF *rules = calloc(26, sizeof(LAGraph_rule_WCNF)); + rules[0] = (LAGraph_rule_WCNF){0, 1, 2, 0}; + rules[1] = (LAGraph_rule_WCNF){0, 15, 16, 0}; + rules[2] = (LAGraph_rule_WCNF){1, 3, 4, 0}; + rules[3] = (LAGraph_rule_WCNF){2, 5, 6, 0}; + rules[4] = (LAGraph_rule_WCNF){3, 7, 8, 0}; + rules[5] = (LAGraph_rule_WCNF){4, 9, 10, 0}; + rules[6] = (LAGraph_rule_WCNF){5, 11, 12, 0}; + rules[7] = (LAGraph_rule_WCNF){6, 13, 14, 0}; + rules[8] = (LAGraph_rule_WCNF){16, 17, 18, 0}; + rules[9] = (LAGraph_rule_WCNF){17, 19, 20, 0}; + rules[10] = (LAGraph_rule_WCNF){18, 21, 22, 0}; + rules[11] = (LAGraph_rule_WCNF){22, 23, 24, 0}; + rules[12] = (LAGraph_rule_WCNF){7, 0, -1, 0}; + rules[13] = (LAGraph_rule_WCNF){8, 0, -1, 0}; + rules[14] = (LAGraph_rule_WCNF){9, 0, -1, 0}; + rules[15] = (LAGraph_rule_WCNF){10, 0, -1, 0}; + rules[16] = (LAGraph_rule_WCNF){11, 1, -1, 0}; + rules[17] = (LAGraph_rule_WCNF){12, 1, -1, 0}; + rules[18] = (LAGraph_rule_WCNF){13, 1, -1, 0}; + rules[19] = (LAGraph_rule_WCNF){14, 1, -1, 0}; + rules[20] = (LAGraph_rule_WCNF){15, 0, -1, 0}; + rules[21] = (LAGraph_rule_WCNF){19, 0, -1, 0}; + rules[22] = (LAGraph_rule_WCNF){20, 0, -1, 0}; + rules[23] = (LAGraph_rule_WCNF){21, 1, -1, 0}; + rules[24] = (LAGraph_rule_WCNF){23, 1, -1, 0}; + rules[25] = (LAGraph_rule_WCNF){24, 1, -1, 0}; + + grammar = (grammar_t){ + .nonterms_count = 25, .terms_count = 2, .rules_count = 26, .rules = rules}; +} + 
+//==================== +// Graphs +//==================== + +// Graph: +// +// 0 -a-> 1 +// 1 -a-> 2 +// 2 -a-> 0 +// 0 -b-> 3 +// 3 -b-> 0 +void init_graph_double_cycle() { + adj_matrices = calloc(2, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a, adj_matrix_b; + OK(GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 4, 4)); + OK(GrB_Matrix_new(&adj_matrix_b, GrB_BOOL, 4, 4)); + + OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 1)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 1, 2)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 2, 0)); + + OK(GrB_Matrix_setElement(adj_matrix_b, true, 0, 3)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 3, 0)); + + adj_matrices[0] = adj_matrix_a; + adj_matrices[1] = adj_matrix_b; +} + +// Graph: +// +// 0 -a-> 1 +// 1 -a-> 2 +// 2 -a-> 3 +// 3 -a-> 4 +// 3 -b-> 5 +// 4 -b-> 3 +// 5 -b-> 6 +// 6 -b-> 7 +void init_graph_1() { + adj_matrices = calloc(2, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a, adj_matrix_b; + OK(GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 8, 8)); + OK(GrB_Matrix_new(&adj_matrix_b, GrB_BOOL, 8, 8)); + + OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 1)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 1, 2)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 2, 3)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 3, 4)); + + OK(GrB_Matrix_setElement(adj_matrix_b, true, 3, 5)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 4, 3)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 5, 6)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 6, 7)); + + adj_matrices[0] = adj_matrix_a; + adj_matrices[1] = adj_matrix_b; +} + +// Graph: +// +// 0 -a-> 2 +// 1 -a-> 2 +// 3 -a-> 5 +// 4 -a-> 5 +// 2 -a-> 6 +// 5 -a-> 6 +// 2 -b-> 0 +// 2 -b-> 1 +// 5 -b-> 3 +// 5 -b-> 4 +// 6 -b-> 2 +// 6 -b-> 5 +void init_graph_tree() { + adj_matrices = calloc(2, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a, adj_matrix_b; + OK(GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 7, 7)); + OK(GrB_Matrix_new(&adj_matrix_b, GrB_BOOL, 7, 7)); + + 
OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 2)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 1, 2)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 3, 5)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 4, 5)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 2, 6)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 5, 6)); + + OK(GrB_Matrix_setElement(adj_matrix_b, true, 2, 0)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 2, 1)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 5, 3)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 5, 4)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 6, 2)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 6, 5)); + + adj_matrices[0] = adj_matrix_a; + adj_matrices[1] = adj_matrix_b; +} + +// Graph: +// +// 0 -a-> 1 +// 1 -a-> 2 +// 2 -a-> 0 +void init_graph_one_cycle() { + adj_matrices = calloc(1, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a; + GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 3, 3); + + OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 1)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 1, 2)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 2, 0)); + + adj_matrices[0] = adj_matrix_a; +} + +// Graph: + +// 0 -a-> 1 +// 1 -a-> 2 +// 2 -b-> 3 +// 3 -b-> 4 +void init_graph_line() { + adj_matrices = calloc(2, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a, adj_matrix_b; + GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 5, 5); + GrB_Matrix_new(&adj_matrix_b, GrB_BOOL, 5, 5); + + OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 1)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 1, 2)); + + OK(GrB_Matrix_setElement(adj_matrix_b, true, 2, 3)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 3, 4)); + + adj_matrices[0] = adj_matrix_a; + adj_matrices[1] = adj_matrix_b; +} + +// Graph: + +// 0 -a-> 0 +// 0 -b-> 1 +// 1 -c-> 2 +void init_graph_2() { + adj_matrices = calloc(3, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a, adj_matrix_b, adj_matrix_c; + GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 3, 3); + GrB_Matrix_new(&adj_matrix_b, GrB_BOOL, 
3, 3); + GrB_Matrix_new(&adj_matrix_c, GrB_BOOL, 3, 3); + + OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 0)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 0, 1)); + OK(GrB_Matrix_setElement(adj_matrix_c, true, 1, 2)); + + adj_matrices[0] = adj_matrix_a; + adj_matrices[1] = adj_matrix_b; + adj_matrices[2] = adj_matrix_c; +} + +// Graph: + +// 0 -a-> 1 +// 1 -a-> 0 +// 0 -b-> 0 +void init_graph_3() { + adj_matrices = calloc(2, sizeof(GrB_Matrix)); + GrB_Matrix adj_matrix_a, adj_matrix_b; + GrB_Matrix_new(&adj_matrix_a, GrB_BOOL, 2, 2); + GrB_Matrix_new(&adj_matrix_b, GrB_BOOL, 2, 2); + + OK(GrB_Matrix_setElement(adj_matrix_a, true, 0, 1)); + OK(GrB_Matrix_setElement(adj_matrix_a, true, 1, 0)); + OK(GrB_Matrix_setElement(adj_matrix_b, true, 0, 0)); + + adj_matrices[0] = adj_matrix_a; + adj_matrices[1] = adj_matrix_b; +} + +//==================== +// Tests with valid result +//==================== + +void test_CFL_reachability_cycle(void) { + setup(); + GrB_Info retval; + + init_grammar_aS(); + init_graph_one_cycle(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 0) (0, 1) (0, 2) (1, 0) (1, 1) (1, 2) (2, 0) (2, 1) (2, 2)"); + + free_workspace(); + teardown(); +} + +void test_CFL_reachability_two_cycle(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_double_cycle(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 0) (0, 3) (1, 0) (1, 3) (2, 0) (2, 3)"); + + free_workspace(); + teardown(); +} + +void test_CFL_reachability_labels_more_than_nonterms(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_2(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 1)"); + + free_workspace(); + teardown(); +} + +void test_CFL_reachability_complex_grammar(void) { + setup(); + GrB_Info retval; + + init_grammar_complex(); + init_graph_1(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 7) (1, 6)"); + + free_workspace(); + teardown(); +} + +void 
test_CFL_reachability_tree(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_tree(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 0) (0, 1) (0, 3) (0, 4) (1, 0) (1, 1) (1, 3) (1, 4) (2, 2) (2, 5) " + "(3, 0) (3, 1) (3, 3) (3, 4) (4, 0) (4, 1) (4, 3) (4, 4) (5, 2) (5, 5)"); + + free_workspace(); + teardown(); +} + +void test_CFL_reachability_line(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_line(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 4) (1, 3)"); + + free_workspace(); + teardown(); +} + +void test_CFL_reachability_two_nodes_cycle(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_3(); + init_outputs(); + + OK(run_algorithm()); + check_result("(0, 0) (1, 0)"); + + free_workspace(); + teardown(); +} + +//==================== +// Tests with invalid result +//==================== + +void test_CFL_reachability_invalid_rules(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_double_cycle(); + init_outputs(); + + // Rule [Variable -> _ B] + grammar.rules[0] = + (LAGraph_rule_WCNF){.nonterm = 0, .prod_A = -1, .prod_B = 1, .index = 0}; + check_error(GrB_INVALID_VALUE); + + // Rule [_ -> A B] + grammar.rules[0] = + (LAGraph_rule_WCNF){.nonterm = -1, .prod_A = 1, .prod_B = 2, .index = 0}; + check_error(GrB_INVALID_VALUE); + + // Rule [C -> A B], where C >= nonterms_count + grammar.rules[0] = + (LAGraph_rule_WCNF){.nonterm = 10, .prod_A = 1, .prod_B = 2, .index = 0}; + check_error(GrB_INVALID_VALUE); + + // Rule [C -> t], where t >= terms_count + grammar.rules[0] = + (LAGraph_rule_WCNF){.nonterm = 0, .prod_A = 10, .prod_B = -1, .index = 0}; + check_error(GrB_INVALID_VALUE); + + free_workspace(); + teardown(); + + return; +} + +void test_CFL_reachability_null_pointers(void) { + setup(); + GrB_Info retval; + + init_grammar_aSb(); + init_graph_double_cycle(); + init_outputs(); + + adj_matrices[0] = NULL; + 
check_error(GrB_NULL_POINTER); + + adj_matrices = NULL; + check_error(GrB_NULL_POINTER); + + free_workspace(); + init_grammar_aSb(); + init_graph_double_cycle(); + init_outputs(); + + outputs = NULL; + check_error(GrB_NULL_POINTER); + + free_workspace(); + init_grammar_aSb(); + init_graph_double_cycle(); + init_outputs(); + + grammar.rules = NULL; + check_error(GrB_NULL_POINTER); + + free_workspace(); + teardown(); + + return; +} + +TEST_LIST = {{"CFL_reachability_complex_grammar", test_CFL_reachability_complex_grammar}, + {"CFL_reachability_cycle", test_CFL_reachability_cycle}, + {"CFL_reachability_two_cycle", test_CFL_reachability_two_cycle}, + {"CFL_reachability_labels_more_than_nonterms", + test_CFL_reachability_labels_more_than_nonterms}, + {"CFL_reachability_tree", test_CFL_reachability_tree}, + {"CFL_reachability_line", test_CFL_reachability_line}, + {"CFL_reachability_two_nodes_cycle", test_CFL_reachability_two_nodes_cycle}, + {"CFG_reach_basic_invalid_rules", test_CFL_reachability_invalid_rules}, + {"CFG_reachability_null_pointers", test_CFL_reachability_null_pointers}, + {NULL, NULL}}; \ No newline at end of file diff --git a/include/LAGraphX.h b/include/LAGraphX.h index d95a951396..de473b5c62 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -962,6 +962,104 @@ int LAGraph_SquareClustering char *msg ) ; +//------------------------------------------------------------------------------ +// Algorithms for working with CFGs and graphs +//------------------------------------------------------------------------------ + +// Production rule of Context-free grammar in Weak Chomsky Normal Form +// Rule defined by tuple of [NONTERM, PROD_A, PROD_B, INDEX] in Weak Chomsky Normal Form +// Variable -> eps: [NONTERM, -1, -1, INDEX] +// Variable -> term: [NONTERM, TERM, -1, INDEX] +// Variable -> AB: [NONTERM, TERM1, TERM2, INDEX] +// +// Example: +// Terms: [0 a] [1 b] +// Nonterms: [0 S] [1 A] [2 B] [3 C] +// S -> AB [0 1 2 0] +// S -> AC [0 1 3 0] +// C -> 
//      C -> SB     [3 0 2 0]
//      A -> a      [1 0 -1 0]
//      B -> b      [2 1 -1 0]
//      S -> eps    [0 -1 -1 0]
//
// Warning:
//      Variable -> _ B: [NONTERM, -1, NONTERM_B, INDEX] is not a valid rule;
//      LAGraph_CFL_reachability reports such rules with GrB_INVALID_VALUE.
//
// The kind of a rule follows from which of prod_A / prod_B equal -1:
//      prod_A != -1 && prod_B != -1  =>  [Variable -> AB]
//      prod_A == -1 && prod_B == -1  =>  [Variable -> eps]
//      prod_A != -1 && prod_B == -1  =>  [Variable -> term]
typedef struct {
    int32_t nonterm; // Left-hand side: index of a non-terminal
    int32_t prod_A;  // Terminal index (term rule), first non-terminal (AB rule) or -1
    int32_t prod_B;  // Second non-terminal (AB rule) or -1
    int32_t index;   // For rules that can be grouped by index
} LAGraph_rule_WCNF;


// LAGraph_CFL_reachability: Context-Free Language Reachability Matrix-Based Algorithm
//
// This function determines the set of vertex pairs (u, v) in a graph (represented by
// adjacency matrices) such that there is a path from u to v, where the edge labels form a
// word from the language generated by the context-free grammar (represented by `rules`).
//
// Terminals and non-terminals are enumerated by integers starting from zero.
// The start non-terminal is the non-terminal with index 0.
//
// Example:
//
//   Graph:
//   ┌───┐   ┌───┐   ┌───┐   ┌───┐   ┌───┐
//   │ 0 ├───► 1 ├───► 2 ├───► 3 ├───► 4 │
//   └───┘ a └─┬─┘ a └─▲─┘ b └───┘ b └───┘
//             │       │
//             │ ┌───┐ │
//            a└─► 5 ├─┘b
//               └───┘
//
//   Grammar: S -> aSb | ab
//
// There are paths from node [1] to node [3] and from node [1] to node [2] that form the
// word "ab" ([1]-a->[2]-b->[3] and [1]-a->[5]-b->[2]). The word "ab" is in the language
// generated by our context-free grammar, so the pairs (1, 3) and (1, 2) will be included
// in the result.
//
// Note: It doesn't matter how many paths exist from node [A] to node [B] that form a word
// in the language. If at least one path exists, the pair ([A], [B]) will be included in
// the result.
//
// In contrast, the path from node [1] to node [4] forms the word "abb"
// ([1]-a->[2]-b->[3]-b->[4]) and the word "abbb" ([1]-a->[5]-b->[2]-b->[3]-b->[4]).
// The words "abb" and "abbb" are not in the language, so the pair (1, 4) will not be
// included in the result.
//
// With this graph and grammar, we obtain the following results:
//   (0, 4) - because there exists a path (0-1-2-3-4) that forms the word "aabb"
//   (1, 3) - because there exists a path (1-2-3) that forms "ab"
//   (1, 2) - because there exists a path (1-5-2) that forms the word "ab"
//   (0, 3) - because there exists a path (0-1-5-2-3) that forms the word "aabb"
GrB_Info LAGraph_CFL_reachability
(
    // Output
    GrB_Matrix *outputs, // Array of matrices containing results.
                         // The size of the array must be equal to nonterms_count.
                         //
                         // outputs[k]: (i, j) = true if and only if there is a path
                         // from node i to node j whose edge labels form a word
                         // derivable from the non-terminal 'k' of the specified CFG.

    // Input
    const GrB_Matrix *adj_matrices, // Array of adjacency matrices representing the graph.
                                    // The length of this array is equal to the count of
                                    // terminals (terms_count).
                                    //
                                    // adj_matrices[t]: (i, j) == 1 if and only if there
                                    // is an edge between nodes i and j with the label of
                                    // the terminal corresponding to index 't' (where t is
                                    // in the range [0, terms_count - 1]).

    int32_t terms_count,            // The total number of terminal symbols in the CFG.
    int32_t nonterms_count,         // The total number of non-terminal symbols in the CFG.
    const LAGraph_rule_WCNF *rules, // The rules of the CFG.
    size_t rules_count,             // The total number of rules in the CFG.
    char *msg                       // Message string for error reporting.
);

//------------------------------------------------------------------------------
// a simple example of an algorithm
//------------------------------------------------------------------------------