diff --git a/godel-script/README.md b/godel-script/README.md index 63bdb49..de8344b 100644 --- a/godel-script/README.md +++ b/godel-script/README.md @@ -79,10 +79,8 @@ git reset HEAD~ Use command below: ```bash -mkdir build -cd build -cmake .. -make -j +mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release +make -j6 ``` After building, you'll find `build/godel` in the `build` folder. @@ -91,20 +89,18 @@ After building, you'll find `build/godel` in the `build` folder. Use this command for help: -> ./build/godel -h +> godel -h -### Compile Target Soufflé +### Compile GödelScript to Target Soufflé -> ./build/godel -p {godel library directory} {input file} -s {soufflé output file} -Of +> godel -p {godel library directory} {input file} -s {soufflé output file} -O2 -`-Of` is an optimization for join order, we suggest to switch it on. +We suggest using `-O2` for stable optimizations. -### Directly Run Soufflé +### Directly Run GödelScript -> ./build/godel -p {godel library directory} {input file} -r -Of -f {database directory} +> godel -p {godel library directory} {input file} -r -O2 -f {database directory} -`-Of` is an optimization for join order, we suggest to switch it on. +We suggest using `-O2` for stable optimizations. `-r` means directly run soufflé. - -`-v` could be used for getting verbose info. 
diff --git a/godel-script/godel-frontend/src/error/error.cpp b/godel-script/godel-frontend/src/error/error.cpp index b2c48c3..c94a2d4 100644 --- a/godel-script/godel-frontend/src/error/error.cpp +++ b/godel-script/godel-frontend/src/error/error.cpp @@ -231,13 +231,13 @@ void error::warn_ignored_DO_schema(const std::unordered_set& vec) { size_t ignored_count = 0; for(const auto& i : vec) { ++ignored_count; - if (ignored_count > 4) { + if (ignored_count > 8) { break; } std::clog << reset << " " << i << "\n"; } - if (vec.size() > 4) { - std::clog << reset << " ...(" << vec.size()-4 << ")\n"; + if (vec.size() > 8) { + std::clog << reset << " ...(" << vec.size() - 8 << ")\n"; } std::clog << std::endl; } diff --git a/godel-script/godel-frontend/src/ir/inst_combine.cpp b/godel-script/godel-frontend/src/ir/inst_combine.cpp index eabdc67..6b12c6d 100644 --- a/godel-script/godel-frontend/src/ir/inst_combine.cpp +++ b/godel-script/godel-frontend/src/ir/inst_combine.cpp @@ -12,7 +12,7 @@ void inst_combine_pass::visit_store(lir::store* s) { // // ( // ssa_temp_0 = a, - // b = ssa_temp_1, + // ssa_temp_1 = b, // call(ssa_temp_2, ssa_temp_0, ssa_temp_1) // ) // @@ -86,26 +86,67 @@ void inst_combine_pass::visit_compare(lir::compare* c) { } } +void inst_combine_pass::visit_call(lir::call* c) { + if (c->get_func_kind() != lir::call::kind::key_cmp) { + return; + } + if (c->get_function_name() != "key_eq") { + return; + } + + const auto& left = c->get_arguments()[0]; + const auto& right = c->get_arguments()[1]; + + // record this case: + // + // a.key_eq(b.getParent()) + // --> + // ( + // getParent(ssa_temp_0, b), + // a = ssa_temp_0 + // ) + // + // and optimize this case to: + // + // getParent(a, b) + // + if (left.kind==lir::inst_value_kind::variable && + right.kind==lir::inst_value_kind::variable) { + variable_reference_graph[left.content].insert({right.content, c}); + variable_reference_graph[right.content].insert({left.content, c}); + } +} + bool inst_combine_pass::run() { - 
for(auto impl : ctx->rule_impls) { - scan(impl); - inst_elimination_worker().copy(impl); + for (auto impl : ctx->rule_impls) { + run_on_single_impl(impl); } - for(auto impl : ctx->database_get_table) { - scan(impl); - inst_elimination_worker().copy(impl); + for (auto impl : ctx->database_get_table) { + run_on_single_impl(impl); } - for(auto impl : ctx->schema_get_field) { - scan(impl); - inst_elimination_worker().copy(impl); + for (auto impl : ctx->schema_get_field) { + run_on_single_impl(impl); } - for(auto impl : ctx->schema_data_constraint_impls) { - scan(impl); - inst_elimination_worker().copy(impl); + for (auto impl : ctx->schema_data_constraint_impls) { + run_on_single_impl(impl); } return true; } +void inst_combine_pass::run_on_single_impl(souffle_rule_impl* b) { + auto worker = inst_elimination_worker(); + size_t pass_run_count = 0; + const size_t max_pass_run_count = 16; + scan(b); + worker.copy(b); + ++ pass_run_count; + while (worker.get_eliminated_count() && pass_run_count < max_pass_run_count) { + scan(b); + worker.copy(b); + ++ pass_run_count; + } +} + void inst_combine_pass::scan(souffle_rule_impl* b) { variable_reference_graph.clear(); b->get_block()->accept(this); @@ -265,6 +306,7 @@ void inst_elimination_worker::visit_block(lir::block* node) { for(auto i : node->get_content()) { // skip eliminated instruction if (i->get_flag_eliminated()) { + ++ eliminated_count; continue; } @@ -338,6 +380,8 @@ void inst_elimination_worker::visit_aggregator(lir::aggregator* node) { } void inst_elimination_worker::copy(souffle_rule_impl* impl) { + eliminated_count = 0; + blk.clear(); auto impl_blk = new lir::block(impl->get_block()->get_location()); blk.push_back(impl_blk); @@ -354,4 +398,69 @@ void inst_elimination_worker::copy(souffle_rule_impl* impl) { delete impl_blk; } +void replace_find_call::visit_block(lir::block* node) { + bool has_find_call = false; + for (auto i : node->get_content()) { + if (i->get_kind() != lir::inst_kind::inst_call) { + continue; + } 
+ auto call = reinterpret_cast(i); + if (call->get_func_kind() == lir::call::kind::find && + call->get_function_name() == "find") { + has_find_call = true; + break; + } + } + + if (has_find_call) { + std::vector new_content; + for (auto i : node->get_content()) { + if (i->get_kind() != lir::inst_kind::inst_call) { + new_content.push_back(i); + continue; + } + + auto call = reinterpret_cast(i); + if (call->get_func_kind() != lir::call::kind::find || + call->get_function_name() != "find") { + new_content.push_back(i); + continue; + } + + auto dst = call->get_return(); + auto arg0 = call->get_arguments()[0]; + auto arg1 = call->get_arguments()[1]; + auto new_block = new lir::block(call->get_location()); + new_block->set_use_comma(); + new_content.push_back(new_block); + + new_block->add_new_content(new lir::store(arg0, dst, call->get_location())); + new_block->add_new_content(new lir::store(arg1, arg0, call->get_location())); + + delete i; + } + node->get_mutable_content().swap(new_content); + } else { + for (auto i : node->get_content()) { + i->accept(this); + } + } +} + +bool replace_find_call::run() { + for (auto impl : ctx->rule_impls) { + impl->get_block()->accept(this); + } + for (auto impl : ctx->database_get_table) { + impl->get_block()->accept(this); + } + for (auto impl : ctx->schema_get_field) { + impl->get_block()->accept(this); + } + for (auto impl : ctx->schema_data_constraint_impls) { + impl->get_block()->accept(this); + } + return true; +} + } \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/inst_combine.h b/godel-script/godel-frontend/src/ir/inst_combine.h index b6b0c25..80e68b7 100644 --- a/godel-script/godel-frontend/src/ir/inst_combine.h +++ b/godel-script/godel-frontend/src/ir/inst_combine.h @@ -20,9 +20,11 @@ class inst_combine_pass: public pass { private: void visit_store(lir::store*) override; void visit_compare(lir::compare*) override; + void visit_call(lir::call*) override; private: void scan(souffle_rule_impl*); + 
void run_on_single_impl(souffle_rule_impl*); public: inst_combine_pass(ir_context& c): pass(pass_kind::ps_inst_combine, c) {} @@ -65,6 +67,7 @@ class combine_worker: public lir::inst_visitor { class inst_elimination_worker: public lir::inst_visitor { private: std::vector blk; + size_t eliminated_count = 0; private: void visit_boolean(lir::boolean* node) override { @@ -111,6 +114,21 @@ class inst_elimination_worker: public lir::inst_visitor { public: void copy(souffle_rule_impl*); + auto get_eliminated_count() const { + return eliminated_count; + } +}; + +class replace_find_call: public pass { +private: + void visit_block(lir::block*) override; + +public: + replace_find_call(ir_context& c): pass(pass_kind::ps_replace_find_call, c) {} + const char* get_name() const override { + return "[Transform] Replace Find Call"; + } + bool run() override; }; } \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/ir_gen.h b/godel-script/godel-frontend/src/ir/ir_gen.h index 66f548d..814e620 100644 --- a/godel-script/godel-frontend/src/ir/ir_gen.h +++ b/godel-script/godel-frontend/src/ir/ir_gen.h @@ -226,9 +226,14 @@ class ir_gen: public ast_visitor { std::vector&, bool); bool visit_for_stmt(for_stmt*) override; + // adjust order of generated IR, to change the join order, make it running faster + // for statement often uses a large set, so this optimization is useful in most cases void optimized_for_stmt_gen(for_stmt*); void unoptimized_for_stmt_gen(for_stmt*); bool visit_let_stmt(let_stmt*) override; + // adjust order of generated IR, to change the join order, make it running faster + // let statement often uses single value or a small set + // so this optimization is not very useful, or even harmful void optimized_let_stmt_gen(let_stmt*); void unoptimized_let_stmt_gen(let_stmt*); diff --git a/godel-script/godel-frontend/src/ir/pass.h b/godel-script/godel-frontend/src/ir/pass.h index 1fadf6f..49316aa 100644 --- a/godel-script/godel-frontend/src/ir/pass.h +++ 
b/godel-script/godel-frontend/src/ir/pass.h @@ -13,6 +13,7 @@ enum class pass_kind { ps_remove_unused, ps_remove_unused_type, ps_inst_combine, + ps_replace_find_call, ps_flatten_nested_block, ps_aggregator_inline_remark, ps_ungrounded_check, diff --git a/godel-script/godel-frontend/src/ir/pass_manager.cpp b/godel-script/godel-frontend/src/ir/pass_manager.cpp index 46a68c4..da06378 100644 --- a/godel-script/godel-frontend/src/ir/pass_manager.cpp +++ b/godel-script/godel-frontend/src/ir/pass_manager.cpp @@ -29,6 +29,7 @@ void pass_manager::run(ir_context& ctx, const cli::configure& conf) { ordered_pass_list.push_back(new unused_type_alias_remove_pass(ctx)); } if (!conf.count(cli::option::cli_disable_inst_combine)) { + ordered_pass_list.push_back(new replace_find_call(ctx)); ordered_pass_list.push_back(new inst_combine_pass(ctx)); } ordered_pass_list.push_back(new flatten_nested_block(ctx)); diff --git a/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp b/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp index 7623786..5a9e1d3 100644 --- a/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp +++ b/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp @@ -293,7 +293,7 @@ void ungrounded_parameter_checker::report_unused_parameter(const report::span& s bool flag_self_ungrounded = false; // start checking - for(const auto& i : func->ordered_parameter_list) { + for (const auto& i : func->ordered_parameter_list) { // if variable is not a parameter or already used, do not report if (!record.count(i) || record.at(i)) { continue; @@ -315,10 +315,12 @@ void ungrounded_parameter_checker::report_unused_parameter(const report::span& s if (is_native_type(type) && !record_is_set_flag.at(i)) { ungrounded_params += ungrounded_params.length()? ", ":""; ungrounded_params += i; - } else if (i!="self") { + } else if (i != "self") { unused_params += unused_params.length()? 
", ":""; unused_params += i; } else { + // unused self, mark it as ungrounded + // so self constraint will be generated to protect this parameter flag_self_ungrounded = true; } } @@ -326,14 +328,14 @@ void ungrounded_parameter_checker::report_unused_parameter(const report::span& s // unused warning report if (unused_params.length()) { err->warn(stmt_loc, - "unused parameter \"" + unused_params + "\" in this branch." + "\"" + unused_params + "\" is unused in this branch." ); } // ungrounded error report if (ungrounded_params.length()) { err->err(stmt_loc, - "ungrounded parameter \"" + ungrounded_params + "\" in this branch." + "\"" + ungrounded_params + "\" is ungrounded in this branch." ); } @@ -381,6 +383,11 @@ bool ungrounded_parameter_checker::check_directly_call_identifier(expr* node) { != ast_class::ac_identifier) { return false; } + // schema instance getting primary key equals to directly using this instance + // e.g. `a.id` in fact equals to `a` itself, so we see this as direct call id + if (is_schema_get_primary_key(real)) { + return true; + } if (!real->get_call_chain().empty()) { return false; } @@ -405,7 +412,12 @@ bool ungrounded_parameter_checker::check_non_binding_binary_operator(binary_oper return node->get_operator_type() == binary_operator::type::add || node->get_operator_type() == binary_operator::type::sub || node->get_operator_type() == binary_operator::type::mult || - node->get_operator_type() == binary_operator::type::div; + node->get_operator_type() == binary_operator::type::div || + node->get_operator_type() == binary_operator::type::compare_not_equal || + node->get_operator_type() == binary_operator::type::compare_less || + node->get_operator_type() == binary_operator::type::compare_less_equal || + node->get_operator_type() == binary_operator::type::compare_great || + node->get_operator_type() == binary_operator::type::compare_great_equal; } bool ungrounded_parameter_checker::visit_call_expr(call_expr* node) { @@ -641,11 +653,13 @@ bool 
ungrounded_parameter_checker::visit_call_head(call_head* node) { } bool ungrounded_parameter_checker::is_schema_get_primary_key(call_root* node) { - if (node->get_call_head()->get_first_expression()->get_ast_class()!=ast_class::ac_identifier) { + auto head = node->get_call_head(); + // should call a variable + if (head->get_first_expression()->get_ast_class() != ast_class::ac_identifier) { return false; } - const auto& head_type = node->get_call_head()->get_resolve(); + const auto& head_type = head->get_resolve(); // head type should not be global symbol or data-set type if (head_type.is_global || head_type.type.is_set) { return false; @@ -658,16 +672,19 @@ bool ungrounded_parameter_checker::is_schema_get_primary_key(call_root* node) { if (node->get_call_chain().size()<1) { return false; } + + // get type full path name const auto name = head_type.type.full_path_name_without_set(); const auto index = ctx->global.get_index(name); - if (index==global_symbol_table::npos) { + if (index == global_symbol_table::npos) { return false; } - if (ctx->global.get_kind(index)!=symbol_kind::schema) { + if (ctx->global.get_kind(index) != symbol_kind::schema) { return false; } + const auto& sc = ctx->global.get_schema(index); - if (node->get_call_chain()[0]->get_call_type()!=call_expr::type::get_field || + if (node->get_call_chain()[0]->get_call_type() != call_expr::type::get_field || node->get_call_chain()[0]->has_func_call()) { return false; } @@ -677,7 +694,20 @@ bool ungrounded_parameter_checker::is_schema_get_primary_key(call_root* node) { } bool ungrounded_parameter_checker::visit_call_root(call_root* node) { + // we see schema get primary key as call schema itself + // because in generated souffle: + // schema.primary_key = schema + // if schema is not grounded, the primary key is not grounded too + // but we add type constraint for each schema, so mark this as grounded + // except this call is: + // self.primary_key + // self is not always constraint, if marked as grounded 
+ // self will be ungrounded in generated souffle if (is_schema_get_primary_key(node)) { + auto first = node->get_call_head()->get_first_expression(); + if (reinterpret_cast(first)->get_name() != "self") { + node->get_call_head()->accept(this); + } return true; } for(auto i : node->get_call_chain()) {