Skip to content

Commit

Permalink
Moved domain_oid check to prevent an unnecessary extraction rule from being processed
Browse files Browse the repository at this point in the history
  • Loading branch information
mattnowzari committed Feb 26, 2025
1 parent ccedd54 commit a3bd8fe
Showing 1 changed file with 37 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -269,49 +269,50 @@
" config_oid = source[\"configuration_oid\"]\n",
" domain_oid = source[\"domain_oid\"]\n",
"\n",
" all_rules = source[\"rules\"]\n",
" all_url_filters = source[\"url_filters\"]\n",
" # ensure the domain oid actually exists in our in-memory data structure\n",
" if domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]:\n",
" all_rules = source[\"rules\"]\n",
" all_url_filters = source[\"url_filters\"]\n",
"\n",
" # extract url filters\n",
" url_filters = []\n",
" if all_url_filters:\n",
" url_filters = [\n",
" {\n",
" \"type\": all_url_filters[0][\"filter\"],\n",
" \"pattern\": all_url_filters[0][\"pattern\"],\n",
" }\n",
" ]\n",
" # extract url filters\n",
" url_filters = []\n",
" if all_url_filters:\n",
" url_filters = [\n",
" {\n",
" \"type\": all_url_filters[0][\"filter\"],\n",
" \"pattern\": all_url_filters[0][\"pattern\"],\n",
" }\n",
" ]\n",
"\n",
" # extract rulesets\n",
" action_translation_map = {\n",
" \"fixed\": \"set\",\n",
" \"extracted\": \"extract\",\n",
" }\n",
" # extract rulesets\n",
" action_translation_map = {\n",
" \"fixed\": \"set\",\n",
" \"extracted\": \"extract\",\n",
" }\n",
"\n",
" ruleset = {}\n",
" if all_rules:\n",
" ruleset = [\n",
" ruleset = {}\n",
" if all_rules:\n",
" ruleset = [\n",
" {\n",
" \"action\": action_translation_map[\n",
" all_rules[0][\"content_from\"][\"value_type\"]\n",
" ],\n",
" \"field_name\": all_rules[0][\"field_name\"],\n",
" \"selector\": all_rules[0][\"selector\"],\n",
" \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n",
" \"value\": all_rules[0][\"content_from\"][\"value\"],\n",
" \"source\": all_rules[0][\"source_type\"],\n",
" }\n",
" ]\n",
"\n",
" # populate the in-memory data structure\n",
" temp_extraction_rulesets = [\n",
" {\n",
" \"action\": action_translation_map[\n",
" all_rules[0][\"content_from\"][\"value_type\"]\n",
" ],\n",
" \"field_name\": all_rules[0][\"field_name\"],\n",
" \"selector\": all_rules[0][\"selector\"],\n",
" \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n",
" \"value\": all_rules[0][\"content_from\"][\"value\"],\n",
" \"source\": all_rules[0][\"source_type\"],\n",
" \"url_filters\": url_filters,\n",
" \"rules\": ruleset,\n",
" }\n",
" ]\n",
"\n",
" # populate the in-memory data structure\n",
" temp_extraction_rulesets = [\n",
" {\n",
" \"url_filters\": url_filters,\n",
" \"rules\": ruleset,\n",
" }\n",
" ]\n",
"\n",
" if domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]:\n",
" print(\n",
" f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n",
" )\n",
Expand Down

0 comments on commit a3bd8fe

Please sign in to comment.