Skip to content

Commit 154e4e3

Browse files
authored
Report Generation
* added parameter for report generation * added lint changes * lint changes to import
1 parent 3e99fcb commit 154e4e3

File tree

15 files changed

+273
-27
lines changed

15 files changed

+273
-27
lines changed

DAGify.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
from dagify.converter import Engine
1716
import os
1817
import click
18+
from dagify.converter import Engine
19+
from dagify.converter.report_generator import Report
1920

2021
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
2122

@@ -58,8 +59,14 @@
5859
show_default="{}".format(
5960
os.environ.get("AS_DAG_DIVIDER",
6061
"PARENT_FOLDER")))
62+
@click.option("-r",
63+
"--report-gen",
64+
is_flag=True,
65+
default=False,
66+
help="Generate report in txt and json format which \
67+
gives an overview of job_types converted")
6168

62-
def dagify(source_path, output_path, config_file, templates, dag_divider):
69+
def dagify(source_path, output_path, config_file, templates, dag_divider,report_gen):
6370
"""Run dagify."""
6471
print("Demo dagify Engine")
6572

@@ -70,6 +77,13 @@ def dagify(source_path, output_path, config_file, templates, dag_divider):
7077
templates_path=templates,
7178
dag_divider=dag_divider,
7279
)
80+
if report_gen:
81+
Report(
82+
source_path=source_path,
83+
output_path=output_path,
84+
config_file=config_file,
85+
templates_path=templates,
86+
)
7387

7488

7589
if __name__ == '__main__':

config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ config:
1616
mappings:
1717
- job_type: "command"
1818
template_name: "control-m-command-to-airflow-bash"
19-
#- job_type: "command"
20-
# template_name: "control-m-command-to-airflow-ssh"
19+
- job_type: "command"
20+
template_name: "control-m-command-to-airflow-ssh"
2121
#- job_type: "command"
2222
# template_name: "control-m-command-to-airflow-python"
23-
#- job_type: "DUMMY"
24-
# template_name: "control-m-dummy-to-airflow-dummy"
23+
- job_type: "sample"
24+
template_name: "control-m-dummy-to-airflow-dummy"

dagify/converter/engine.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,10 @@ def convert(self):
294294
def get_template(self, template_name):
295295
# Validate template_name is Provided
296296
if template_name is None:
297-
raise ValueError("dagify: template name must be provided")
298-
template = self.templates.get(template_name, None)
297+
#raise ValueError("dagify: template name must be provided")
298+
template = self.templates.get("control-m-dummy-to-airflow-dummy",None)
299+
else:
300+
template = self.templates.get(template_name, None)
299301
if template is None:
300302
raise ValueError(
301303
f"dagify: no template with name: '{template_name}' was not found among loaded templates.")
@@ -391,11 +393,11 @@ def generate_airflow_dags(self):
391393
for dep in dependencies[dag_divider_value][task]['external']:
392394
ext_task_uf = self.uf.get_task_by_attr("JOBNAME_ORIGINAL", dep)
393395
dependencies_in_dag_external.append({
394-
'task_name': task,
395-
'ext_dag': ext_task_uf.get_attribute(self.dag_divider),
396+
'task_name': task,
397+
'ext_dag': ext_task_uf.get_attribute(self.dag_divider),
396398
'ext_dep_task': dep,
397399
"marker_name": dep + "_marker_" + ''.join(random.choices('0123456789abcdef', k=4))
398-
})
400+
})
399401

400402
# Calculate external upstream dependencies where a task in the current dag depends on another dag's task
401403
# Such a dependency will require a DAG Sensor
@@ -437,7 +439,7 @@ def generate_airflow_dags(self):
437439
dependencies_int=dependencies_in_dag_internal,
438440
dependencies_ext=dependencies_in_dag_external,
439441
upstream_dependencies=upstream_dependencies
440-
)
442+
)
441443
with open(filename, mode="w", encoding="utf-8") as dag_file:
442444
dag_file.write(content)
443445

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""Module providing function to manipulate yaml files"""
2+
import yaml
3+
from .utils import (
4+
is_directory,
5+
generate_report,
6+
get_jobtypes_andcount,
7+
generate_json,
8+
format_table_data
9+
)
10+
11+
class Report():
    """Builds a conversion report (text and JSON) summarising which source
    job types are covered by the template mappings declared in the config.

    Instantiating the class immediately generates both report files in
    ``output_path`` (``Detailed-Report.txt`` and ``report.json``).
    """

    def __init__(
        self,
        source_path=None,
        output_path=None,
        templates_path="./templates",
        config_file="./config.yaml",
    ):
        """Store the paths and immediately run report generation.

        Args:
            source_path (str): Path to the source XML file.
            output_path (str): Directory the report files are written into.
            templates_path (str): Directory containing the templates.
            config_file (str): Path to the config.yaml mapping file.
        """
        self.config_file = config_file
        self.config = {}
        self.templates = {}
        self.source_path = source_path
        self.output_path = output_path
        self.templates_path = templates_path

        # Run the process: constructing a Report writes the report files.
        self.generate_report()

    def generate_report(self):
        """Generate the JSON and txt reports in ``self.output_path``."""
        templates_to_validate = []

        # Source_File_Info parameters. Only a single source file is counted;
        # directory sources are skipped below -- TODO confirm that is intended.
        source_files_count = 1
        source_file_info = []
        job_types_source = []
        job_types_source_count = 0

        # Job types declared in the config file.
        config_job_types, config_job_types_count = \
            get_jobtypes_andcount(self.config_file)

        # Job types present in the source XML (single-file sources only).
        if is_directory(self.source_path) is False:
            source_file_info.append(self.source_path.split("/")[-1])
            job_types_source, job_types_source_count = \
                get_jobtypes_andcount(self.source_path)

        # Templates info: load the config and collect each mapping's template.
        with open(self.config_file, encoding="utf-8") as stream:
            try:
                self.config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                raise exc
        for mapping in self.config["config"]["mappings"]:
            # Job types are uppercased in place, mirroring the converter.
            mapping["job_type"] = mapping["job_type"].upper()
            templates_to_validate.append(mapping["template_name"])

        # Statistics info parameters.
        job_types_converted, job_types_not_converted, converted_percentage, \
            non_converted_percentage = \
            self.get_statistics(job_types_source, config_job_types)

        # Table info.
        statistics = [
            f"Percentage of Jobtypes converted: {converted_percentage}%",
            f"Percentage of Jobtypes not converted: {non_converted_percentage}%"
        ]
        title = "DAGIFY REPORT"
        columns = ["TASK", "INFO", "COUNT"]
        rows = [
            ["Source_files", source_file_info, source_files_count],
            ["Source_File_Job_Types", job_types_source, job_types_source_count],
            ["Config_File_Job_Types", config_job_types, config_job_types_count],
            ["Job_Types_Converted", job_types_converted, len(job_types_converted)],
            ["Job_types_Not_Converted", job_types_not_converted, len(job_types_not_converted)],
            ["Templates_validated", templates_to_validate, len(templates_to_validate)]
        ]
        formatted_table_data = format_table_data(title, columns, rows)

        # Implicit concatenation instead of a backslash-continued literal:
        # the old form embedded the continuation lines' indentation spaces
        # in the user-visible message.
        warning_line = ("NOTE: If the job_type is not defined in the "
                        "config.yaml or if the job_type does not have a "
                        "matching template defined, it would be by default "
                        "converted into a DUMMYOPERATOR")

        generate_json(statistics, formatted_table_data, self.output_path)
        generate_report(statistics, title, columns, rows, warning_line, self.output_path)

    def get_statistics(self, source_jt, config_jt):
        """Calculate which job types were converted and the percentages.

        Args:
            source_jt (list): Job types found in the source file(s).
            config_jt (list): Job types declared in the config file.

        Returns:
            tuple: (converted, not_converted, converted_pct, not_converted_pct).
        """
        # Conversion info.
        job_types_converted = list(set(config_jt) & set(source_jt))
        job_types_not_converted = list(set(source_jt) - set(config_jt))

        # Percentages; guard an empty source list to avoid ZeroDivisionError.
        if source_jt:
            non_converted_percent = (len(job_types_not_converted) / len(source_jt)) * 100
            converted_percent = 100 - non_converted_percent
        else:
            converted_percent = 0
            non_converted_percent = 0

        return job_types_converted, job_types_not_converted, \
            converted_percent, non_converted_percent

dagify/converter/utils.py

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@
1414

1515
import re
1616
import os
17-
import yaml
1817
import pprint
18+
import xml.etree.ElementTree as ET
19+
import json
20+
import yaml
21+
from prettytable import PrettyTable
22+
1923

2024

2125
def clean_converter_type(converter_type):
@@ -102,3 +106,105 @@ def display_dict(dict):
102106
dict (dict): The dictionary to print.
103107
"""
104108
pprint.pprint(dict)
109+
110+
def count_yaml_files(directory, case_sensitive=True, recursive=False):
    """
    Count the YAML files (.yaml or .yml) under a directory.

    Args:
        directory (str): Path of the directory to scan.
        case_sensitive (bool): When True only lowercase extensions match;
            when False the comparison ignores case (default: True).
        recursive (bool): Whether to scan subdirectories too (default: False).

    Returns:
        int: How many YAML files were found.
    """
    yaml_exts = ('.yaml', '.yml')
    total = 0
    for _root, _dirs, filenames in os.walk(directory):
        for name in filenames:
            candidate = name if case_sensitive else name.lower()
            if candidate.endswith(yaml_exts):
                total += 1
        if not recursive:
            # Only the top level was requested; stop after the first step.
            break
    return total
133+
134+
def generate_report(lines, title, columns, rows, warning_line, output_dir):
    """Write the plain-text report to ``<output_dir>/Detailed-Report.txt``.

    Args:
        lines (list[str]): High-level summary lines written before the table.
        title (str): Title shown in the table header.
        columns (list[str]): Column headings.
        rows (list[list]): Table rows, one list per row, aligned with columns.
        warning_line (str): Trailing note appended after the table.
        output_dir (str): Directory in which the report file is created.
    """
    report = PrettyTable()
    report.title = title

    # Column config: left-align every column.
    report.field_names = columns
    for col in columns:
        report.align[col] = "l"

    # Row config
    report.add_rows(rows)

    report_file = f"{output_dir}/Detailed-Report.txt"
    # Explicit encoding so output does not depend on the platform default.
    with open(report_file, "w", encoding="utf-8") as final_report:
        for line in lines:
            final_report.write(line + '\n')
        final_report.write(str(report) + '\n')
        final_report.write(warning_line)
155+
def get_jobtypes_andcount(source_path):
    """Return the unique job types and their count for a source file.

    Supports two inputs: a Control-M XML export (``*.xml``), where job types
    come from the TASKTYPE attribute of each JOB element, and the
    ``config.yaml`` mapping file, where they come from each mapping's
    ``job_type`` key. Types are lowercased so the two sources compare equal.

    Args:
        source_path (str): Path to the ``.xml`` source or ``config.yaml``.

    Returns:
        tuple[list, int]: (unique lowercase job types, their count). Both are
        empty/zero when the path matches neither recognised input kind.
    """
    unique_job_types = []
    job_types_source = []
    job_types_count = 0
    if source_path.endswith('.xml'):
        tree = ET.parse(source_path)
        root = tree.getroot()
        # Find all JOB elements anywhere in the document.
        job_elements = root.findall('.//JOB')
        # A JOB without a TASKTYPE attribute is skipped rather than
        # crashing on None.lower().
        job_types_source = [job.get('TASKTYPE')
                            for job in job_elements
                            if job.get('TASKTYPE') is not None]
        # Convert all to lowercase for comparison.
        job_types_source = [item.lower() for item in job_types_source]
        unique_job_types = list(set(job_types_source))
        job_types_count = len(unique_job_types)
    elif source_path.endswith('config.yaml'):
        with open(source_path, 'r', encoding="utf-8") as file:
            data = yaml.safe_load(file)
        for mapping in data['config']['mappings']:
            job_types_source.append(mapping['job_type'])

        # Convert all to lowercase for comparison.
        job_types_source = [item.lower() for item in job_types_source]
        unique_job_types = list(set(job_types_source))
        job_types_count = len(unique_job_types)
    return unique_job_types, job_types_count
182+
183+
def format_table_data(title, columns, rows):
    """Shape table data into a JSON-serialisable dictionary.

    Args:
        title (str): Table title.
        columns (list[str]): Column headings.
        rows (list[list]): Row values positionally aligned with ``columns``.

    Returns:
        dict: ``{"title": ..., "columns": ..., "rows": [{col: value, ...}]}``.
    """
    # Pair each row's values with the column heading at the same position.
    formatted_rows = [
        {columns[position]: value for position, value in enumerate(row)}
        for row in rows
    ]
    return {
        "title": title,
        "columns": columns,
        "rows": formatted_rows,
    }
199+
200+
def generate_json(statistics, table_data, output_file_path):
    """Write the JSON report to ``<output_file_path>/report.json``.

    Args:
        statistics (list[str]): High-level summary lines.
        table_data (dict): Table structure produced by ``format_table_data``.
        output_file_path (str): Directory the report file is written into.
    """
    data = {
        "High_Level_Info": statistics,
        "table_data": table_data,
    }
    json_file_path = f"{output_file_path}/report.json"
    # Explicit encoding so output does not depend on the platform default.
    with open(json_file_path, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, indent=2)  # indent for better readability
210+

dagify/test/integration/test_references/009-fast-x/fast_x_reports.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
with DAG(
1515
dag_id="fast_x_reports",
16-
schedule_interval="@daily", # TIMEFROM not found, default schedule set to @daily,
16+
start_date=datetime.datetime(2024, 1, 1),
17+
#schedule="@daily",
18+
schedule_interval='*/5 * * * *',
1719
catchup=False,
1820
) as dag:
1921

dagify/test/integration/test_references/010-fast-x/fx_fld_001_app_001_subapp_001.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
with DAG(
1515
dag_id="fx_fld_001_app_001_subapp_001",
16-
schedule_interval="@daily", # TIMEFROM not found, default schedule set to @daily,
16+
start_date=datetime.datetime(2024, 1, 1),
17+
#schedule="@daily",
18+
schedule_interval='*/5 * * * *',
1719
catchup=False,
1820
) as dag:
1921

dagify/test/integration/test_references/011-fast-x/fx_fld_001_app_001_subapp_001.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
with DAG(
1515
dag_id="fx_fld_001_app_001_subapp_001",
16-
schedule_interval="@daily", # TIMEFROM not found, default schedule set to @daily,
16+
start_date=datetime.datetime(2024, 1, 1),
17+
#schedule="@daily",
18+
schedule_interval='*/5 * * * *',
1719
catchup=False,
1820
) as dag:
1921

@@ -46,8 +48,8 @@
4648

4749
# Airflow Downstream Task Dependencies (external dags)
4850

49-
fx_fld_001_app_002_subapp_002_job_003_marker_a61c = ExternalTaskMarker(
50-
task_id="fx_fld_001_app_002_subapp_002_job_003_marker_a61c",
51+
fx_fld_001_app_002_subapp_002_job_003_marker
52+
task_id="fx_fld_001_app_002_subapp_002_job_003_marker
5153
external_dag_id='fx_fld_001_app_002_subapp_002',
5254
external_task_id='fx_fld_001_app_002_subapp_002_job_003'
5355
)

dagify/test/integration/test_references/011-fast-x/fx_fld_001_app_002_subapp_001.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
with DAG(
1515
dag_id="fx_fld_001_app_002_subapp_001",
16-
schedule_interval="@daily", # TIMEFROM not found, default schedule set to @daily,
16+
start_date=datetime.datetime(2024, 1, 1),
17+
#schedule="@daily",
18+
schedule_interval='*/5 * * * *',
1719
catchup=False,
1820
) as dag:
1921

0 commit comments

Comments
 (0)