Skip to content

Commit

Permalink
[gaarf-py] Add support for remote writes for CSV and JSON writers
Browse files Browse the repository at this point in the history
Change-Id: I76c0b3bff5051dff1d314426b4361468ea188f99
  • Loading branch information
AVMarkin committed Jun 12, 2024
1 parent 7f9a5f0 commit c1b0471
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 28 deletions.
50 changes: 39 additions & 11 deletions py/gaarf/io/writers/csv_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,41 @@
import os
from typing import Literal

import smart_open

from gaarf.io import formatter
from gaarf.io.writers.abs_writer import AbsWriter
from gaarf.io.writers import file_writer
from gaarf.report import GaarfReport


class CsvWriter(AbsWriter):
class CsvWriter(file_writer.FileWriter):
"""Writes Gaarf Report to CSV.
Attributes:
destination_folder: Destination where CSV files are stored.
delimiter: CSV delimiter.
quotechar: CSV writer quotechar.
quoting: CSV writer quoting.
"""

def __init__(
self,
destination_folder: str = os.getcwd(),
destination_folder: str | os.PathLike = os.getcwd(),
delimiter: str = ',',
quotechar: str = '"',
quoting: Literal[0] = csv.QUOTE_MINIMAL,
**kwargs,
) -> None:
super().__init__(**kwargs)
self.destination_folder = destination_folder
"""Initializes CsvWriter based on a destination_folder.
Args:
destination_folder: Destination where CSV files are stored.
delimiter: CSV delimiter.
quotechar: CSV writer quotechar.
quoting: CSV writer quoting.
kwargs: Optional keyword arguments to initialize writer.
"""
super().__init__(destination_folder=destination_folder, **kwargs)
self.delimiter = delimiter
self.quotechar = quotechar
self.quoting = quoting
Expand All @@ -47,13 +66,22 @@ def __str__(self):
)

def write(self, report: GaarfReport, destination: str) -> str:
"""Writes Gaarf report to a CSV file.
Args:
report: Gaarf report.
destination: Base file name report should be written to.
Returns:
Full path where data are written.
"""
report = self.format_for_write(report)
destination = formatter.format_extension(destination, new_extension='.csv')
if not os.path.isdir(self.destination_folder):
os.makedirs(self.destination_folder)
self.create_dir()
logging.debug('Writing %d rows of data to %s', len(report), destination)
with open(
os.path.join(self.destination_folder, destination),
output_path = os.path.join(self.destination_folder, destination)
with smart_open.open(
output_path,
encoding='utf-8',
mode='w',
) as file:
Expand All @@ -65,5 +93,5 @@ def write(self, report: GaarfReport, destination: str) -> str:
)
writer.writerow(report.column_names)
writer.writerows(report.results)
logging.debug('Writing to %s is completed', destination)
return f'[CSV] - at {destination}'
logging.debug('Writing to %s is completed', output_path)
return f'[CSV] - at {output_path}'
46 changes: 46 additions & 0 deletions py/gaarf/io/writers/file_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for writing data to a file."""

import os

from gaarf.io.writers.abs_writer import AbsWriter


class FileWriter(AbsWriter):
  """Base writer for saving a Gaarf report to a local or remote file.

  The class only stores the destination and knows how to prepare a local
  folder; the actual serialization is left to subclasses that override
  `write`.

  Attributes:
    destination_folder: Destination where output file is stored, always
      kept as a string (path-like objects are converted on init).
  """

  def __init__(
    self,
    # NOTE: the default is evaluated once, when the class body is executed,
    # so it is the working directory at import time.
    destination_folder: str | os.PathLike = os.getcwd(),
    **kwargs: str,
  ) -> None:
    """Initializes FileWriter based on destination folder.

    Args:
      destination_folder: Local folder or remote location (a string
        containing '://') where output files are stored.
      kwargs: Optional keyword arguments forwarded to the parent writer.
    """
    super().__init__(**kwargs)
    # Normalize path-like objects so the '://' check in `create_dir` and
    # later string operations always work on a plain string.
    self.destination_folder = str(destination_folder)

  def create_dir(self) -> None:
    """Creates the destination folder when it is local and missing.

    Destinations containing '://' are treated as remote and are left
    untouched. For local destinations all missing parent folders are
    created as well.

    Raises:
      FileExistsError: If the destination exists but is not a directory.
    """
    if '://' in self.destination_folder:
      return
    # exist_ok=True makes the call a no-op for an existing directory and
    # avoids the check-then-create race when several writers target the
    # same folder at the same time.
    os.makedirs(self.destination_folder, exist_ok=True)

  def write(self) -> None:
    """Does nothing; concrete file writers override this method."""
    return
36 changes: 19 additions & 17 deletions py/gaarf/io/writers/json_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,45 +19,47 @@
import logging
import os

import smart_open

import gaarf
from gaarf.io import formatter
from gaarf.io.writers import abs_writer
from gaarf.io.writers import file_writer


class JsonWriter(abs_writer.AbsWriter):
class JsonWriter(file_writer.FileWriter):
"""Writes Gaarf Report to JSON.
Attributes:
destination_folder: A local folder where JSON files are stored.
destination_folder: Destination where JSON files are stored.
"""

def __init__(
self, destination_folder: str = os.getcwd(), **kwargs: str
self, destination_folder: str | os.PathLike = os.getcwd(), **kwargs: str
) -> None:
"""Initializes JsonWriter based on a destination_folder.
Args:
destination_folder: A local folder where JSON files are stored.
Returns: Description of return.
destination_folder: A local folder where JSON files are stored.
kwargs: Optional keyword arguments to initialize writer.
"""
super().__init__(**kwargs)
self.destination_folder = destination_folder
super().__init__(destination_folder=destination_folder, **kwargs)

def write(self, report: gaarf.report.GaarfReport, destination: str) -> str:
"""Writes Gaarf report to a JSON file.
Args:
report: Gaarf report.
destination: Base file name report should be written to.
report: Gaarf report.
destination: Base file name report should be written to.
Returns:
Base filename where data are written.
"""
report = self.format_for_write(report)
destination = formatter.format_extension(destination, new_extension='.json')
if not os.path.isdir(self.destination_folder):
os.makedirs(self.destination_folder)
self.create_dir()
logging.debug('Writing %d rows of data to %s', len(report), destination)
with open(
os.path.join(self.destination_folder, destination), 'w', encoding='utf-8'
) as f:
output_path = os.path.join(self.destination_folder, destination)
with smart_open.open(output_path, 'w', encoding='utf-8') as f:
json.dump(report.to_list(row_type='dict'), f)
logging.debug('Writing to %s is completed', destination)
return f'[JSON] - at {destination}'
logging.debug('Writing to %s is completed', output_path)
return f'[JSON] - at {output_path}'
13 changes: 13 additions & 0 deletions py/tests/unit/io/writers/test_csv_writer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import pytest
Expand Down
33 changes: 33 additions & 0 deletions py/tests/unit/io/writers/test_file_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pathlib

import pytest

from gaarf.io.writers import file_writer


class TestFileWriter:
  """Unit tests for FileWriter.create_dir."""

  def test_create_dir_from_local_path_creates_folder(self, tmp_path):
    destination_folder = tmp_path / 'destination_folder'
    writer = file_writer.FileWriter(destination_folder=destination_folder)
    writer.create_dir()
    assert destination_folder.is_dir()

  def test_create_dir_from_existing_local_path_is_noop(self, tmp_path):
    destination_folder = tmp_path / 'destination_folder'
    writer = file_writer.FileWriter(destination_folder=destination_folder)
    writer.create_dir()
    # A second call on an already existing folder must not raise.
    writer.create_dir()
    assert destination_folder.is_dir()

  def test_create_dir_from_remote_path_does_not_create_folder(
    self, tmp_path, monkeypatch
  ):
    # Run inside an empty temporary directory: a relative 'gs://...' path
    # would be resolved against the working directory, so this keeps the
    # check independent of (and harmless to) the real working directory.
    monkeypatch.chdir(tmp_path)
    destination_folder = 'gs://fake-bucket'
    writer = file_writer.FileWriter(destination_folder=destination_folder)
    writer.create_dir()
    expected_path = pathlib.Path(destination_folder)
    assert not expected_path.is_dir()
    # Nothing at all should have been created locally.
    assert not any(tmp_path.iterdir())
13 changes: 13 additions & 0 deletions py/tests/unit/io/writers/test_json_writer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import json
Expand Down

0 comments on commit c1b0471

Please sign in to comment.