Skip to content

Commit dd676b7

Browse files
feat: Tiff file reader. (#77)
Used Irregular tags which are parsed from the file
1 parent 48564f6 commit dd676b7

2 files changed

Lines changed: 68 additions & 0 deletions

File tree

converter_app/readers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .dwl import DWLReader
2424
from .ebl import EblReader
2525
from .cfx import CfxReader
26+
from .tif import TifReader
2627
from .jcamp_conv_reader import JcampReader
2728
from .pdf import PdfReader
2829
from .lithoz_pdf import PdfLithozReader
@@ -85,6 +86,7 @@ def match_reader(self, file):
8586
registry.register(DWLReader)
8687
registry.register(EblReader)
8788
registry.register(CfxReader)
89+
registry.register(TifReader)
8890
registry.register(JcampReader)
8991
registry.register(PdfReader)
9092
registry.register(PdfLithozReader)

converter_app/readers/tif.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import logging
2+
import re
3+
from .base import Reader
4+
5+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
6+
7+
# Suffix constant. NOTE(review): not referenced anywhere in the visible part
# of this module -- confirm whether it is still needed.
UNIT_EXTENSION = "_unit"
8+
9+
10+
class TifReader(Reader):
    """Reader for ``.tif`` files that carry plain-text ``key=value`` tags.

    ``check`` accepts a file when its suffix is ``.tif``, its MIME type is
    ``image/tiff`` and at least one text line can be extracted from its
    content. ``get_tables`` then turns the extracted lines into a single
    table: lines without ``=`` that look like numbers become rows, lines
    with ``=`` become metadata entries, and every line is also kept in the
    table header.
    """

    identifier = 'tif_reader'
    priority = 96
    # Filled by check(); a list of lists as produced by _read_img().
    _parsed_values = None

    def check(self):
        """Return True if the file is a TIFF with extractable text lines.

        Side effect: when suffix and MIME type match, the parsed lines are
        cached on ``self._parsed_values`` for later use by ``get_tables``.
        """
        accepted = False
        is_tiff = (self.file.suffix.lower() == '.tif'
                   and self.file.mime_type == 'image/tiff')
        if is_tiff:
            self._parsed_values = self._read_img()
            # An empty (or missing) parse result means there is nothing to convert.
            accepted = bool(self._parsed_values)
        logger.debug('result=%s', accepted)
        return accepted

    def _read_img(self):
        """Extract the text lines embedded in the file content.

        Works on the string form of ``self.file.content``
        (presumably ``bytes``, whose string form escapes non-printable
        bytes as ``\\xNN`` and line breaks as literal ``\\r\\n`` -- TODO
        confirm against the file wrapper).

        :return: list of lists; each inner list is one line split on ``=``.
        """
        as_text = str(self.file.content)
        # Drop every literal "\xNN" escape sequence.
        as_text = re.sub(r'\\x[0-9a-f]{2}', '', as_text)

        # Discard everything up to and including the (last) "@@@@@@0" marker line.
        as_text = re.sub(r'^.+@@@@@@0\\r\\n', '', as_text)
        # Split on the literal "\r\n" sequences; the trailing fragment after
        # the final separator is dropped.
        text_lines = re.split(r'\\r\\n', as_text)[:-1]
        return [text_line.split('=') for text_line in text_lines]

    def get_value(self, value):
        """Normalise a numeric string so that ``float()`` can parse it.

        The patterns ``float_de_pattern`` and ``float_us_pattern`` are not
        defined in this class (presumably inherited from ``Reader``).

        :param value: candidate string.
        :return: the normalised string, or None if neither pattern matches.
        """
        if self.float_de_pattern.match(value):
            # Remove '.' group separators, then turn the decimal comma into a period.
            without_groups = value.replace('.', '')
            return without_groups.replace(',', '.')
        if self.float_us_pattern.match(value):
            # Only the ',' group separators have to go.
            return value.replace(',', '')
        return None

    def get_tables(self):
        """Build the single table for the lines cached by ``check``.

        :return: list containing the one table created via ``append_table``.
        """
        tables = []
        table = self.append_table(tables)

        for parts in self._parsed_values:
            if len(parts) != 1:
                # "key=value" line; any further '=' stay part of the value.
                table['metadata'][parts[0]] = '='.join(parts[1:])
            else:
                # Line without '=': keep it as a data row if it is numeric.
                number = self.get_value(parts[0])
                if number is not None:
                    index = len(table['rows'])
                    # The running index is stored twice, followed by the value.
                    table['rows'].append([index, index, float(number)])
            # Every line is recorded in the header in its original form.
            table['header'].append('='.join(parts))

        # NOTE(review): keys '1' and '2' presumably address row positions 1
        # and 2 (index and value) -- verify against the table consumer.
        for column_key, column_name in (('1', 'Idx'), ('2', 'Number')):
            table['columns'].append({
                'key': column_key,
                'name': column_name
            })

        table['metadata']['rows'] = str(len(table['rows']))
        table['metadata']['columns'] = str(len(table['columns']))

        return tables

0 commit comments

Comments
 (0)