Skip to content

Commit 1f6ce51

Browse files
authored
Script for creating the data catalog
1 parent aab6f19 commit 1f6ce51

File tree

1 file changed

+94
-0
lines changed

1 file changed

+94
-0
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Import libraries
2+
import os
3+
import glob
4+
import re
5+
6+
import pandas as pd
7+
8+
from docx import Document
9+
from docx.enum.style import WD_STYLE_TYPE
10+
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_LINE_SPACING
11+
from docx.shared import Inches, Pt, RGBColor
12+
# Dataset version. Kept as a string rather than a float so that a release
# such as '1.10' is not silently rendered as '1.1' in paths and titles.
version = '1.3'

# Project root on the network share; every other directory hangs off it
proj_dir = fr'\\export.hpc.ut.ee\gis\holgerv\GRQA_v{version}'
# Versioned data directory
data_dir = os.path.join(proj_dir, f'GRQA_data_v{version}')
# Metadata directory (holds the parameter statistics table)
meta_dir = os.path.join(proj_dir, 'GRQA_meta')
# Directory with the PNG figures that are embedded in the catalog
fig_dir = os.path.join(proj_dir, 'GRQA_figures')

# Parameter statistics table: supplies the code and the name of each parameter
stats_path = os.path.join(meta_dir, 'stats', 'GRQA_param_stats.csv')
param_stats = pd.read_csv(stats_path, sep=';')

param_codes = list(param_stats['Parameter code'])
param_names = list(param_stats['Parameter name'])

# Look-up from parameter code to parameter name
param_dict = {code: name for code, name in zip(param_codes, param_names)}

# Empty Word document that the catalog is assembled in
document = Document()

# Figure file name suffix -> opening words of the matching subheading
subheadings = dict([
    ('spatial_dist', 'Spatial distribution'),
    ('temporal_hist', 'Temporal distribution'),
    ('hist', 'Distribution'),
    ('box', 'Box plot'),
    ('availability', 'Monthly time series availability'),
    ('continuity', 'Monthly time series continuity'),
    ('median', 'Spatial distribution of yearly median'),
])

# Add new heading styles.
# Each entry: (new style name, built-in style it derives from,
#              font size in points, whether bold is set explicitly)
styles = document.styles
style_specs = (
    ('New Title', 'Title', 20, True),
    ('New Heading 1', 'Heading 1', 16, False),
    ('New Heading 2', 'Heading 2', 12, False),
)

for style_name, base_name, point_size, force_bold in style_specs:
    new_heading_style = styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH)
    new_heading_style.base_style = styles[base_name]
    font = new_heading_style.font
    font.name = 'Arial'
    font.size = Pt(point_size)
    font.color.rgb = RGBColor.from_string('000000')
    if force_bold:
        # Only the title sets bold; the two heading styles leave it untouched
        font.bold = True

# Centred document title that carries the dataset version
title = f'Data catalog of Global River Water Quality Archive (GRQA) v{version}'
paragraph = document.add_paragraph(title, style='New Title')
paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# Width applied to every embedded figure (5.9527559055 in = 15.12 cm)
width = Inches(5.9527559055)

# Add all figures to the Word document with corresponding headings.
# Every parameter gets a level-1 heading; each of its figures follows under a
# level-2 heading derived from the suffix of the figure file name.
for param_code in param_codes:
    document.add_paragraph(f'{param_dict[param_code]} ({param_code})', style='New Heading 1')

    # Expected file name: <param_code>_GRQA_<figure type>.png
    # The parameter code is escaped and the extension dot is literal, so
    # regex metacharacters cannot change what the pattern matches.
    name_pattern = re.compile(r'{}_GRQA_(.+)\.png'.format(re.escape(param_code)))

    # glob returns paths in file-system order; sort so the catalog layout is
    # reproducible between runs and machines.
    figures = sorted(glob.glob(os.path.join(fig_dir, f'{param_code}_GRQA*.png')))

    for fig in figures:
        match = name_pattern.fullmatch(os.path.basename(fig))
        fig_type = match.group(1) if match else None
        if fig_type not in subheadings:
            # Fail with the offending path instead of an opaque
            # AttributeError / KeyError further down.
            raise ValueError(f'Unrecognised figure file name: {fig}')

        subheading = f'{subheadings[fig_type]} of {param_code} observation values'
        document.add_paragraph(subheading, style='New Heading 2')
        document.add_picture(fig, width=width)

        # The picture sits in the last paragraph of the document:
        # centre it and use 1.5 line spacing.
        paragraph = document.paragraphs[-1]
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        paragraph_format = paragraph.paragraph_format
        paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE

# Write the finished catalog into the project directory, version in the name
output_path = os.path.join(proj_dir, f'GRQA_data_catalog_v{version}.docx')
document.save(output_path)

0 commit comments

Comments
 (0)