Skip to content

Commit 5a80896

Browse files
committed
Initial commit
1 parent 3236c14 commit 5a80896

File tree

7 files changed

+480
-0
lines changed

7 files changed

+480
-0
lines changed

checkers.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
5+
class Checker:
    """Base class for checkers; it defines no attributes or methods of its own."""
7+
8+
9+
class ShellscriptChecker(Checker):
    """Checker subclass that currently adds nothing to ``Checker``.

    NOTE(review): presumably meant to run a shell script as a check -- confirm
    once an implementation exists.
    """

config.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
5+
# global configs: mail server settings (host, port, sender address)
# NOTE(review): the sender value looks like a redacted placeholder -- confirm.
config = {
    "MailServerHost": "smtp.example.com",
    "MailServerPort": 25,
    "MailServerFrom": "[email protected]",
}
10+
11+
12+
# global secrets: every entry is an empty string in this file
secrets = dict.fromkeys(
    ("RspaceApiKey", "LimesurveyUser", "LimesurveyPassword"),
    "",
)
17+

datenbiene.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
import sys
5+
import pandas as pd
6+
import logging
7+
import argparse
8+
import json
9+
from jinja2 import Template
10+
from sources import *
11+
from sinks import *
12+
from checkers import *
13+
14+
15+
# Module metadata.
__author__ = "Torsten Stöter"
__copyright__ = f"Copyright 2025 {__author__}"
__license__ = "GPL-3.0-only"
__version__ = "1.0"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
22+
23+
24+
def str_to_class(classname):
    """Look up ``classname`` among the attributes of this module.

    Args:
        classname: Name of an attribute in this module's namespace.

    Returns:
        The object bound to that name.

    Raises:
        AttributeError: If this module has no attribute of that name.
    """
    this_module = sys.modules[__name__]
    return getattr(this_module, classname)
26+
27+
28+
def cli(args):
    """Parse the command line and load the Excel sheets and the JSON file.

    Args:
        args: Argument strings without the program name, e.g. ``sys.argv[1:]``.

    Returns:
        A tuple ``(sources_df, checkers_df, sinks_df, keyvals)``: the
        "Sources", "Checkers" and "Sinks" sheets as DataFrames, followed by
        the decoded content of the JSON file.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            an unknown option is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e",
        "--excel",
        required=True,
        help="Path to the Excel file containing Sources, Checkers and Sinks sheets.",
    )
    parser.add_argument(
        "-j",
        "--json",
        required=True,
        help="Path to the JSON file containing key-value pairs.",
    )
    options = parser.parse_args(args)

    # One read_excel call with a list of sheet names opens the workbook once
    # and returns a dict keyed by sheet name.
    sheets = pd.read_excel(options.excel, sheet_name=["Sources", "Checkers", "Sinks"])

    # JSON is UTF-8 by specification; do not depend on the locale's encoding.
    with open(options.json, encoding="utf-8") as f:
        keyvals = json.load(f)
    print(keyvals)

    return sheets["Sources"], sheets["Checkers"], sheets["Sinks"], keyvals
53+
54+
55+
def _render_cell(value, keyvals):
    """Render one sheet cell as a Jinja2 template; return non-strings unchanged."""
    # Only text can be a template source. Empty cells (NaN/None) and numeric
    # cells are passed through instead of being handed to Template().
    if isinstance(value, str):
        return Template(value).render(keyvals)
    return value


def _split_patterns(cell):
    """Split a ';'-separated pattern cell into a list; an empty cell gives []."""
    return [] if pd.isnull(cell) else cell.split(";")


def main(sources_df, checkers_df, sinks_df, keyvals):
    """Build all sources from the Sources sheet and feed them through the sinks.

    Args:
        sources_df: DataFrame with the columns "Source", "Name",
            "PathPatterns", "FilePatterns" and "AdditionalOptions".
        checkers_df: DataFrame of checkers; not used yet (see TODO below).
        sinks_df: DataFrame with the columns "Sink" and "Target".
        keyvals: Mapping used as the Jinja2 render context for every text cell
            and passed as options to sources and sinks.

    Returns:
        None. Each sink's ``store()`` result replaces the source list that is
        handed to the next sink.
    """
    # apply Jinja2 template rendering to every text cell using key-value pairs
    sources_df = sources_df.map(lambda cell: _render_cell(cell, keyvals))
    print(sources_df)
    sinks_df = sinks_df.map(lambda cell: _render_cell(cell, keyvals))
    print(sinks_df)

    # process all sources
    sources = []
    for _, row in sources_df.iterrows():
        if pd.isnull(row["Source"]):
            continue

        try:
            source_cls = str_to_class(row["Source"] + "Source")
        except AttributeError:  # class not found, use generic source
            source_cls = GenericSource

        # Per-row options are a JSON object; they override the global key-values.
        addopt = (
            {}
            if pd.isnull(row["AdditionalOptions"])
            else json.loads(row["AdditionalOptions"])
        )
        src = source_cls(
            row["Name"],
            _split_patterns(row["PathPatterns"]),
            _split_patterns(row["FilePatterns"]),
            keyvals | addopt,
        )

        print(src.get_filelist())
        sources.append(src)

    # run all checkers
    # TODO

    # process all sinks
    for _, row in sinks_df.iterrows():
        print(row)
        if pd.isnull(row["Sink"]):
            continue

        try:
            sink_cls = str_to_class(row["Sink"] + "Sink")
        except AttributeError:  # class not found, use generic sink
            sink_cls = Sink

        snk = sink_cls(row["Target"], sources, keyvals)
        # The output of one sink is the input of the next one.
        sources = snk.store()
111+
112+
113+
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the pipeline.
    main(*cli(sys.argv[1:]))

example.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"study": "multi-modal-study-01",
3+
"subject": "ab12",
4+
"date": 20250202
5+
}

example.xlsx

9.32 KB
Binary file not shown.

sinks.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
import os
5+
import tarfile
6+
import hashlib
7+
import filetype
8+
import owncloud
9+
import smtplib
10+
from email.message import EmailMessage
11+
from config import config
12+
from sources import Source
13+
14+
15+
class Sink:
    """Generic pass-through sink: it keeps its sources and returns them as-is."""

    def __init__(self, target, sources, additional_options=None):
        """Remember ``target`` and ``sources``.

        ``additional_options`` is accepted but not stored by this class.
        """
        self.sources = sources
        self.target = target

    def store(self):
        """Return the sources unchanged."""
        return self.sources
22+
23+
24+
class TarballSink(Sink):
    """Sink that packs each source into a gzip tarball and writes SHA256 checksums.

    All files are written to ``<target>/<subject>_<date>/``.
    """

    def __init__(self, target, sources, additional_options):
        """Read "subject", "date" and "study" from ``additional_options``.

        Raises:
            KeyError: If one of the three keys is missing.
            ValueError: If the date value contains no digits.
        """
        super().__init__(target, sources, additional_options)

        self.subject = additional_options["subject"].lower()
        # date string should be in format 20250124: here we strip all non-digits
        self.date = int(
            "".join(c for c in str(additional_options["date"]) if c.isdigit())
        )
        self.study = additional_options["study"]

        self.prefix = f"{self.subject}_{self.date}"

    @staticmethod
    def _compresslevel(src_name, filelist):
        """Return gzip level 0 for video content, otherwise 9."""
        # disable compression for videos
        if "video" in src_name.lower():
            return 0
        for f in filelist:
            if not os.path.isfile(f):
                continue
            kind = filetype.guess(f)
            print(f, kind)
            if kind and "video" in kind.mime:
                return 0
        return 9

    def store(self):
        """Write one tarball per source plus a checksum file.

        Returns:
            A single-element list holding a ``Source`` named "TarballSink"
            built from the session directory and the created file paths.

        Raises:
            FileExistsError: If the session directory already exists.
        """
        # Create the target including any missing parent directories.
        os.makedirs(self.target, exist_ok=True)

        session_dir = os.path.join(self.target, self.prefix)
        # os.mkdir raises if the session directory already exists.
        # NOTE(review): presumably this guards an earlier session against
        # being overwritten -- confirm.
        os.mkdir(session_dir)

        tarfiles = []

        for src in self.sources:
            print(src.name)
            # Query the file list once so the compression decision and the
            # archive content are based on the same files.
            filelist = src.get_filelist()
            compress = self._compresslevel(src.name, filelist)

            tarfilename = os.path.join(
                session_dir, self.prefix + "_" + src.name + ".tar.gz"
            )

            # The context manager closes the archive even if adding a file fails.
            with tarfile.open(tarfilename, "w:gz", compresslevel=compress) as tar:
                for f in filelist:
                    print(f"Storing {f} to {tarfilename} ...")
                    tar.add(f, arcname=os.path.basename(f))

            tarfiles.append(tarfilename)

        print("Computing checksums for ", tarfiles)

        hashlines = []
        for tf in tarfiles:
            with open(tf, "rb") as f:
                digest = hashlib.file_digest(f, "sha256").hexdigest()
            hashlines.append(f"SHA256 ({os.path.basename(tf)}) = {digest}\n")

        checksum_file = os.path.join(session_dir, self.prefix + "_Checksums.txt")
        # Fixed encoding so file names are written the same way on every platform.
        with open(checksum_file, "w", encoding="utf-8") as f:
            f.writelines(hashlines)

        tarfiles.append(checksum_file)

        return [Source("TarballSink", [session_dir], tarfiles)]
91+
92+
93+
class NextcloudSink(Sink):
    """Sink that uploads every source file through a public share link."""

    def __init__(self, target, sources, additional_options=None):
        """Forward all arguments to ``Sink``; ``target`` is the public link."""
        super().__init__(target, sources, additional_options)

    def store(self):
        """Upload each file to the share root under its base name.

        Returns:
            The sources, unchanged.
        """
        print("Uploading to NexCloud: " + self.target)

        client = owncloud.Client.from_public_link(self.target)
        # Lazy generator: file lists are still queried source by source.
        pending = (
            path for source in self.sources for path in source.get_filelist()
        )
        for path in pending:
            print("Uploading... " + path)
            remote_name = "/" + os.path.basename(path)
            client.put_file(remote_name, path, chunked=False)

        return self.sources
107+
108+
109+
class EmailSink(Sink):
    """Sink that sends a notification e-mail to ``target``."""

    def __init__(self, target, sources, additional_options=None):
        """Read "subject", "date" and "study" from ``additional_options``.

        Raises:
            TypeError: If ``additional_options`` is None. The default exists
                only for signature compatibility with ``Sink``.
            KeyError: If one of the three keys is missing.
            ValueError: If the date value contains no digits.
        """
        super().__init__(target, sources, additional_options)

        if additional_options is None:
            # Fail with a clear message instead of "'NoneType' object is not
            # subscriptable".
            raise TypeError(
                "EmailSink requires additional_options with the keys "
                "'subject', 'date' and 'study'"
            )

        self.subject = additional_options["subject"].lower()
        # date string should be in format 20250124: here we strip all non-digits
        self.date = int(
            "".join(c for c in str(additional_options["date"]) if c.isdigit())
        )
        self.study = additional_options["study"]

    def store(self):
        """Send the message via the configured SMTP server.

        Returns:
            The sources, unchanged.
        """
        msg = EmailMessage()
        msg.set_content("Hello World!")
        msg["Subject"] = f"Datenbiene: {self.study} {self.subject} {self.date}"
        msg["From"] = config["MailServerFrom"]
        msg["To"] = self.target

        # send the message via our own SMTP server; the context manager issues
        # QUIT and closes the connection even if sending fails
        with smtplib.SMTP(
            config["MailServerHost"], port=config["MailServerPort"]
        ) as smtp:
            smtp.send_message(msg)

        return self.sources
135+
136+
137+
class OmeroSink(Sink):
    """Placeholder sink that adds nothing to ``Sink`` yet.

    NOTE(review): presumably intended for OMERO uploads -- confirm.
    """
139+
140+
141+
class BidsSink(Sink):
    """Placeholder sink that adds nothing to ``Sink`` yet.

    NOTE(review): presumably intended for BIDS-structured output -- confirm.
    """

0 commit comments

Comments
 (0)