-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp_export_OSF_toPDF.py
More file actions
228 lines (202 loc) · 9.92 KB
/
app_export_OSF_toPDF.py
File metadata and controls
228 lines (202 loc) · 9.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
'''
## =================================================================================================
## Title: Streamlit App to Download OSF Project to PDF ##
## Project: ##
## Export OSF Project to PDF - Centre for Open Science (CoS) & University of Manchester (UoM)##
## UoM Team: ##
## Ramiro Bravo, Sarah Jaffa, Benito Matischen ##
## Author(s): ##
## Ramiro Bravo - ramiro.bravo@manchester.ac.uk - ramirobravo@gmail.com ##
## Create date: ##
## July-2025 ##
## Description: ##
## The Streamlit app serves as the front end application allowing users to download OSF ##
## project in PDF format. ##
## Parameters: ##
## OSF Project URL: Provide the URL of the project, for example: https://osf.io/kzc68/ ##
## Select API environment: Production or Test ##
## Token Source: Provided via .env file or entering the OSF API token. ##
## OSF API Key: Allows users to enter (paste) the API key for private repositories ##
## Running App locally: Recommended to use a python virtual environment ##
## $ source ./venv/bin/activate ##
## $ streamlit run app_export_OSF_toPDF.py ##
## ##
## =================================================================================================
'''
from urllib.error import HTTPError, URLError
import streamlit as st
import tempfile
import osfexport
import shutil
from datetime import datetime
import os
API_HOST = "https://api.osf.io/v2"
PROJECT_GROUPS = ["All projects where I'm a Contributor", "Single Project"]

# Module-level defaults; later UI sections overwrite these when the user
# supplies a token or a project ID.
pat = ''
project_id = ''

st.set_page_config(page_title="OSF PDF Export Tool", layout="centered")
st.title("🔄 OSF Project to PDF")

# Seed session state once; keys that already exist keep their current value.
#   current_id        - last project ID seen, so the single-project form can
#                       reset when the ID changes
#   checked_if_public - whether the user has run the visibility check, so the
#                       PAT section does not disappear
#   is_public         - stored result of the visibility check, so the API
#                       call is not repeated
for _state_key, _state_default in (
    ('current_id', ''),
    ('checked_if_public', False),
    ('is_public', False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# CSS injected into the page to hide the main menu, deploy button, footer
# and decoration elements.
_HIDE_SETTINGS_CSS = """
<style>
.reportview-container {
margin-top: -2em;
}
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
</style>
"""
st.markdown(_HIDE_SETTINGS_CSS, unsafe_allow_html=True)
def get_error_message(error):
    """
    Build the user-facing message for an error raised while exporting.

    Parameters
    -----------------
    error: HTTPError, URLError
        The error to translate into a friendlier message.

    Returns
    -----------------
    String error message to display to the user.

    Notes
    -----------------
    For HTTP 403 the wording depends on the module-level ``project_id``
    (single-project export vs. exporting all projects).
    """
    prefix = "Exporting failed as an error occurred: "

    # Anything that is not an HTTP status error is reported via its reason.
    if not isinstance(error, HTTPError):
        detail = f"Unexpected error connecting to the OSF: {error.reason}. Please try again later."
        return prefix + detail

    # Status codes whose message does not depend on any other state.
    fixed_messages = {
        401: (
            "We couldn't authenticate you with the personal access token.\n"
            "If you already have access to the OSF, please check the token is correct."
        ),
        404: "The project couldn't be found. Please check the URL/project ID is correct.",
        429: "Too many requests to the API, please try again in a few minutes.",
    }

    status = error.code
    if status == 403:
        # Evaluated only for 403 because it reads the module-level project_id.
        if project_id:
            detail = (
                "Please check you are a contributor for this private project.\n"
                'If you are, does your token have the "osf.full_read" permission?'
            )
        else:
            detail = (
                'Does your personal access token have the "osf.full_read" permission?\n'
                "This is needed to allow access to your projects with this token."
            )
    elif status in fixed_messages:
        detail = fixed_messages[status]
    else:
        detail = f"Unexpected error HTTP {error.code} - {error.msg}. Please try again later."

    return prefix + detail
# Choose to export multiple or single project - ask for id if needed
st.subheader("🔐 OSF Project Type")
project_group = st.radio("Choose projects to export:", PROJECT_GROUPS)

if project_group == PROJECT_GROUPS[1]:
    project_url = st.text_input(
        "📁 Enter OSF Project URL or ID:",
        placeholder="e.g. 'https://osf.io/abcde/' OR 'abcde'"
    )
    project_id = osfexport.extract_project_id(project_url) if project_url else ''
    if project_id:
        st.info(f"Exporting Project with ID: {project_id}")

    # Require rechecking visibility whenever the ID differs from the last one
    # seen. This deliberately also fires when the ID is cleared, so a stale
    # "public" result or a still-visible PAT box from a previous project can
    # never be combined with an empty project ID.
    if st.session_state.current_id != project_id:
        st.session_state.checked_if_public = False
        st.session_state.is_public = False
        st.session_state.current_id = project_id

    def check_visibility():
        """
        Button callback: look up whether the current project is public.

        Stores the result in ``st.session_state.is_public`` and marks the
        check as done; on an HTTP/URL error an error message is shown and
        the session state is left unchanged.
        """
        try:
            st.session_state.is_public = osfexport.is_public(f'{API_HOST}/nodes/{project_id}/')
            st.session_state.checked_if_public = True
        except (HTTPError, URLError) as e:
            msg = get_error_message(e)
            st.error(msg)

    is_id_check_ready = st.button(
        "Check Project is Public", type="secondary",
        disabled=not project_id,
        on_click=check_visibility
    )

if project_group == PROJECT_GROUPS[1] and st.session_state.checked_if_public:
    if not st.session_state.is_public:
        st.info("To export a private project, you will need to provide a Personal Access Token (PAT).")
        st.subheader("🔑 OSF Token")
        pat = st.text_input("Enter your OSF API token:", type="password")
    else:
        st.success("The project is public, no token is required.")

if project_group == PROJECT_GROUPS[0]:
    st.info("To export all projects, you will need to provide a Personal Access Token (PAT).")
    st.subheader("🔑 OSF Token")
    pat = st.text_input("Enter your OSF API token:", type="password")

# Valid states for exporting:
# Export multiple AND PAT given
# Export single AND ID given AND public
# Export single AND ID given AND not public and PAT given
# A single-project export always needs a project ID: without one the export
# would run with project_id='' instead of the project the user meant.
valid_export_all = project_group == PROJECT_GROUPS[0] and pat
valid_export_public = (
    project_group == PROJECT_GROUPS[1] and project_id and st.session_state.is_public
)
valid_export_private = project_group == PROJECT_GROUPS[1] and project_id and pat
valid_export_state = valid_export_all or valid_export_public or valid_export_private

submitted = st.button(
    "Export to PDF", type="primary",
    disabled=not valid_export_state
)
def download_export_files(pat='', project_id=''):
    """
    Try to download export files and handle any errors that occur.

    Fetches the project data, writes one PDF per root project into a
    temporary folder, then offers either the single PDF or a ZIP of all
    PDFs through a download button. Errors are reported to the user with
    ``st.error`` rather than raised.

    Parameters
    ----------------------
    - pat: str
        Personal Access Token to use for authentication.
    - project_id: str
        Optional ID of a project to export.

    Returns
    ----------------------
    None
    """
    with st.spinner("Generating PDF... Please wait."):
        # Step 1: Fetch the project data
        try:
            projects, root_nodes = osfexport.get_nodes(
                pat=pat,
                project_id=project_id
            )
        except (HTTPError, URLError) as e:
            st.error(get_error_message(e))
            return

        # Stop here when there is nothing to export; continuing would index
        # into an empty list of generated PDFs below.
        if not root_nodes:
            st.error("No projects found.")
            return

        # Step 2: Generate the PDFs to a temp folder.
        # No delete= argument: deleting on exit is already the default and the
        # keyword only exists on Python 3.12+.
        with tempfile.TemporaryDirectory() as tmpdir:
            paths = []
            for root_idx in root_nodes:
                _, pdf_path = osfexport.write_pdf(
                    projects,
                    root_idx=root_idx,
                    folder=tmpdir
                )
                paths.append(pdf_path)
            pdf_count = len(paths)  # Number of files, for better user messages

            # Step 3: Create zip file/PDF and display download link
            if pdf_count > 1:
                timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
                zip_filename = f'osf_projects_exported_{timestamp}'
                # Build the archive in its own temp folder so it is cleaned up
                # afterwards instead of piling up in the working directory
                # (and so the zip never ends up inside the folder being zipped).
                with tempfile.TemporaryDirectory() as archive_dir:
                    archive = shutil.make_archive(
                        os.path.join(archive_dir, zip_filename),
                        'zip',
                        root_dir=tmpdir
                    )
                    with open(archive, "rb") as file:
                        zip_bytes = file.read()
                st.info(f"📦 {pdf_count} PDFs generated and compressed")
                st.download_button(
                    label="📄 Download all PDFs as ZIP",
                    data=zip_bytes,
                    file_name=f"{zip_filename}.zip",
                    mime="application/zip"
                )
            else:
                # Read the PDF into memory before the temp folder is removed.
                with open(paths[0], "rb") as f:
                    pdf_bytes = f.read()
                # NOTE(review): the label uses projects[0]; presumably the only
                # root project is the first entry - confirm against get_nodes.
                st.download_button(
                    label=f"📄 Download PDF for {projects[0]['metadata']['title']}",
                    data=pdf_bytes,
                    file_name=os.path.basename(paths[0]),
                    mime="application/pdf"
                )
            st.success("✅ PDFs Generated!")
# Start the export when the "Export to PDF" button was clicked
# (`submitted` is that button's return value), using the token and
# project ID collected above.
if submitted:
    download_export_files(pat=pat, project_id=project_id)