Skip to content

Commit 7911517

Browse files
authored
HAI-3593 Fix comma error in attachement file names (#1130)
* HAI-3593 Encode attachment file name properly so that commas (or other special characters) work. Add a startup component to fix existing files in Azure Blob Storage.
1 parent 248ac4d commit 7911517

14 files changed

Lines changed: 733 additions & 34 deletions

File tree

.nvmrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v20.10.0

services/hanke-service/src/integrationTest/kotlin/fi/hel/haitaton/hanke/attachment/application/ApplicationAttachmentControllerITest.kt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import io.mockk.justRun
3636
import io.mockk.verifyOrder
3737
import io.mockk.verifySequence
3838
import java.util.UUID
39+
import org.hamcrest.Matchers.containsString
3940
import org.junit.jupiter.api.AfterEach
4041
import org.junit.jupiter.api.BeforeEach
4142
import org.junit.jupiter.api.Nested
@@ -140,8 +141,13 @@ class ApplicationAttachmentControllerITest(@Autowired override val mockMvc: Mock
140141

141142
getAttachmentContent(attachmentId = attachmentId)
142143
.andExpect(status().isOk)
144+
.andExpect(header().string(CONTENT_DISPOSITION, containsString("attachment")))
143145
.andExpect(
144-
header().string(CONTENT_DISPOSITION, "attachment; filename=$FILE_NAME_PDF")
146+
header()
147+
.string(
148+
CONTENT_DISPOSITION,
149+
containsString("filename*=UTF-8''$FILE_NAME_PDF"),
150+
)
145151
)
146152
.andExpect(content().bytes(DUMMY_DATA))
147153

@@ -289,7 +295,7 @@ class ApplicationAttachmentControllerITest(@Autowired override val mockMvc: Mock
289295
private fun postAttachment(
290296
applicationId: Long = APPLICATION_ID,
291297
attachmentType: ApplicationAttachmentType = MUU,
292-
file: MockMultipartFile = testFile()
298+
file: MockMultipartFile = testFile(),
293299
): ResultActions {
294300
return mockMvc.perform(
295301
multipart("/hakemukset/$applicationId/liitteet")
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package fi.hel.haitaton.hanke.attachment.azure
2+
3+
import org.testcontainers.containers.GenericContainer
4+
import org.testcontainers.utility.DockerImageName
5+
6+
/**
7+
* Singleton object providing a shared Azurite container for integration tests.
8+
*
9+
* The container is lazily initialized on first access and reused across all tests. This avoids the
10+
* overhead of starting multiple Azurite containers during test execution.
11+
*/
12+
object AzuriteTestContainer {
13+
14+
/** Shared Azurite container instance. Lazily initialized and started on first access. */
15+
val container: GenericContainer<*> by lazy {
16+
GenericContainer(DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite"))
17+
.withExposedPorts(10000)
18+
.also { it.start() }
19+
}
20+
21+
/**
22+
* Gets the Azure Blob Storage connection string for the Azurite container.
23+
*
24+
* This connection string uses the well-known Azurite development credentials and points to the
25+
* exposed blob storage endpoint.
26+
*/
27+
fun getConnectionString(): String {
28+
return "BlobEndpoint=http://${container.host}:${container.firstMappedPort}/devstoreaccount1;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
29+
}
30+
}

services/hanke-service/src/integrationTest/kotlin/fi/hel/haitaton/hanke/attachment/azure/BlobFileClientITest.kt

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,16 @@ import com.azure.storage.blob.BlobContainerClient
55
import com.azure.storage.blob.BlobServiceClient
66
import fi.hel.haitaton.hanke.attachment.common.FileClientTest
77
import fi.hel.haitaton.hanke.attachment.common.TestFile
8-
import org.junit.jupiter.api.AfterAll
98
import org.junit.jupiter.api.BeforeAll
109
import org.junit.jupiter.api.BeforeEach
1110
import org.junit.jupiter.api.TestInstance
1211
import org.springframework.http.MediaType
1312
import org.springframework.test.context.ActiveProfiles
14-
import org.testcontainers.containers.GenericContainer
15-
import org.testcontainers.utility.DockerImageName
1613

1714
@ActiveProfiles("test")
1815
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
1916
class BlobFileClientITest : FileClientTest() {
2017

21-
private lateinit var azuriteContainer: GenericContainer<*>
2218
private lateinit var serviceClient: BlobServiceClient
2319
private lateinit var hankeAttachmentClient: BlobContainerClient
2420

@@ -28,17 +24,12 @@ class BlobFileClientITest : FileClientTest() {
2824
Containers(
2925
decisions = "paatokset-test",
3026
hakemusAttachments = "hakemusliitteet-test",
31-
hankeAttachments = "hankeliitteet-test"
27+
hankeAttachments = "hankeliitteet-test",
3228
)
3329

3430
@BeforeAll
3531
fun setup() {
36-
azuriteContainer =
37-
GenericContainer(DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite"))
38-
.withExposedPorts(10000)
39-
azuriteContainer.start()
40-
val connectionString =
41-
"BlobEndpoint=http://${azuriteContainer.host}:${azuriteContainer.firstMappedPort}/devstoreaccount1;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
32+
val connectionString = AzuriteTestContainer.getConnectionString()
4233
serviceClient = AzureContainerServiceClient(connectionString, "").blobServiceClient()
4334
hankeAttachmentClient = serviceClient.getBlobContainerClient(containers.hankeAttachments)
4435

@@ -58,11 +49,6 @@ class BlobFileClientITest : FileClientTest() {
5849
}
5950
}
6051

61-
@AfterAll
62-
fun teardown() {
63-
azuriteContainer.stop()
64-
}
65-
6652
override fun listBlobs(container: Container): List<TestFile> =
6753
when (container) {
6854
Container.HAKEMUS_LIITTEET ->
@@ -78,7 +64,7 @@ class BlobFileClientITest : FileClientTest() {
7864
MediaType.parseMediaType(it.properties.contentType),
7965
it.properties.contentLength.toInt(),
8066
it.properties.contentDisposition,
81-
BinaryData.fromString("")
67+
BinaryData.fromString(""),
8268
)
8369
}
8470
.toList()
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
package fi.hel.haitaton.hanke.attachment.common
2+
3+
import assertk.assertThat
4+
import assertk.assertions.contains
5+
import assertk.assertions.isEqualTo
6+
import assertk.assertions.isFalse
7+
import assertk.assertions.isTrue
8+
import com.azure.storage.blob.BlobServiceClient
9+
import com.azure.storage.blob.models.BlobHttpHeaders
10+
import fi.hel.haitaton.hanke.attachment.azure.AzureContainerServiceClient
11+
import fi.hel.haitaton.hanke.attachment.azure.AzuriteTestContainer
12+
import fi.hel.haitaton.hanke.attachment.azure.BlobFileClient
13+
import fi.hel.haitaton.hanke.attachment.azure.Container
14+
import fi.hel.haitaton.hanke.attachment.azure.Containers
15+
import org.junit.jupiter.api.BeforeAll
16+
import org.junit.jupiter.api.BeforeEach
17+
import org.junit.jupiter.api.Test
18+
import org.junit.jupiter.api.TestInstance
19+
import org.springframework.http.MediaType
20+
import org.springframework.test.context.ActiveProfiles
21+
22+
@ActiveProfiles("test")
23+
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
24+
class BlobMetadataFixerITest {
25+
26+
private lateinit var blobServiceClient: BlobServiceClient
27+
private lateinit var fileClient: BlobFileClient
28+
private lateinit var fixer: BlobMetadataFixer
29+
30+
// Use separate test container names to avoid interfering with other tests
31+
private val containers: Containers =
32+
Containers(
33+
decisions = "paatokset-fixer-test",
34+
hakemusAttachments = "hakemusliitteet-fixer-test",
35+
hankeAttachments = "hankeliitteet-fixer-test",
36+
)
37+
38+
@BeforeAll
39+
fun setup() {
40+
val connectionString = AzuriteTestContainer.getConnectionString()
41+
blobServiceClient = AzureContainerServiceClient(connectionString, "").blobServiceClient()
42+
fileClient = BlobFileClient(blobServiceClient, containers)
43+
fixer = BlobMetadataFixer(blobServiceClient, containers)
44+
45+
for (container in containers) {
46+
blobServiceClient.getBlobContainerClient(container).createIfNotExists()
47+
}
48+
}
49+
50+
@BeforeEach
51+
fun cleanupBlobs() {
52+
// Clean up any existing test blobs
53+
for (containerName in containers) {
54+
val containerClient = blobServiceClient.getBlobContainerClient(containerName)
55+
containerClient.listBlobs().forEach { blob ->
56+
if (blob.name.startsWith("test-fixer/")) {
57+
containerClient.getBlobClient(blob.name).delete()
58+
}
59+
}
60+
}
61+
}
62+
63+
@Test
64+
fun `fixAllBlobs scans all containers and returns combined statistics`() {
65+
// Upload test blobs with properly encoded filenames
66+
fileClient.upload(
67+
Container.HANKE_LIITTEET,
68+
"test-fixer/file1.pdf",
69+
"file1.pdf",
70+
MediaType.APPLICATION_PDF,
71+
"content1".toByteArray(),
72+
)
73+
fileClient.upload(
74+
Container.HAKEMUS_LIITTEET,
75+
"test-fixer/file2.pdf",
76+
"file2.pdf",
77+
MediaType.APPLICATION_PDF,
78+
"content2".toByteArray(),
79+
)
80+
81+
val result = fixer.fixAllBlobs()
82+
83+
// Should scan at least the 2 test blobs we uploaded
84+
assertThat(result.scannedCount).isEqualTo(2)
85+
// These are already properly encoded, so no fixes needed
86+
assertThat(result.fixedCount).isEqualTo(0)
87+
assertThat(result.errorCount).isEqualTo(0)
88+
}
89+
90+
@Test
91+
fun `fixBlobsInContainer fixes blobs with unencoded commas`() {
92+
val container = Container.HANKE_LIITTEET
93+
val path = "test-fixer/file,with,commas.pdf"
94+
val filename = "file,with,commas.pdf"
95+
96+
// Upload blob with properly encoded filename
97+
fileClient.upload(
98+
container,
99+
path,
100+
filename,
101+
MediaType.APPLICATION_PDF,
102+
"content".toByteArray(),
103+
)
104+
105+
// Manually corrupt the Content-Disposition header to simulate old format
106+
val containerClient = blobServiceClient.getBlobContainerClient(containers.hankeAttachments)
107+
val blobClient = containerClient.getBlobClient(path)
108+
val corruptedHeaders = BlobHttpHeaders()
109+
corruptedHeaders.setContentType(MediaType.APPLICATION_PDF_VALUE)
110+
// Simulate old format with unencoded comma
111+
corruptedHeaders.setContentDisposition("attachment; filename*=UTF-8''file,with,commas.pdf")
112+
blobClient.setHttpHeaders(corruptedHeaders)
113+
114+
// Verify it needs fixing
115+
val beforeProperties = blobClient.properties
116+
assertThat(BlobMetadataFixer.needsFixing(beforeProperties.contentDisposition)).isTrue()
117+
118+
// Run fixer
119+
val result = fixer.fixBlobsInContainer(container)
120+
121+
// Verify it was fixed
122+
assertThat(result.scannedCount).isEqualTo(1)
123+
assertThat(result.fixedCount).isEqualTo(1)
124+
assertThat(result.errorCount).isEqualTo(0)
125+
126+
// Verify the header is now properly encoded (no unencoded commas)
127+
val afterProperties = blobClient.properties
128+
assertThat(BlobMetadataFixer.needsFixing(afterProperties.contentDisposition)).isFalse()
129+
}
130+
131+
@Test
132+
fun `fixBlobsInContainer fixes blobs with simple filename format`() {
133+
val container = Container.HANKE_LIITTEET
134+
val path = "test-fixer/simple.pdf"
135+
136+
// Upload blob
137+
fileClient.upload(
138+
container,
139+
path,
140+
"simple.pdf",
141+
MediaType.APPLICATION_PDF,
142+
"content".toByteArray(),
143+
)
144+
145+
// Manually set old simple format
146+
val containerClient = blobServiceClient.getBlobContainerClient(containers.hankeAttachments)
147+
val blobClient = containerClient.getBlobClient(path)
148+
val oldHeaders = BlobHttpHeaders()
149+
oldHeaders.setContentType(MediaType.APPLICATION_PDF_VALUE)
150+
oldHeaders.setContentDisposition("attachment; filename=\"simple.pdf\"")
151+
blobClient.setHttpHeaders(oldHeaders)
152+
153+
// Verify it needs fixing
154+
val beforeProperties = blobClient.properties
155+
assertThat(BlobMetadataFixer.needsFixing(beforeProperties.contentDisposition)).isTrue()
156+
157+
// Run fixer
158+
val result = fixer.fixBlobsInContainer(container)
159+
160+
// Verify it was fixed
161+
assertThat(result.scannedCount).isEqualTo(1)
162+
assertThat(result.fixedCount).isEqualTo(1)
163+
assertThat(result.errorCount).isEqualTo(0)
164+
165+
// Verify the header now includes RFC 5987 format
166+
val afterProperties = blobClient.properties
167+
assertThat(BlobMetadataFixer.needsFixing(afterProperties.contentDisposition)).isFalse()
168+
assertThat(afterProperties.contentDisposition).contains("filename*=UTF-8''simple.pdf")
169+
}
170+
171+
@Test
172+
fun `fixBlobsInContainer skips blobs with no Content-Disposition`() {
173+
val container = Container.HANKE_LIITTEET
174+
val path = "test-fixer/no-disposition.pdf"
175+
176+
// Upload blob
177+
fileClient.upload(
178+
container,
179+
path,
180+
"test.pdf",
181+
MediaType.APPLICATION_PDF,
182+
"content".toByteArray(),
183+
)
184+
185+
// Remove Content-Disposition
186+
val containerClient = blobServiceClient.getBlobContainerClient(containers.hankeAttachments)
187+
val blobClient = containerClient.getBlobClient(path)
188+
val headers = BlobHttpHeaders()
189+
headers.setContentType(MediaType.APPLICATION_PDF_VALUE)
190+
headers.setContentDisposition(null)
191+
blobClient.setHttpHeaders(headers)
192+
193+
// Run fixer
194+
val result = fixer.fixBlobsInContainer(container)
195+
196+
// Should scan but not fix (just warn)
197+
assertThat(result.scannedCount).isEqualTo(1)
198+
assertThat(result.fixedCount).isEqualTo(0)
199+
assertThat(result.errorCount).isEqualTo(0)
200+
}
201+
202+
@Test
203+
fun `fixBlobsInContainer counts errors when filename extraction fails`() {
204+
val container = Container.HANKE_LIITTEET
205+
val path = "test-fixer/invalid-disposition.pdf"
206+
207+
// Upload blob
208+
fileClient.upload(
209+
container,
210+
path,
211+
"test.pdf",
212+
MediaType.APPLICATION_PDF,
213+
"content".toByteArray(),
214+
)
215+
216+
// Set invalid Content-Disposition that can't be parsed
217+
val containerClient = blobServiceClient.getBlobContainerClient(containers.hankeAttachments)
218+
val blobClient = containerClient.getBlobClient(path)
219+
val headers = BlobHttpHeaders()
220+
headers.setContentType(MediaType.APPLICATION_PDF_VALUE)
221+
headers.setContentDisposition("invalid-disposition-format")
222+
blobClient.setHttpHeaders(headers)
223+
224+
// Run fixer
225+
val result = fixer.fixBlobsInContainer(container)
226+
227+
// Should scan and encounter error (can't extract filename)
228+
assertThat(result.scannedCount).isEqualTo(1)
229+
assertThat(result.fixedCount).isEqualTo(0)
230+
assertThat(result.errorCount).isEqualTo(1)
231+
}
232+
233+
@Test
234+
fun `fixBlobsInContainer skips properly encoded blobs`() {
235+
val container = Container.HANKE_LIITTEET
236+
val path = "test-fixer/already-encoded.pdf"
237+
238+
// Upload blob with properly encoded filename containing special characters
239+
fileClient.upload(
240+
container,
241+
path,
242+
"file,with,commas.pdf",
243+
MediaType.APPLICATION_PDF,
244+
"content".toByteArray(),
245+
)
246+
247+
// Verify it's properly encoded
248+
val containerClient = blobServiceClient.getBlobContainerClient(containers.hankeAttachments)
249+
val blobClient = containerClient.getBlobClient(path)
250+
val beforeProperties = blobClient.properties
251+
assertThat(BlobMetadataFixer.needsFixing(beforeProperties.contentDisposition)).isFalse()
252+
253+
// Run fixer
254+
val result = fixer.fixBlobsInContainer(container)
255+
256+
// Should scan but not fix
257+
assertThat(result.scannedCount).isEqualTo(1)
258+
assertThat(result.fixedCount).isEqualTo(0)
259+
assertThat(result.errorCount).isEqualTo(0)
260+
261+
// Verify header unchanged
262+
val afterProperties = blobClient.properties
263+
assertThat(afterProperties.contentDisposition)
264+
.isEqualTo(beforeProperties.contentDisposition)
265+
}
266+
}

0 commit comments

Comments
 (0)