Skip to content

Commit bdcd59f

Browse files
authored
Merge pull request #121 from internetarchive/issue-51
Adding Spectrogram and Waveform
2 parents 489ef9b + 10d0cf3 commit bdcd59f

File tree

3 files changed

+137
-2
lines changed

3 files changed

+137
-2
lines changed
iiify/loadtest/checkWaveformSizes.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import requests
2+
from iiify import resolver
3+
4+
def getAudioItems():
    """
    Report the pixel size of the waveform images of archive.org audio items.

    Uses the advanced search (first 5 result pages) to find audio items that
    are not in the stream_only collection. For every original audio file that
    has a PNG derivative, the image API info.json is fetched and its width
    and height are compared against the expected size of 800,200.

    Results are printed to stdout; nothing is returned.
    """
    item_count = 0
    waveform_item_count = 0
    waveform_file_count = 0

    # The query is loop-invariant; ':' is percent-encoded before it is passed to the search helper.
    query = "-collection:(stream_only) AND mediatype:(audio)".replace(":", "%3A")

    for page in range(5):
        audio = resolver.search(query, page=page)
        for item in audio["response"]["docs"]:
            item_count += 1
            identifier = item["identifier"]
            # Reuse `identifier` here: nesting item["identifier"] inside a
            # double-quoted f-string is a SyntaxError before Python 3.12.
            metadata = requests.get(f"https://archive.org/metadata/{identifier}").json()
            # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
            (originals, derivatives) = resolver.sortDerivatives(metadata)
            hasWaveform = False
            for file in [f for f in originals if f['format'] in resolver.AUDIO_FORMATS]:
                if file['name'] in derivatives and "PNG" in derivatives[file['name']]:
                    waveform_file_count += 1
                    filename = derivatives[file['name']]["PNG"]["name"]
                    # The image API identifier is "<item>/<file>" with the slashes percent-encoded.
                    imgId = f"{identifier}/{filename}".replace("/", "%2f")
                    imgUrl = f"{resolver.IMG_SRV}/3/{imgId}/info.json"
                    try:
                        infojson = requests.get(imgUrl).json()
                        if infojson['width'] == 800 and infojson['height'] == 200:
                            print(f"EXPECTED {identifier} waveform: {filename}")
                        else:
                            print(f"DIFFERENT {identifier} waveform: {filename} different size: {infojson['width']}, {infojson['height']} from {imgUrl}")
                    except Exception as error:
                        # Best effort: report the failing URL and carry on with the next file.
                        print(f"Failed to get {imgUrl}")
                        print(error)

                    hasWaveform = True
            if hasWaveform:
                waveform_item_count += 1

    print(f"Number of items checked: {item_count}")
    print(f"Number of waveforms {waveform_file_count} from {waveform_item_count} items")
44+
45+
if __name__ == "__main__":
    # Run this as:
    #     python -m iiify.loadtest.checkWaveformSizes
    getAudioItems()

iiify/resolver.py

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
import requests
55
from .configs import options, cors, approot, cache_root, media_root, apiurl, LINKS
6-
from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage, AnnotationPageRef, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef, ResourceItem1, CanvasRef
6+
from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage, AnnotationPageRef, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef, ResourceItem1, AccompanyingCanvas, CanvasRef
77
from urllib.parse import urlparse, parse_qs, quote
88
import json
99
import math
@@ -477,7 +477,7 @@ def addSeeAlso(manifest, identifier, files):
477477
"Djvu XML": "OCR Data",
478478
"Scandata": "OCR Data",
479479
"Archive BitTorrent": "Torrent",
480-
"Metadata": "Metadata",
480+
"Metadata": "Metadata"
481481
}
482482

483483
for file in files:
@@ -490,6 +490,60 @@ def addSeeAlso(manifest, identifier, files):
490490
"format": seeAlso['format']
491491
})
492492

493+
def addWaveform(identifier, slugged_id, filename, hard_code_size=True):
    """
    Create an IIIF AccompanyingCanvas representing a waveform image.

    The canvas carries a single painting annotation whose body is the
    waveform image associated with an audio file. By default the image
    dimensions are hardcoded and the body points straight at the
    archive.org download URL of the PNG. If `hard_code_size` is False, the
    width and height are retrieved from the IIIF image server and the body
    points at the image server instead (with an image service attached).

    Parameters:
        identifier (str): The archive.org identifier for the resource (e.g. item ID).
        slugged_id (str): A slugified version of the identifier used in the canvas ID.
        filename (str): The filename of the waveform image (PNG).
        hard_code_size (bool): If True, sets the image size to 800x200; if False, fetches dimensions from IIIF image server.

    Returns:
        AccompanyingCanvas: An IIIF-compliant AccompanyingCanvas object representing the waveform image.
    """

    # This canvas holds the waveform
    accompanying_canvas = AccompanyingCanvas(
        id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas/accompanying",
        label={"en": ["Waveform"]}
    )
    if hard_code_size:
        width = 800
        height = 200
        body = ResourceItem(
            id=f"https://archive.org/download/{identifier}/{filename.replace(' ', '%20')}",
            type="Image",
            width=width,
            height=height
        )
        # The body is the PNG derivative itself, so the media type is PNG.
        body.format = "image/png"
    else:
        imgId = f"{identifier}/{filename}".replace('/', '%2f')
        imgURL = f"{IMG_SRV}/3/{imgId}".replace(' ', '%20')
        # Find the width and height from the image server; the placeholder
        # id is overwritten below once the info.json has been read.
        body = ResourceItem(id="http://example.com", type="Image")
        infoJson = body.set_hwd_from_iiif(imgURL)

        service = ServiceItem(id=infoJson['id'], profile=infoJson['profile'], type=infoJson['type'])
        body.service = [service]
        # The image server is asked for default.jpg, so JPEG is correct here.
        body.id = f'{infoJson["id"]}/full/max/0/default.jpg'
        body.format = "image/jpeg"

        width = infoJson['width']
        height = infoJson['height']

    annotation = Annotation(id=f"{accompanying_canvas.id}/anno", motivation='painting', body=body, target=accompanying_canvas.id)

    annotationPage = AnnotationPage(id=f"{accompanying_canvas.id}/annoPage")
    annotationPage.add_item(annotation)

    accompanying_canvas.add_item(annotationPage)
    accompanying_canvas.height = height
    accompanying_canvas.width = width

    return accompanying_canvas
493547

494548
def addRendering(manifest, identifier, files):
495549
manifest.rendering = []
@@ -784,6 +838,18 @@ def create_manifest3(identifier, domain=None, page=None):
784838
label={"none": [format]},
785839
duration=float(file['length']))
786840
body.items.append(r)
841+
842+
if "Spectrogram" in derivatives[file['name']]:
843+
c.seeAlso = [{
844+
"id": f"https://archive.org/download/{identifier}/{normalised_id.replace(' ', '%20')}_spectrogram.png",
845+
"type": "Image",
846+
"label": {"en": ["Spectrogram"]},
847+
"format": "image/png"
848+
}]
849+
850+
if "PNG" in derivatives[file['name']]:
851+
# This should be the Wave form
852+
c.accompanyingCanvas = addWaveform(identifier, slugged_id, derivatives[file['name']]["PNG"]["name"])
787853
else:
788854
# todo: deal with instances where there are no derivatives for whatever reason
789855
body = ResourceItem(

tests/test_audio.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,24 @@ def test_audio_no_derivatives(self):
1818

1919
self.assertEqual(len(manifest['items']),114,f"Expected 114 canvases but got: {len(manifest['items'])}")
2020

21+
def test_spectrogram_waveforms(self):
    """Every canvas exposes a spectrogram seeAlso and an 800x200 waveform accompanyingCanvas."""
    resp = self.test_app.get("/iiif/3/hhfbc-cyl26/manifest.json?recache=True")
    self.assertEqual(resp.status_code, 200)
    manifest = resp.json

    for canvas in manifest['items']:
        # assertIn / assertEqual report the offending value on failure,
        # which assertTrue(<bool expression>) does not.
        self.assertIn('seeAlso', canvas)
        spectrogram = canvas['seeAlso'][0]
        self.assertEqual(spectrogram["format"], "image/png")
        self.assertEqual(spectrogram["label"]["en"][0], "Spectrogram")

        self.assertIn('accompanyingCanvas', canvas)
        accCanvas = canvas['accompanyingCanvas']
        self.assertEqual(accCanvas["type"], "Canvas")
        self.assertEqual(accCanvas["label"]["en"][0], "Waveform")
        self.assertIn("height", accCanvas)
        self.assertEqual(accCanvas["height"], 200)
        self.assertIn("width", accCanvas)
        self.assertEqual(accCanvas["width"], 800)
38+
2139
def test_multi_track_audio_gets_ranges(self):
2240
resp = self.test_app.get("/iiif/Weirdos_demo-1978/manifest.json")
2341
self.assertEqual(resp.status_code, 200)
@@ -33,3 +51,4 @@ def test_single_track_audio_gets_no_ranges(self):
3351
manifest = resp.json
3452

3553
self.assertNotIn("structures", manifest, "Expected single file audio to have no structures or ranges.")
54+

0 commit comments

Comments
 (0)