@@ -5,13 +5,26 @@ import (
55 "io"
66 "net/http"
77 "net/url"
8+ "regexp"
89 "strings"
10+ "sync"
911 "time"
1012
13+ "github.com/CorentinB/warc"
1114 "github.com/PuerkitoBio/goquery"
1215 "github.com/internetarchive/Zeno/internal/pkg/utils"
1316)
1417
// Package-level state used by getJWPlayerURLs.
var (
	// playerVersion holds the body returned by the INA player-hub version
	// endpoint. It stays empty until getJWPlayerURLs has fetched it.
	playerVersion string

	// playerVersionLock is held by getJWPlayerURLs while it reads or writes
	// playerVersion.
	playerVersionLock sync.Mutex

	// playerRegex matches a double-quoted, protocol-relative URL on
	// ssl.p.jwpcdn.com that ends in ".js". The surrounding quotes are part of
	// the match. It is compiled once at package initialization, so no init
	// function is needed.
	playerRegex = regexp.MustCompile(`"//ssl\.p\.jwpcdn\.com[^"]+\.js"`)
)
27+
1528type APIResponse struct {
1629 ID string `json:"id"`
1730 Title string `json:"title"`
@@ -69,7 +82,7 @@ func IsAPIURL(req *http.Request) bool {
6982 return strings .Contains (utils .URLToString (req .URL ), "apipartner.ina.fr" ) && ! strings .Contains (utils .URLToString (req .URL ), "playerConfigurations.json" )
7083}
7184
72- func ExtractPlayerURLs (doc * goquery.Document ) []* url.URL {
85+ func ExtractPlayerURLs (doc * goquery.Document , c * warc. CustomHTTPClient ) []* url.URL {
7386 var assets []string
7487
7588 doc .Find ("div[data-type=player]" ).Each (func (i int , s * goquery.Selection ) {
@@ -86,9 +99,82 @@ func ExtractPlayerURLs(doc *goquery.Document) []*url.URL {
8699 }
87100 })
88101
102+ assets = append (assets , getJWPlayerURLs (c )... )
103+
89104 return utils .StringSliceToURLSlice (assets )
90105}
91106
107+ func getJWPlayerURLs (c * warc.CustomHTTPClient ) (URLs []string ) {
108+ playerVersionLock .Lock ()
109+ defer playerVersionLock .Unlock ()
110+
111+ if playerVersion == "" {
112+ resp , err := c .Get ("https://player-hub.ina.fr/version" )
113+ if err != nil {
114+ return URLs
115+ }
116+ defer resp .Body .Close ()
117+
118+ if resp .StatusCode != http .StatusOK {
119+ return URLs
120+ }
121+
122+ body , err := io .ReadAll (resp .Body )
123+ if err != nil {
124+ return URLs
125+ }
126+
127+ playerVersion = string (body )
128+
129+ URLs = append (URLs ,
130+ "https://player-hub.ina.fr/dist/ina-player.min.js?version=" + playerVersion ,
131+ "https://player-hub.ina.fr/dist/player-default-skin.min.css?version=" + playerVersion ,
132+ "https://player-hub.ina.fr/assets/player/svg/pause.svg" ,
133+ "https://player-hub.ina.fr/assets/player/svg/play.svg" ,
134+ "https://player-hub.ina.fr/assets/player/svg/backward.svg" ,
135+ "https://player-hub.ina.fr/assets/player/svg/forward.svg" ,
136+ )
137+
138+ // Get the JWPlayer JS code
139+ playerResp , err := c .Get ("https://player-hub.ina.fr/js/jwplayer/jwplayer.js?version=" + playerVersion )
140+ if err != nil {
141+ return URLs
142+ }
143+ defer playerResp .Body .Close ()
144+
145+ if playerResp .StatusCode != http .StatusOK {
146+ return URLs
147+ }
148+
149+ // Find the JWPlayer assets in the JS file
150+ body , err = io .ReadAll (playerResp .Body )
151+ if err != nil {
152+ return URLs
153+ }
154+
155+ matches := playerRegex .FindAllString (string (body ), - 1 )
156+
157+ // Clean up the matches (remove quotes)
158+ for _ , match := range matches {
159+ URLs = append (URLs , "https:" + match [1 :len (match )- 1 ])
160+ }
161+
162+ URLs = append (URLs , "https://ssl.p.jwpcdn.com/player/v/" + extractJWPlayerVersion (string (body ))+ "/jwplayer.core.controls.html5.js" )
163+ }
164+
165+ return URLs
166+ }
167+
// extractJWPlayerVersion scans body line by line for the marker
// "JW Player version " and returns the text that follows it on the first line
// where it appears, with surrounding whitespace (including a trailing "\r"
// from CRLF input) removed. If the marker occurs more than once on that line,
// only the text between the first two occurrences is returned.
//
// It returns "" when no line contains the marker. A line that has
// "JW Player version" without the trailing space is skipped rather than
// indexed, so it can no longer cause an out-of-range panic.
func extractJWPlayerVersion(body string) string {
	const marker = "JW Player version "

	for _, line := range strings.Split(body, "\n") {
		// len(parts) > 1 means the marker is really present on this line.
		if parts := strings.Split(line, marker); len(parts) > 1 {
			return strings.TrimSpace(parts[1])
		}
	}

	return ""
}
177+
92178func ExtractMedias (resp * http.Response ) ([]* url.URL , error ) {
93179 var assets []string
94180
0 commit comments