diff --git a/build.gradle b/build.gradle index 94f260d2..c4027aee 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17 targetCompatibility = JavaVersion.VERSION_17 group = 'de.mediathekview' archivesBaseName = "MServer" -version = '3.1.232' +version = '3.1.233' def jarName = 'MServer.jar' def mainClass = 'mServer.Main' diff --git a/src/main/java/mServer/crawler/FilmeSuchen.java b/src/main/java/mServer/crawler/FilmeSuchen.java index f4f97bdd..f0be06b5 100644 --- a/src/main/java/mServer/crawler/FilmeSuchen.java +++ b/src/main/java/mServer/crawler/FilmeSuchen.java @@ -139,11 +139,15 @@ public synchronized void filmeBeimSenderLaden(ListeFilme listeFilme) { initStart(listeFilme); // die mReader nach Prio starten mrStarten(0); - if (!Config.getStop()) { - // waren und wenn Suchlauf noch nicht abgebrochen weiter mit dem Rest - mrWarten(4*60);//4*60); - mrStarten(1); + if (mediathekListe.stream().filter(mr -> mr.getStartPrio() == 1).count() == 0) { allStarted = true; + } else { + if (!Config.getStop()) { + // waren und wenn Suchlauf noch nicht abgebrochen weiter mit dem Rest + mrWarten(4*60);//4*60); + mrStarten(1); + allStarted = true; + } } } diff --git a/src/main/java/mServer/crawler/sender/ard/ArdConstants.java b/src/main/java/mServer/crawler/sender/ard/ArdConstants.java index 3a149181..b7691f2f 100644 --- a/src/main/java/mServer/crawler/sender/ard/ArdConstants.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdConstants.java @@ -5,7 +5,7 @@ public class ArdConstants { public static final String API_URL = "https://api.ardmediathek.de"; public static final String BASE_URL = "https://api.ardmediathek.de/public-gateway"; - public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/"; + public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/%s?embedded=true&mcV6=true"; public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false"; public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s"; @@ -37,6 +37,8 @@ public class ArdConstants { "phoenix" }; + public static final String[] IGNORED_SENDER = new String[] {"zdf", "kika", "3sat", "arte"}; + public static final String WEBSITE_URL = "https://www.ardmediathek.de/video/%s"; public static final String BASE_URL_SUBTITLES = "https://classic.ardmediathek.de"; diff --git a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java index 812419d5..dc45ca01 100644 --- a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java @@ -42,6 +42,11 @@ protected synchronized void meldungThreadUndFertig() { mlibFilmeSuchen.meldenFertig(Const.HR); mlibFilmeSuchen.meldenFertig(Const.BR); mlibFilmeSuchen.meldenFertig("rbtv"); + mlibFilmeSuchen.meldenFertig("ONE"); + mlibFilmeSuchen.meldenFertig("ARD-alpha"); + mlibFilmeSuchen.meldenFertig("Funk.net"); + mlibFilmeSuchen.meldenFertig(Const.SR); + mlibFilmeSuchen.meldenFertig(Const.PHOENIX); } super.meldungThreadUndFertig(); diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java index 8b6eb3ec..5ef87560 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java @@ -1,35 +1,34 @@ package mServer.crawler.sender.ard.json; -import com.google.gson.JsonArray; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; +import com.google.gson.*; + +import mServer.crawler.sender.ard.ArdFilmInfoDto; + import java.lang.reflect.Type; import java.util.HashSet; import java.util.Set; -import mServer.crawler.sender.ard.ArdFilmInfoDto; -public class ArdDayPageDeserializer extends ArdTeasersDeserializer implements JsonDeserializer> { +public class ArdDayPageDeserializer extends ArdTeasersDeserializer + implements JsonDeserializer> { private static final String ELEMENT_TEASERS = "teasers"; @Override - public Set deserialize(JsonElement jsonElement, Type type, JsonDeserializationContext context) { - Set results = new HashSet<>(); + public Set deserialize( + final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) { + final Set results = new HashSet<>(); if (!jsonElement.isJsonArray()) { return results; } - JsonObject element0 = jsonElement.getAsJsonArray().get(0).getAsJsonObject(); + final JsonObject firstElement = jsonElement.getAsJsonArray().get(0).getAsJsonObject(); - if (element0.has(ELEMENT_TEASERS)) { - JsonArray teasers = element0.get(ELEMENT_TEASERS).getAsJsonArray(); + if (firstElement.has(ELEMENT_TEASERS)) { + final JsonArray teasers = firstElement.get(ELEMENT_TEASERS).getAsJsonArray(); results.addAll(parseTeasers(teasers)); } return results; } - } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdErrorDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdErrorDeserializer.java index 3c06fa84..d451dde4 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdErrorDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdErrorDeserializer.java @@ -1,13 +1,11 @@ package mServer.crawler.sender.ard.json; -import com.google.gson.JsonArray; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; +import com.google.gson.*; + +import mServer.crawler.sender.base.JsonUtils; + import java.lang.reflect.Type; import java.util.Optional; -import mServer.crawler.sender.base.JsonUtils; public class ArdErrorDeserializer implements JsonDeserializer> { @@ -17,13 +15,14 @@ public class ArdErrorDeserializer implements JsonDeserializer deserialize(JsonElement jsonElement, Type type, JsonDeserializationContext context) { + public Optional deserialize( + final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) { if (!JsonUtils.hasElements(jsonElement, ELEMENT_ERRORS)) { return Optional.empty(); } - JsonArray errors = jsonElement.getAsJsonObject().get(ELEMENT_ERRORS).getAsJsonArray(); + final JsonArray errors = jsonElement.getAsJsonObject().get(ELEMENT_ERRORS).getAsJsonArray(); if (errors.size() > 0) { return parseError(errors.get(0).getAsJsonObject()); } @@ -31,15 +30,17 @@ public Optional deserialize(JsonElement jsonElement, Type type, return Optional.empty(); } - private Optional parseError(JsonObject error) { - Optional message = JsonUtils.getAttributeAsString(error, ATTRIBUTE_MESSAGE); + private Optional parseError(final JsonObject error) { + final Optional message = JsonUtils.getAttributeAsString(error, ATTRIBUTE_MESSAGE); Optional code = Optional.empty(); if (JsonUtils.hasElements(error, ELEMENT_EXTENSIONS)) { - code = JsonUtils.getAttributeAsString(error.get(ELEMENT_EXTENSIONS).getAsJsonObject(), ATTRIBUTE_CODE); + code = + JsonUtils.getAttributeAsString( + error.get(ELEMENT_EXTENSIONS).getAsJsonObject(), ATTRIBUTE_CODE); } - ArdErrorInfoDto result = new ArdErrorInfoDto(code.orElse(""), message.orElse("")); + final ArdErrorInfoDto result = new ArdErrorInfoDto(code.orElse(""), message.orElse("")); return Optional.of(result); } } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdErrorInfoDto.java b/src/main/java/mServer/crawler/sender/ard/json/ArdErrorInfoDto.java index 54b9dc6b..725aa0ed 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdErrorInfoDto.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdErrorInfoDto.java @@ -4,10 +4,10 @@ public class ArdErrorInfoDto { - private String code; - private String message; + private final String code; + private final String message; - public ArdErrorInfoDto(String code, String message) { + public ArdErrorInfoDto(final String code, final String message) { this.code = code; this.message = message; } @@ -21,14 +21,13 @@ public String getCode() { } @Override - public boolean equals(Object o) { + public boolean equals(final Object o) { if (this == o) { return true; } - if (!(o instanceof ArdErrorInfoDto)) { + if (!(o instanceof ArdErrorInfoDto that)) { return false; } - ArdErrorInfoDto that = (ArdErrorInfoDto) o; return Objects.equals(code, that.code) && Objects.equals(message, that.message); } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdFilmDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdFilmDeserializer.java index 93e74e1b..3e565d8e 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdFilmDeserializer.java @@ -7,8 +7,9 @@ import com.google.gson.JsonObject; import de.mediathekview.mlib.Const; import de.mediathekview.mlib.daten.DatenFilm; -import de.mediathekview.mlib.tool.Log; import java.lang.reflect.Type; +import java.net.MalformedURLException; +import java.net.URL; import java.time.Duration; import java.time.LocalDateTime; import java.time.ZoneId; @@ -16,17 +17,21 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.ArrayList; +import java.util.EnumMap; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + import mServer.crawler.CrawlerTool; import mServer.crawler.sender.ard.ArdConstants; import mServer.crawler.sender.ard.ArdFilmDto; import mServer.crawler.sender.ard.ArdFilmInfoDto; import mServer.crawler.sender.base.JsonUtils; import mServer.crawler.sender.base.Qualities; -import mServer.crawler.sender.base.UrlUtils; import org.apache.logging.log4j.LogManager; public class ArdFilmDeserializer implements JsonDeserializer> { @@ -42,15 +47,33 @@ public class ArdFilmDeserializer implements JsonDeserializer> { private static final String ELEMENT_SHOW = "show"; private static final String ELEMENT_TEASERS = "teasers"; private static final String ELEMENT_WIDGETS = "widgets"; + private static final String[] ELEMENT_SUBTITLES = {"mediaCollection","embedded","subtitles"}; + private static final String ELEMENT_SOURCES = "sources"; + private static final String ELEMENT_STREAMS = "streams"; + private static final String ELEMENT_MEDIA = "media"; + private static final String ELEMENT_AUDIO = "audios"; + private static final String ATTRIBUTE_BROADCAST = "broadcastedOn"; - private static final String ATTRIBUTE_DURATION = "_duration"; + private static final String[] ATTRIBUTE_DURATION = {"meta","duration"}; + private static final String[] ATTRIBUTE_DURATION_SEC = {"meta","durationSeconds"}; private static final String ATTRIBUTE_ID = "id"; private static final String ATTRIBUTE_NAME = "name"; private static final String ATTRIBUTE_PARTNER = "partner"; private static final String ATTRIBUTE_SYNOPSIS = "synopsis"; private static final String ATTRIBUTE_TITLE = "title"; - + private static final String ATTRIBUTE_URL = "url"; + private static final String ATTRIBUTE_RESOLUTION_H = "maxHResolutionPx"; + private static final String ATTRIBUTE_MIME = "mimeType"; + private static final String ATTRIBUTE_KIND = "kind"; + + private static final String MARKER_VIDEO_MP4 = "video/mp4"; + private static final String MARKER_VIDEO_STANDARD = "standard"; + private static final String MARKER_VIDEO_CATEGORY_MAIN = "main"; + private static final String MARKER_VIDEO_CATEGORY_MPEG = "application/vnd.apple.mpegurl"; + private static final String MARKER_VIDEO_AD = "audio-description"; + private static final String MARKER_VIDEO_DGS = "sign-language"; + private static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter.ofPattern("dd.MM.yyyy"); private static final DateTimeFormatter TIME_FORMAT @@ -68,12 +91,15 @@ public class ArdFilmDeserializer implements JsonDeserializer> { ADDITIONAL_SENDER.put("hr", Const.HR); ADDITIONAL_SENDER.put("br", Const.BR); ADDITIONAL_SENDER.put("radio_bremen", "rbtv"); - } - - private final ArdVideoInfoJsonDeserializer videoDeserializer; - - public ArdFilmDeserializer() { - videoDeserializer = new ArdVideoInfoJsonDeserializer(); + ADDITIONAL_SENDER.put("tagesschau24", Const.ARD); + ADDITIONAL_SENDER.put("das_erste", Const.ARD); + ADDITIONAL_SENDER.put("one", "ONE"); // ONE + ADDITIONAL_SENDER.put("ard-alpha", "ARD-alpha"); // ARD-alpha + ADDITIONAL_SENDER.put("funk", "Funk.net"); // Funk.net + ADDITIONAL_SENDER.put("sr", Const.SR); + ADDITIONAL_SENDER.put("phoenix", Const.PHOENIX); + ADDITIONAL_SENDER.put("ard", Const.ARD); + //IGNORED_SENDER "zdf", "kika", "3sat", "arte" } private static Optional getMediaCollectionObject(final JsonObject itemObject) { @@ -113,9 +139,14 @@ private static Optional parseTopic(final JsonObject playerPageObject) { private Optional parseTitle(final JsonObject playerPageObject) { Optional title = JsonUtils.getAttributeAsString(playerPageObject, ATTRIBUTE_TITLE); if (title.isPresent()) { - return Optional.of(title.get().replace("Hörfassung", "Audiodeskription")); + String[] replaceWords = {" - Hörfassung", " (mit Gebärdensprache)", " mit Gebärdensprache"," (mit Audiodeskription)", "Audiodeskription"}; + String cleanTitle = title.get().trim(); + for (String replaceWord : replaceWords) { + cleanTitle = cleanTitle.replace(replaceWord, ""); + } + cleanTitle = cleanTitle.trim(); + return Optional.of(cleanTitle); } - return title; } @@ -138,14 +169,34 @@ private static Optional parseDate(final JsonObject playerPageObje private static Optional parseDuration(final JsonObject playerPageObject) { final Optional mediaCollectionObject = getMediaCollectionObject(playerPageObject); - if (mediaCollectionObject.isPresent() && mediaCollectionObject.get().has(ATTRIBUTE_DURATION)) { - final long durationValue = mediaCollectionObject.get().get(ATTRIBUTE_DURATION).getAsLong(); - return Optional.of(Duration.ofSeconds(durationValue)); + if (mediaCollectionObject.isPresent()) { + final Optional durationElement = JsonUtils.getElement(mediaCollectionObject.get(), ATTRIBUTE_DURATION); + final Optional durationElementSec = JsonUtils.getElement(mediaCollectionObject.get(), ATTRIBUTE_DURATION_SEC); + if (durationElement.isPresent()) { + return Optional.of(Duration.ofSeconds(durationElement.get().getAsLong())); + } else if (durationElementSec.isPresent()) { + return Optional.of(Duration.ofSeconds(durationElementSec.get().getAsLong())); + } } - return Optional.empty(); } + private Optional prepareSubtitleUrl(final JsonElement embeddedElement) { + Optional subtitle = JsonUtils.getElement(embeddedElement, ELEMENT_SUBTITLES); + if (subtitle.isEmpty() || !subtitle.get().isJsonArray() || (subtitle.get().getAsJsonArray().size() == 0)) + return Optional.empty(); + Optional sources = JsonUtils.getElement(subtitle.get().getAsJsonArray().get(0), ELEMENT_SOURCES); + if (sources.isEmpty() || !sources.get().isJsonArray()) + return Optional.empty(); + Set urls = new HashSet<>(); + for (JsonElement url : sources.get().getAsJsonArray()) { + JsonUtils.getElementValueAsString(url, ATTRIBUTE_URL).ifPresent(urls::add); + } + return urls.stream() + .filter(s -> !s.endsWith(".vtt")) + .findFirst(); + } + @Override public List deserialize( final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) { @@ -170,42 +221,111 @@ public List deserialize( = JsonUtils.getAttributeAsString(itemObject, ATTRIBUTE_SYNOPSIS); final Optional date = parseDate(itemObject); final Optional duration = parseDuration(itemObject); - final Optional videoInfo = parseVideoUrls(itemObject); final Optional partner = parsePartner(itemObject); + final Optional> videoInfoStandard = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4); + final Optional> videoInfoAdaptive = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_CATEGORY_MPEG); + final Optional> videoInfoAD = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_AD, MARKER_VIDEO_MP4); + final Optional> videoInfoDGS = parseVideoUrls(itemObject, MARKER_VIDEO_DGS, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4); + final Optional subtitles = prepareSubtitleUrl(itemObject); + + if (topic.isEmpty() || title.isEmpty() || partner.isEmpty() || ADDITIONAL_SENDER.get(partner.get()) == null) { + if (partner.isPresent() && ADDITIONAL_SENDER.get(partner.get()) == null) { + LOG.warn("Missing Partner " + partner.get()); + } + return films; + } + + if(videoInfoStandard.isEmpty() && videoInfoAD.isEmpty() && videoInfoDGS.isEmpty() && videoInfoAdaptive.isPresent()) { + // UUAAAARRGGGG - SAD + Map qualitiesUrls = videoInfoAdaptive.get().entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> { + try { + return new URL(entry.getValue()); + } catch (MalformedURLException e) { + LOG.error("failed converting string {} to url", entry.getValue(), e); + return null; + } + })); + if (!qualitiesUrls.containsKey(Qualities.NORMAL)) { + qualitiesUrls.put(Qualities.NORMAL, qualitiesUrls.entrySet().stream().findFirst().get().getValue()); + } + // + ArdVideoInfoJsonDeserializer.loadM3U8(qualitiesUrls); + // + Map fallback = qualitiesUrls.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString())); + + if (fallback.size() > 0) { + final ArdFilmDto filmDto + = new ArdFilmDto( + createFilm( + ADDITIONAL_SENDER.get(partner.get()), + topic.get(), + title.get(), + description, + date, + duration, + fallback, + subtitles)); + films.add(filmDto); + } + } - if (topic.isPresent() - && title.isPresent() - && videoInfo.isPresent() - && videoInfo.get().getVideoUrls().size() > 0) { - // add film to ARD + if (videoInfoStandard.isPresent() && videoInfoStandard.get().size() > 0) { + // add film standard final ArdFilmDto filmDto = new ArdFilmDto( createFilm( - Const.ARD, + ADDITIONAL_SENDER.get(partner.get()), topic.get(), title.get(), description, date, duration, - videoInfo.get())); + videoInfoStandard.get(), + subtitles)); if (widgets.size() > 1) { parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject()); } films.add(filmDto); - - if (partner.isPresent() && ADDITIONAL_SENDER.containsKey(partner.get())) { - // add film to other sender (like RBB) - DatenFilm additionalFilm - = createFilm( - ADDITIONAL_SENDER.get(partner.get()), - topic.get(), - title.get(), - description, - date, - duration, - videoInfo.get()); - films.add(new ArdFilmDto(additionalFilm)); + } + // + if (videoInfoAD.isPresent() && videoInfoAD.get().size() > 0) { + // add film ad + final ArdFilmDto filmDto + = new ArdFilmDto( + createFilm( + ADDITIONAL_SENDER.get(partner.get()), + topic.get(), + title.get() + " (Audiodeskription)", + description, + date, + duration, + videoInfoAD.get(), + subtitles)); + if (widgets.size() > 1) { + parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject()); } + films.add(filmDto); + } + // + if (videoInfoDGS.isPresent() && videoInfoDGS.get().size() > 0) { + // add film standard + final ArdFilmDto filmDto + = new ArdFilmDto( + createFilm( + ADDITIONAL_SENDER.get(partner.get()), + topic.get(), + title.get() + " (Gebärdensprache)", + description, + date, + duration, + videoInfoDGS.get(), + subtitles)); + if (widgets.size() > 1) { + parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject()); + } + films.add(filmDto); } return films; @@ -231,10 +351,6 @@ private Optional parsePartner(JsonObject playerPageObject) { return Optional.empty(); } - private static String prepareSubtitleUrl(final String url) { - return UrlUtils.addDomainIfMissing(url, ArdConstants.BASE_URL_SUBTITLES); - } - private void parseRelatedFilms(final ArdFilmDto filmDto, final JsonObject playerPageObject) { if (playerPageObject.has(ELEMENT_TEASERS)) { final JsonElement teasersElement = playerPageObject.get(ELEMENT_TEASERS); @@ -244,8 +360,7 @@ private void parseRelatedFilms(final ArdFilmDto filmDto, final JsonObject player final Optional id = JsonUtils.getAttributeAsString(teasersItemObject, ATTRIBUTE_ID); if (id.isPresent()) { - final String url = ArdConstants.ITEM_URL + id.get(); - + final String url = String.format(ArdConstants.ITEM_URL, id.get()); filmDto.addRelatedFilm(new ArdFilmInfoDto(id.get(), url, 0)); } } @@ -260,16 +375,19 @@ private DatenFilm createFilm( final Optional description, final Optional date, final Optional duration, - final ArdVideoInfoDto videoInfo) { + final Map videoUrls, + final Optional sub) { LocalDateTime time = date.orElse(LocalDateTime.now()); String dateValue = time.format(DATE_FORMAT); String timeValue = time.format(TIME_FORMAT); + + String baseUrl = videoUrls.get(Qualities.NORMAL); + baseUrl = baseUrl != null ? baseUrl : videoUrls.get(Qualities.SMALL); + baseUrl = baseUrl != null ? baseUrl : videoUrls.get(Qualities.HD); - Map videoUrls = videoInfo.getVideoUrls(); - - DatenFilm film = new DatenFilm(sender, topic, "", title, videoInfo.getDefaultVideoUrl(), "", + DatenFilm film = new DatenFilm(sender, topic, "", title, baseUrl, "", dateValue, timeValue, duration.orElse(Duration.ZERO).getSeconds(), description.orElse("")); if (videoUrls.containsKey(Qualities.SMALL)) { CrawlerTool.addUrlKlein(film, videoUrls.get(Qualities.SMALL)); @@ -277,21 +395,51 @@ private DatenFilm createFilm( if (videoUrls.containsKey(Qualities.HD)) { CrawlerTool.addUrlHd(film, videoUrls.get(Qualities.HD)); } - if (videoInfo.getSubtitleUrlOptional().isPresent()) { - CrawlerTool.addUrlSubtitle(film, videoInfo.getSubtitleUrl()); + if (sub.isPresent()) { + CrawlerTool.addUrlSubtitle(film, sub.get()); } return film; } - private Optional parseVideoUrls(final JsonObject playerPageObject) { + + private Optional> parseVideoUrls(final JsonObject playerPageObject, String streamType, String aduioType, String mimeType) { final Optional mediaCollectionObject = getMediaCollectionObject(playerPageObject); - if (mediaCollectionObject.isPresent()) { - final ArdVideoInfoDto videoDto - = videoDeserializer.deserialize(mediaCollectionObject.get(), null, null); - return Optional.of(videoDto); + if (mediaCollectionObject.isEmpty()) + return Optional.empty(); + final Optional streams = JsonUtils.getElement(mediaCollectionObject.get(), ELEMENT_STREAMS); + if (streams.isEmpty() || !streams.get().isJsonArray() || (streams.get().getAsJsonArray().size() == 0)) + return Optional.empty(); + // + Map videoInfo = new EnumMap<>(Qualities.class); + for (JsonElement streamsCategory : streams.get().getAsJsonArray()) { + final Optional streamKind = JsonUtils.getElementValueAsString(streamsCategory, ATTRIBUTE_KIND); + final Optional media = JsonUtils.getElement(streamsCategory, ELEMENT_MEDIA); + if (media.isEmpty() || !media.get().isJsonArray() || (media.get().getAsJsonArray().size() == 0)) + return Optional.empty(); + if (streamKind.orElse("").equalsIgnoreCase(streamType)) { + for (JsonElement video : media.get().getAsJsonArray()) { + Optional mime = JsonUtils.getElementValueAsString(video, ATTRIBUTE_MIME); + if (mime.isPresent() && mime.get().equalsIgnoreCase(mimeType)) { + Optional audios = JsonUtils.getElement(video, ELEMENT_AUDIO); + if (audios.isPresent() && audios.get().isJsonArray() && audios.get().getAsJsonArray().size() > 0) { + Optional kind = JsonUtils.getElementValueAsString(audios.get().getAsJsonArray().get(0), ATTRIBUTE_KIND); + Optional resh = JsonUtils.getElementValueAsString(video, ATTRIBUTE_RESOLUTION_H); + Optional url = JsonUtils.getElementValueAsString(video, ATTRIBUTE_URL); + if (url.isPresent() && resh.isPresent() && kind.isPresent() && kind.get().equalsIgnoreCase(aduioType)) { + Qualities resolution = Qualities.getResolutionFromWidth(Integer.parseInt(resh.get())); + if(!videoInfo.containsKey(resolution)) { // do not overwrite 1920 with 1280 res + videoInfo.put(resolution, url.get()); + } + } + } + } + } + } } - - return Optional.empty(); + if (videoInfo.size() == 0) { + return Optional.empty(); + } + return Optional.of(videoInfo); } } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTeasersDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTeasersDeserializer.java index 71c834e0..7fb83d56 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdTeasersDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTeasersDeserializer.java @@ -3,13 +3,16 @@ import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; -import java.util.HashSet; -import java.util.Optional; -import java.util.Set; import mServer.crawler.sender.ard.ArdConstants; import mServer.crawler.sender.ard.ArdFilmInfoDto; import mServer.crawler.sender.base.JsonUtils; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + abstract class ArdTeasersDeserializer { private static final String ELEMENT_LINKS = "links"; @@ -18,37 +21,38 @@ abstract class ArdTeasersDeserializer { private static final String ATTRIBUTE_ID = "id"; private static final String ATTRIBUTE_NUMBER_OF_CLIPS = "numberOfClips"; - Set parseTeasers(JsonArray teasers) { - Set results = new HashSet<>(); - for (JsonElement teaserElement : teasers) { - JsonObject teaserObject = teaserElement.getAsJsonObject(); - Optional id; - int numberOfClips = 0; - - if (JsonUtils - .checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { - JsonObject targetObject = teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get( - ELEMENT_TARGET).getAsJsonObject(); - id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID); - } else { - id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); - } - - if (teaserObject.has(ATTRIBUTE_NUMBER_OF_CLIPS)) { - numberOfClips = teaserObject.get(ATTRIBUTE_NUMBER_OF_CLIPS).getAsInt(); - } - - if (id.isPresent()) { - results.add(createFilmInfo(id.get(), numberOfClips)); - } - } + Set parseTeasers(final JsonArray teasers) { + return StreamSupport.stream(teasers.spliterator(), true) + .map(JsonElement::getAsJsonObject) + .map(this::toFilmInfo) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + } + + private ArdFilmInfoDto toFilmInfo(final JsonObject teaserObject) { + return toId(teaserObject) + .map(id -> createFilmInfo(id, getNumberOfClips(teaserObject))) + .orElse(null); + } - return results; + private int getNumberOfClips(final JsonObject teaserObject) { + if (teaserObject.has(ATTRIBUTE_NUMBER_OF_CLIPS)) { + return teaserObject.get(ATTRIBUTE_NUMBER_OF_CLIPS).getAsInt(); + } + return 0; } - private ArdFilmInfoDto createFilmInfo(String id, int numberOfClips) { - final String url = ArdConstants.ITEM_URL + id; + private Optional toId(final JsonObject teaserObject) { + if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { + final JsonObject targetObject = + teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); + return JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID); + } + return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); + } + private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) { + final String url = String.format(ArdConstants.ITEM_URL, id); return new ArdFilmInfoDto(id, url, numberOfClips); } } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicPageDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicPageDeserializer.java index b2183a51..bf14da3b 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicPageDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicPageDeserializer.java @@ -46,12 +46,12 @@ public ArdTopicInfoDto deserialize( } private int getChildElementAsIntOrNullIfNotExist( - final JsonElement parentElement, final String childElementName) { + final JsonElement parentElement, final String childElementName) { if (parentElement == null || parentElement.isJsonNull()) { return 0; } return getJsonElementAsIntOrNullIfNotExist( - parentElement.getAsJsonObject().get(childElementName)); + parentElement.getAsJsonObject().get(childElementName)); } private int getJsonElementAsIntOrNullIfNotExist(final JsonElement element) { diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java index 73fa2dbc..18a61a67 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java @@ -20,7 +20,6 @@ public class ArdTopicsDeserializer implements JsonDeserializer deserialize( - JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) { + JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) { final Set result = new HashSet<>(); if (JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) { @@ -43,17 +42,17 @@ public Set deserialize( private Optional parseWidget(final JsonElement compilation) { if (JsonUtils.hasElements(compilation, ELEMENT_LINKS)) { final JsonElement selfLink = - compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_SELF); + compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_SELF); final Optional id = - JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID); + JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID); if (id.isPresent()) { return Optional.of( - new CrawlerUrlDTO( - String.format( - ArdConstants.TOPICS_COMPILATION_URL, - sender, - id.get(), - ArdConstants.TOPICS_COMPILATION_PAGE_SIZE))); + new CrawlerUrlDTO( + String.format( + ArdConstants.TOPICS_COMPILATION_URL, + sender, + id.get(), + ArdConstants.TOPICS_COMPILATION_PAGE_SIZE))); } } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java index 12870c8c..958686ef 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java @@ -29,8 +29,6 @@ public class ArdTopicsLetterDeserializer implements JsonDeserializer sender.equalsIgnoreCase(attributeAsString.get())); } } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdVideoInfoJsonDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdVideoInfoJsonDeserializer.java index a9ef781a..77b9e447 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdVideoInfoJsonDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdVideoInfoJsonDeserializer.java @@ -58,7 +58,7 @@ public ArdVideoInfoDto deserialize(final JsonElement aJsonElement, final Type aT return videoInfo; } - private void loadM3U8(Map resolutionUrlMap) { + public static void loadM3U8(Map resolutionUrlMap) { final URL m3u8File = resolutionUrlMap.get(Qualities.NORMAL); final Optional m3u8Content = readContent(m3u8File); resolutionUrlMap.clear(); diff --git a/src/main/java/mServer/crawler/sender/base/Qualities.java b/src/main/java/mServer/crawler/sender/base/Qualities.java index dd70a8b0..74e3af9a 100644 --- a/src/main/java/mServer/crawler/sender/base/Qualities.java +++ b/src/main/java/mServer/crawler/sender/base/Qualities.java @@ -15,4 +15,17 @@ public String getDescription() { return description; } + + public static Qualities getResolutionFromWidth(final int width) { + if (width >= 2160) { + return Qualities.UHD; + } + if (width >= 1280) { + return Qualities.HD; + } + if (width >= 720) { + return Qualities.NORMAL; + } + return Qualities.SMALL; + } }