From d70b127a9aea8837d4f94ee467e641e2a8cf5ae5 Mon Sep 17 00:00:00 2001 From: bopol Date: Thu, 13 Feb 2020 23:34:58 +0100 Subject: [PATCH 01/15] reintroduce content language for youtube --- .../services/youtube/YoutubeService.java | 5 ++-- .../extractors/YoutubeStreamExtractor.java | 13 +++++------ .../YoutubeStreamExtractorDefaultTest.java | 23 +++++++++++++++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index 6137f02931..6d00a971c7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -166,14 +166,13 @@ public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) // https://www.youtube.com/picker_ajax?action_language_json=1 private static final List SUPPORTED_LANGUAGES = Localization.listFrom( - "en-GB" - /*"af", "am", "ar", "az", "be", "bg", "bn", "bs", "ca", "cs", "da", "de", + "af", "am", "ar", "az", "be", "bg", "bn", "bs", "ca", "cs", "da", "de", "el", "en", "en-GB", "es", "es-419", "es-US", "et", "eu", "fa", "fi", "fil", "fr", "fr-CA", "gl", "gu", "hi", "hr", "hu", "hy", "id", "is", "it", "iw", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml", "mn", "mr", "ms", "my", "ne", "nl", "no", "pa", "pl", "pt", "pt-PT", "ro", "ru", "si", "sk", "sl", "sq", "sr", "sr-Latn", "sv", "sw", "ta", "te", "th", "tr", - "uk", "ur", "uz", "vi", "zh-CN", "zh-HK", "zh-TW", "zu"*/ + "uk", "ur", "uz", "vi", "zh-CN", "zh-HK", "zh-TW", "zu" ); // https://www.youtube.com/picker_ajax?action_country_json=1 diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index ea870d7157..e68c817b44 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -41,6 +41,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.schabi.newpipe.extractor.utils.JsonUtils.getString; + /* * Created by Christian Schabesberger on 06.08.15. * @@ -87,7 +89,7 @@ public class SubtitlesException extends ContentNotAvailableException { private JsonObject playerArgs; @Nonnull private final Map videoInfoPage = new HashMap<>(); - private JsonObject playerResponse; + public JsonObject playerResponse; @Nonnull private List subtitlesInfos = new ArrayList<>(); @@ -107,8 +109,7 @@ public YoutubeStreamExtractor(StreamingService service, LinkHandler linkHandler) public String getName() throws ParsingException { assertPageFetched(); try { - return playerResponse.getObject("videoDetails").getString("title"); - + return getString(playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getObject("title"), "simpleText"); } catch (Exception e) { // fallback HTML method String name = null; @@ -183,12 +184,10 @@ public String getThumbnailUrl() throws ParsingException { public Description getDescription() throws ParsingException { assertPageFetched(); try { - // first try to get html-formatted description - return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML); + return new Description(getString(playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getObject("description"), "simpleText"), Description.PLAIN_TEXT); } catch (Exception e) { try { - // fallback to raw non-html description - return new 
Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); + return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML); } catch (Exception ignored) { throw new ParsingException("Could not get the description", e); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index e2c0c7bbfe..0e9343da4f 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.Utils; @@ -308,4 +309,26 @@ public void testGetFrames() throws ExtractionException { } } } + + public static class LocalizationTest { + private static YoutubeStreamExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance(), new Localization("de")); + extractor = (YoutubeStreamExtractor) YouTube + .getStreamExtractor("https://www.youtube.com/watch?v=BWQ0BFVuSXA"); + extractor.fetchPage(); + } + + @Test + public void testGetName() throws ParsingException { + assertEquals("SKAM FRANCE EP.6 S5: Sonntag, 16:21 Uhr - Was jetzt?", extractor.getName()); + } + + @Test + public void testGetDescription() throws ParsingException { + 
assertTrue(extractor.getDescription().getContent().contains("Folgen Sie france.tv auf:")); + } + } } From 0d17625bf8f07c3278c331cac48c4da8d89e565a Mon Sep 17 00:00:00 2001 From: bopol Date: Fri, 14 Feb 2020 19:44:10 +0100 Subject: [PATCH 02/15] refix subscriber count --- .../extractors/YoutubeChannelExtractor.java | 34 ++++++++++++++----- .../youtube/YoutubeChannelExtractorTest.java | 22 ++++++------ 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index d675cb2554..7fbae8b030 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -6,23 +6,27 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import 
org.schabi.newpipe.extractor.utils.Parser; -import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nonnull; import java.io.IOException; +import static org.schabi.newpipe.extractor.utils.Utils.mixedNumberWordToLong; + /* * Created by Christian Schabesberger on 25.07.16. * @@ -81,7 +85,8 @@ public String getUrl() throws ParsingException { public String getId() throws ParsingException { try { return doc.select("meta[itemprop=\"channelId\"]").first().attr("content"); - } catch (Exception ignored) {} + } catch (Exception ignored) { + } // fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO) try { @@ -137,19 +142,32 @@ public String getFeedUrl() throws ParsingException { @Override public long getSubscriberCount() throws ParsingException { - + long subCount = -1; final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first(); if (el != null) { + // If the element is null, the channel have the subscriber count disabled String elTitle = el.attr("title"); try { - return Utils.mixedNumberWordToLong(elTitle); + subCount = mixedNumberWordToLong(elTitle); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); } - } else { - // If the element is null, the channel have the subscriber count disabled - return -1; + + if (!getExtractorLocalization().getLanguageCode().equals("en") && subCount < 1000 && subCount != -1) { + //if it's not gathered from English page, and if shortened (https://support.google.com/youtube/thread/6543166) + //see https://github.com/TeamNewPipe/NewPipe/issues/2632 + Downloader dl = NewPipe.getDownloader(); + String aboutUrl = "https://m.youtube.com/channel/" + getId() + "/about"; + try { + Response response = dl.get(aboutUrl, new Localization("en", "gb")); + Document docEN = YoutubeParsingHelper.parseAndCheckPage(aboutUrl, response); + subCount = mixedNumberWordToLong(docEN.select(".subscribed").attr("title")); + } catch 
(IOException | ReCaptchaException e) { + e.printStackTrace(); + } + } } + return subCount; } @Override @@ -196,7 +214,7 @@ public InfoItemsPage getPage(String pageUrl) throws IOException, } private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl) - throws ParsingException { + throws ParsingException { String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html"); if (!loadMoreHtmlDataRaw.isEmpty()) { return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl)); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index 251ae246e4..fbb4647ab6 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -1,14 +1,5 @@ package org.schabi.newpipe.extractor.services.youtube; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.schabi.newpipe.extractor.ExtractorAsserts.assertIsSecureUrl; -import static org.schabi.newpipe.extractor.ServiceList.YouTube; -import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestGetPageInNewExtractor; -import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestMoreItems; -import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestRelatedItems; - import org.junit.BeforeClass; import org.junit.Test; import org.schabi.newpipe.DownloaderTestImpl; @@ -20,6 +11,11 @@ import org.schabi.newpipe.extractor.services.BaseChannelExtractorTest; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; +import static org.junit.Assert.*; +import static 
org.schabi.newpipe.extractor.ExtractorAsserts.assertIsSecureUrl; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; +import static org.schabi.newpipe.extractor.services.DefaultTests.*; + /** * Test for {@link ChannelExtractor} */ @@ -29,12 +25,17 @@ public static class Gronkh implements BaseChannelExtractorTest { @BeforeClass public static void setUp() throws Exception { - NewPipe.init(DownloaderTestImpl.getInstance()); + NewPipe.init(DownloaderTestImpl.getInstance(), new Localization("ru")); extractor = (YoutubeChannelExtractor) YouTube .getChannelExtractor("http://www.youtube.com/user/Gronkh"); extractor.fetchPage(); } + @Test + public void testGetSubscribersCount() throws Exception { + assertTrue(extractor.getSubscriberCount() >= 4880000); + } + /*////////////////////////////////////////////////////////////////////////// // Extractor //////////////////////////////////////////////////////////////////////////*/ @@ -489,7 +490,6 @@ public void testSubscriberCount() throws Exception { } - public static class RandomChannel implements BaseChannelExtractorTest { private static YoutubeChannelExtractor extractor; From 2eb9f5c6f04e38e8a0d75c769294c9418dd45226 Mon Sep 17 00:00:00 2001 From: bopol Date: Sat, 15 Feb 2020 21:00:07 +0100 Subject: [PATCH 03/15] HashMap for abbreviations --- .../YoutubeAbbreviationSubCountMap.java | 136 ++ .../extractors/YoutubeChannelExtractor.java | 20 +- .../extractors/YoutubeStreamExtractor.java | 2 +- .../schabi/newpipe/extractor/utils/Utils.java | 84 +- .../youtube/YoutubeSubscriberTest.java | 1178 +++++++++++++++++ .../newpipe/extractor/utils/UtilsTest.java | 10 + 6 files changed, 1405 insertions(+), 25 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java new file mode 100644 index 0000000000..3195caaf70 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java @@ -0,0 +1,136 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import java.util.HashMap; + +public class YoutubeAbbreviationSubCountMap { + + //should be safe until someone has 1 billion subscribers on YouTube + public static final HashMap abbreviationSubscribersCount = new HashMap<>(); + public static String englishMillionAbbreviation = "M"; + public static String englishThousandAbbreviation = "K"; + public static String tenThousandAbbreviation = "万"; + public static String hundredThousandAbbreviation = "ল"; + public static String tenMillionAbbreviation = "ক"; + public static String hundredMillionAbbreviation = "億"; + + static { + abbreviationSubscribersCount.put(englishThousandAbbreviation, englishThousandAbbreviation); //az, iw, en + abbreviationSubscribersCount.put(englishMillionAbbreviation, englishMillionAbbreviation); //iw, en + abbreviationSubscribersCount.put(tenThousandAbbreviation, tenThousandAbbreviation); + abbreviationSubscribersCount.put(hundredMillionAbbreviation, hundredMillionAbbreviation); + abbreviationSubscribersCount.put(tenMillionAbbreviation, tenMillionAbbreviation); + abbreviationSubscribersCount.put(hundredMillionAbbreviation, hundredMillionAbbreviation); + + abbreviationSubscribersCount.put(" k", englishThousandAbbreviation); //af + abbreviationSubscribersCount.put(" m", englishMillionAbbreviation); //af, is + abbreviationSubscribersCount.put(" ሺ", englishThousandAbbreviation); //am + abbreviationSubscribersCount.put(" ሜትር", englishMillionAbbreviation); //am + abbreviationSubscribersCount.put(" 
ألف", englishThousandAbbreviation); //ar + abbreviationSubscribersCount.put(" مليون", englishMillionAbbreviation); //ar + abbreviationSubscribersCount.put(" mln", englishMillionAbbreviation); //az, et, lt, nl, pl, sq, uz + abbreviationSubscribersCount.put(" тыс", englishThousandAbbreviation); //be, ru + abbreviationSubscribersCount.put(" млн", englishMillionAbbreviation); //be, bg, kk, ky, ru, uk + abbreviationSubscribersCount.put(" хил", englishThousandAbbreviation); //bg + abbreviationSubscribersCount.put(" হা", englishThousandAbbreviation); //bn + abbreviationSubscribersCount.put(" hilj", englishThousandAbbreviation); //bs, sr + abbreviationSubscribersCount.put(" mil", englishMillionAbbreviation); //bs, cs, hr, ro, sk, sr-Latn + abbreviationSubscribersCount.put("m", englishThousandAbbreviation); //ca + abbreviationSubscribersCount.put(" M", englishMillionAbbreviation); //ca, es, eu, and many more + abbreviationSubscribersCount.put(" tis", englishThousandAbbreviation); //cs, hr, sk, sl + abbreviationSubscribersCount.put(" mio", englishMillionAbbreviation); //da, sl + abbreviationSubscribersCount.put(" Mio", englishMillionAbbreviation); //de + abbreviationSubscribersCount.put(" χιλ", englishThousandAbbreviation); //el + abbreviationSubscribersCount.put(" εκ", englishMillionAbbreviation); //el + abbreviationSubscribersCount.put(" tuh", englishThousandAbbreviation); //et + abbreviationSubscribersCount.put(" هزار", englishThousandAbbreviation); //fa + abbreviationSubscribersCount.put(" میلیون", englishMillionAbbreviation); //fa + abbreviationSubscribersCount.put(" t", englishThousandAbbreviation); //fi + abbreviationSubscribersCount.put(" milj", englishMillionAbbreviation); //fi, lv + abbreviationSubscribersCount.put(" હજાર", englishThousandAbbreviation); //gu + abbreviationSubscribersCount.put(" हज़ार", englishThousandAbbreviation); //hi + abbreviationSubscribersCount.put(" E", englishThousandAbbreviation); //hu + abbreviationSubscribersCount.put(" հզր", 
englishThousandAbbreviation); //hy + abbreviationSubscribersCount.put(" մլն", englishMillionAbbreviation); //hy + abbreviationSubscribersCount.put(" rb", englishThousandAbbreviation); //id + abbreviationSubscribersCount.put(" jt", englishMillionAbbreviation); //id + abbreviationSubscribersCount.put(" þ", englishThousandAbbreviation); //is + abbreviationSubscribersCount.put(" Mln", englishMillionAbbreviation); //it + abbreviationSubscribersCount.put(" ათ", englishThousandAbbreviation); //ka + abbreviationSubscribersCount.put(" მლნ", englishMillionAbbreviation); //ka + abbreviationSubscribersCount.put(" мың", englishThousandAbbreviation); //kk + abbreviationSubscribersCount.put("ពាន់", englishThousandAbbreviation); //km + abbreviationSubscribersCount.put(" ពាន់", englishThousandAbbreviation); //km + abbreviationSubscribersCount.put(" លាន", englishMillionAbbreviation); //km + abbreviationSubscribersCount.put("ಸಾ", englishThousandAbbreviation); //kn + abbreviationSubscribersCount.put("ಮಿ", englishMillionAbbreviation); //kn + abbreviationSubscribersCount.put("천", englishThousandAbbreviation); //ko + abbreviationSubscribersCount.put(" миң", englishThousandAbbreviation); //ky + abbreviationSubscribersCount.put(" ກີບ", englishThousandAbbreviation); //lo + abbreviationSubscribersCount.put(" ພັນ", englishThousandAbbreviation); //lo + abbreviationSubscribersCount.put(" ລ້ານ", englishMillionAbbreviation); //lo + abbreviationSubscribersCount.put(" tūkst", englishThousandAbbreviation); //lt, lv + abbreviationSubscribersCount.put(" илј", englishThousandAbbreviation); //mk + abbreviationSubscribersCount.put(" мил", englishMillionAbbreviation); //mk, sr + abbreviationSubscribersCount.put(" мянга", englishThousandAbbreviation); //mn + abbreviationSubscribersCount.put(" сая", englishMillionAbbreviation); //mn + abbreviationSubscribersCount.put(" ह", englishThousandAbbreviation); //mr + abbreviationSubscribersCount.put("ထောင်", englishThousandAbbreviation); //my + 
abbreviationSubscribersCount.put("သန်း", englishMillionAbbreviation); //my + abbreviationSubscribersCount.put(" हजार", englishThousandAbbreviation); //ne + abbreviationSubscribersCount.put("k", englishThousandAbbreviation); //no + abbreviationSubscribersCount.put(" mill", englishMillionAbbreviation); //no + abbreviationSubscribersCount.put(" ਹਜ਼ਾਰ", englishThousandAbbreviation); //pa + abbreviationSubscribersCount.put(" tys", englishThousandAbbreviation); //pl + abbreviationSubscribersCount.put(" mi", englishMillionAbbreviation); //pt + abbreviationSubscribersCount.put(" K", englishThousandAbbreviation); //ro + abbreviationSubscribersCount.put("ද", englishThousandAbbreviation); //si + abbreviationSubscribersCount.put("මි", englishMillionAbbreviation); //si + abbreviationSubscribersCount.put(" mijë", englishThousandAbbreviation); //sq + abbreviationSubscribersCount.put(" хиљ", englishThousandAbbreviation); //sr-Latn + abbreviationSubscribersCount.put(" mn", englishMillionAbbreviation); //sv + abbreviationSubscribersCount.put("elfu ", englishThousandAbbreviation); //sw + abbreviationSubscribersCount.put("ஆ", englishThousandAbbreviation); //ta + abbreviationSubscribersCount.put("மி", englishMillionAbbreviation); //ta + abbreviationSubscribersCount.put("వే", englishThousandAbbreviation); //te + abbreviationSubscribersCount.put("మి", englishMillionAbbreviation); //te + abbreviationSubscribersCount.put(" พัน", englishThousandAbbreviation); //th + abbreviationSubscribersCount.put(" ล้าน", englishMillionAbbreviation); //th + abbreviationSubscribersCount.put(" B", englishThousandAbbreviation); //tr + abbreviationSubscribersCount.put(" Mn", englishMillionAbbreviation); //tr + abbreviationSubscribersCount.put(" тис", englishThousandAbbreviation); //uk + abbreviationSubscribersCount.put(" ہزار", englishThousandAbbreviation); //ur + abbreviationSubscribersCount.put(" ming", englishThousandAbbreviation); //uz + abbreviationSubscribersCount.put(" N", englishThousandAbbreviation); 
//vi + abbreviationSubscribersCount.put(" Tr", englishMillionAbbreviation); //vi + + abbreviationSubscribersCount.put("만", tenThousandAbbreviation); //ko + abbreviationSubscribersCount.put("万", tenThousandAbbreviation); //ja, zh-CN + abbreviationSubscribersCount.put("萬", tenThousandAbbreviation); //zh-TW + + abbreviationSubscribersCount.put(" লা", hundredThousandAbbreviation); //bn + abbreviationSubscribersCount.put(" લાખ", hundredThousandAbbreviation); //gu + abbreviationSubscribersCount.put(" लाख", hundredThousandAbbreviation); //hi, mr, ne + abbreviationSubscribersCount.put(" ਲੱਖ", hundredThousandAbbreviation); //pa + abbreviationSubscribersCount.put(" لاکھ", hundredThousandAbbreviation); //ur + abbreviationSubscribersCount.put("သိန်း", hundredThousandAbbreviation); //my + abbreviationSubscribersCount.put(" แสน", hundredThousandAbbreviation); //th + + abbreviationSubscribersCount.put(" কো", tenMillionAbbreviation); //bn + abbreviationSubscribersCount.put(" કરોડ", tenMillionAbbreviation); //gu + abbreviationSubscribersCount.put(" क॰", tenMillionAbbreviation); //hi + abbreviationSubscribersCount.put(" कोटी", tenMillionAbbreviation); //mr + abbreviationSubscribersCount.put("ကုဋေ", tenMillionAbbreviation); //my + abbreviationSubscribersCount.put(" करोड", tenMillionAbbreviation); //ne + abbreviationSubscribersCount.put(" ਕਰੋੜ", tenMillionAbbreviation); //pa + abbreviationSubscribersCount.put(" کروڑ", tenMillionAbbreviation); //ur + + abbreviationSubscribersCount.put("億", hundredMillionAbbreviation); //ja, zh-TW + abbreviationSubscribersCount.put("억", hundredMillionAbbreviation); //ko + abbreviationSubscribersCount.put("亿", hundredMillionAbbreviation); //zh-CN + + abbreviationSubscribersCount.put(" م", englishMillionAbbreviation); //an + abbreviationSubscribersCount.put("ሜ", englishMillionAbbreviation); //am + abbreviationSubscribersCount.put(" М", englishMillionAbbreviation); //mk, narrow non-breaking space, ie U+202F + + } +} diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 7fbae8b030..b6b47ccb14 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -54,6 +54,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private Document doc; + public Document getDoc() { + return doc; + } + public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); } @@ -148,24 +152,10 @@ public long getSubscriberCount() throws ParsingException { // If the element is null, the channel have the subscriber count disabled String elTitle = el.attr("title"); try { - subCount = mixedNumberWordToLong(elTitle); + subCount = mixedNumberWordToLong(elTitle, getExtractorLocalization()); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); } - - if (!getExtractorLocalization().getLanguageCode().equals("en") && subCount < 1000 && subCount != -1) { - //if it's not gathered from English page, and if shortened (https://support.google.com/youtube/thread/6543166) - //see https://github.com/TeamNewPipe/NewPipe/issues/2632 - Downloader dl = NewPipe.getDownloader(); - String aboutUrl = "https://m.youtube.com/channel/" + getId() + "/about"; - try { - Response response = dl.get(aboutUrl, new Localization("en", "gb")); - Document docEN = YoutubeParsingHelper.parseAndCheckPage(aboutUrl, response); - subCount = mixedNumberWordToLong(docEN.select(".subscribed").attr("title")); - } catch (IOException | ReCaptchaException e) { - e.printStackTrace(); - } - } } return subCount; } diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index e68c817b44..02de18bcf8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -89,7 +89,7 @@ public class SubtitlesException extends ContentNotAvailableException { private JsonObject playerArgs; @Nonnull private final Map videoInfoPage = new HashMap<>(); - public JsonObject playerResponse; + private JsonObject playerResponse; @Nonnull private List subtitlesInfos = new ArrayList<>(); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 3489b6d603..303ae7d964 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -1,15 +1,21 @@ package org.schabi.newpipe.extractor.utils; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.localization.Localization; + import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; import java.util.List; -import org.schabi.newpipe.extractor.exceptions.ParsingException; +import static org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeAbbreviationSubCountMap.abbreviationSubscribersCount; public class Utils { + private static final String HTTP = "http://"; + private static final String HTTPS = "https://"; + private Utils() { //no instance } @@ -27,6 +33,24 @@ public static String removeNonDigitCharacters(String toRemove) { return toRemove.replaceAll("\\D+", ""); } + /** + *

<p>Remove a number from a string.</p>
+ * <p>Examples:</p>
+ * <ul>
+ * <li>"123" -> ""</li>
+ * <li>"1.23K" -> "K"</li>
+ * <li>"1.23 M" -> " M"</li>
+ * </ul>
+ * Pay attention, it may remove the final dot. + * eg: "8,93 хил." -> " хил" + * + * @param toRemove string to remove a number + * @return a string that contains only not a number + */ + public static String removeNumber(String toRemove) { + return toRemove.replaceAll("[0-9,.]", ""); + } + /** *

<p>Convert a mixed number word to a long.</p>
 * <p>Examples:</p>
@@ -35,6 +59,7 @@ public static String removeNonDigitCharacters(String toRemove) {
 * <li>1.23K -> 1230</li>
 * <li>1.23M -> 1230000</li>
 * </ul> + * * @param numberWord string to be converted to a long * @return a long * @throws NumberFormatException @@ -43,15 +68,24 @@ public static String removeNonDigitCharacters(String toRemove) { public static long mixedNumberWordToLong(String numberWord) throws NumberFormatException, ParsingException { String multiplier = ""; try { - multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])+", numberWord, 2); - } catch(ParsingException ignored) {} + multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb万লক億])+", numberWord, 2); + } catch (ParsingException ignored) { + } double count = Double.parseDouble(Parser.matchGroup1("([\\d]+([\\.,][\\d]+)?)", numberWord) .replace(",", ".")); switch (multiplier.toUpperCase()) { case "K": return (long) (count * 1e3); + case "万": //10K + return (long) (count * 1e4); + case "ল": //100K + return (long) (count * 1e5); case "M": return (long) (count * 1e6); + case "ক": //10M + return (long) (count * 1e7); + case "億": //100M + return (long) (count * 1e8); case "B": return (long) (count * 1e9); default: @@ -59,6 +93,41 @@ public static long mixedNumberWordToLong(String numberWord) throws NumberFormatE } } + //does the same as the function above, but for the 80 languages supported by YouTube. 
+ public static long mixedNumberWordToLong(String numberWord, Localization loc) throws NumberFormatException, ParsingException { + String langCode = loc.getLanguageCode(); + String abbreviation = removeNumber(numberWord); + + //special case for portugal, " mil" is the abbreviation for thousand, but is Million for many other languages + if (langCode.equals("pt") && abbreviation.equals(" mil")) { + numberWord = numberWord.replace(" mil", "K"); + } + //special case for languages written right to left + else if (langCode.equals("sw") && abbreviation.equals("elfu ")) { + numberWord = moveAtRight("elfu ", numberWord); + } else if (langCode.equals("si")) { + numberWord = moveAtRight(abbreviation, numberWord); + } + + try { //special cases where it gives a number directly for some languages, or with a dot or a comma, or space + String maybeAlreadyNumber = numberWord.replaceAll("([ .,])", ""); //dot, comma or narrow non-breaking space, ie U+202Fw + return Long.parseLong(maybeAlreadyNumber); + } catch (NumberFormatException e) { + //the number had an abbreviation, so it will be handled below + } + + if (!langCode.equals("en")) { + numberWord = numberWord.replace(abbreviation, abbreviationSubscribersCount.get(abbreviation)); + } + return mixedNumberWordToLong(numberWord); + } + + public static String moveAtRight(String toMove, String whole) { + whole = whole.replace(toMove, ""); + whole += toMove; + return whole; + } + /** * Check if the url matches the pattern. 
* @@ -82,9 +151,6 @@ public static void printErrors(List errors) { } } - private static final String HTTP = "http://"; - private static final String HTTPS = "https://"; - public static String replaceHttpWithHttps(final String url) { if (url == null) return null; @@ -165,17 +231,17 @@ public static boolean isHTTP(URL url) { return setsNoPort || usesDefaultPort; } - + public static String removeUTF8BOM(String s) { if (s.startsWith("\uFEFF")) { s = s.substring(1); } if (s.endsWith("\uFEFF")) { - s = s.substring(0, s.length()-1); + s = s.substring(0, s.length() - 1); } return s; } - + public static String getBaseUrl(String url) throws ParsingException { URL uri; try { diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java new file mode 100644 index 0000000000..83e97a42e8 --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -0,0 +1,1178 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.DownloaderTestImpl; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.localization.Localization; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; +import static org.schabi.newpipe.extractor.utils.Utils.removeNumber; + +/** + * Test for {@link } + * and specifically YoutubeChannelExtractor.getSubscriberCount() + * in all the languages supported by YouTube. 
+ * Takes a long time because we need to make 146 requests to YouTube + * DON'T RUN ON MOBILE DATA + */ + +/* +Pattern for function names: +testlangcodeRegionabbreviation +eg: +testenk = english thousand +testfrCam = French (Canada) million +testzhTwk = Chinese (Taiwan) thousand + */ + +/* +Commenting the whole file because otherwise it would slow down the CI tests too much. +And also, often one to three tests fail if you launch the whole test suite, because some requests fail. + */ + +/* +public class YoutubeSubscriberTest { + + private static final String channelThousand = "https://www.youtube.com/channel/UC_Fh8kvtkVPkeihBs42jGcA"; + private static final String channelMillion = "https://www.youtube.com/channel/UC-J-KZfRV8c13fOCkhXdLiQ"; + private static long countMillion; + private static long countThousand; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance(), Localization.DEFAULT); + + YoutubeChannelExtractor extractorMillion = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelMillion); + extractorMillion.fetchPage(); + countMillion = extractorMillion.getSubscriberCount(); + + YoutubeChannelExtractor extractorThousand = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelThousand); + extractorThousand.fetchPage(); + countThousand = extractorThousand.getSubscriberCount(); + } + + public static String getSubscriberCount(YoutubeChannelExtractor extractor) { + //fetches and returns the abbreviated number + //eg 26,8 k + Document doc = extractor.getDoc(); + Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first(); + return el.attr("title"); + } + + public static String getAbbreviation(String count) { + return removeNumber(count); + } + + public static String getAbbreviation(YoutubeChannelExtractor extractor) { + return getAbbreviation(getSubscriberCount(extractor)); + } + + public YoutubeChannelExtractor getExtractor(Localization loc, String
channelUrl) throws ExtractionException, IOException { + NewPipe.init(DownloaderTestImpl.getInstance(), loc); + YoutubeChannelExtractor extractor = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelUrl); + extractor.fetchPage(); + return extractor; + } + + public YoutubeChannelExtractor getExtractor(String type, Localization loc) throws ExtractionException, IOException { + if (type.equals("k")) { + return getExtractorThousand(loc); + } else if (type.equals("m")) { + return getExtractorMillion(loc); + } + return null; + } + + public YoutubeChannelExtractor getExtractorMillion(Localization loc) throws ExtractionException, IOException { + return getExtractor(loc, channelMillion); + } + + public YoutubeChannelExtractor getExtractorThousand(Localization loc) throws ExtractionException, IOException { + return getExtractor(loc, channelThousand); + } + + public void ut(YoutubeChannelExtractor extractor) { + String subscriberCount = getSubscriberCount(extractor); + System.out.println(extractor.getExtractorLocalization() + ": " + subscriberCount); + System.out.println(getAbbreviation(subscriberCount)); +// System.out.println("abbreviation =\"" + getAbbreviation(getSubscriberCount(extractor)) + "\""); + } + + public void buildthousand(Localization loc) throws IOException, ExtractionException { + String languageCode = loc.getLanguageCode(); + System.out.println(); + YoutubeChannelExtractor current = getExtractor("k", loc); + ut(current); + String abr = getAbbreviation(getSubscriberCount(current)); + System.out.println(" abbreviationSubscribersCount.put(\"" + abr + "\", englishThousandAbbreviation); //" + + languageCode); + String s = " @Test\n" + + " public void test" + languageCode + "k() throws IOException, ExtractionException {\n" + + " YoutubeChannelExtractor extractor = getExtractor(\"k\", new Localization(\"" + languageCode + + "\"));\n" + " ut(extractor);\n" + + " assertEquals(countThousand, extractor.getSubscriberCount());\n" + + " }"; + 
System.out.println(s + "\n"); + } + + public void buildmillion(Localization loc) throws IOException, ExtractionException { + String languageCode = loc.getLanguageCode(); + String s = " @Test\n" + + " public void test" + languageCode + "m() throws IOException, ExtractionException {\n" + + " YoutubeChannelExtractor extractor = getExtractor(\"m\", new Localization(\"" + languageCode + + "\"));\n" + " ut(extractor);\n" + + " assertEquals(countMillion, extractor.getSubscriberCount());\n" + + " }"; + System.out.println(s); + System.out.println(); + YoutubeChannelExtractor current = getExtractor("m", loc); + ut(current); + String abr = getAbbreviation(getSubscriberCount(current)); + System.out.println(" abbreviationSubscribersCount.put(\"" + abr + "\", englishMillionAbbreviation); //" + + languageCode); + } + + @Test + public void testafk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("af")); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testafm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("af")); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testamk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("am")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testamm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("am")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testark() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ar")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public 
void testarm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ar")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testazk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("az")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testazm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("az")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testbek() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("be")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testbem() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("be")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testbgk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("bg")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testbgm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("bg")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testbnk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("bn")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testbnm() throws IOException, ExtractionException { + 
YoutubeChannelExtractor extractor = getExtractor("m", new Localization("bn")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testbsk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("bs")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testbsm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("bs")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testcak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ca")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testcam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ca")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testcsk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("cs")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testcsm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("cs")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testdak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("da")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testdam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new 
Localization("da")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testdek() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("de")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testdem() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("de")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testelk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("el")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testelm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("el")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testes419k() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("es", "419")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testesUSk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("es", "US")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testesUSm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("es", "US")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testes419m() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("es", "419")); + ut(extractor); + 
assertEquals(countMillion, extractor.getSubscriberCount()); + } + + + @Test + public void testesk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("es")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testesm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("es")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testetk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("et")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testetm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("et")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testeuk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("eu")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testeum() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("eu")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testfak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fa")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testfam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fa")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test 
+ public void testfik() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fi")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testfim() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fi")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testfrk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fr")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testfrm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fr")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testfrCak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fr", "CA")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testfrCam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fr", "CA")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testglk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("gl")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testglm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("gl")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testguk() throws IOException, 
ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("gu")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testgum() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("gu")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testhik() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hi")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testhim() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hi")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testhrk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hr")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testhrm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hr")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testhuk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hu")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testhum() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hu")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testhyk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = 
getExtractor("k", new Localization("hy")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testhym() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hy")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testidk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("id")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testidm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("id")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testisk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("is")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testism() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("is")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testitk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("it")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testitm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("it")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testiwk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("iw")); + ut(extractor); + 
assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testiwm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("iw")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testjak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ja")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testjam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ja")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testkak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ka")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testkam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ka")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testkmk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("km")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testkmm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("km")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testknk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("kn")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test 
+ public void testknm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("kn")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testkok() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ko")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testkom() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ko")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testkyk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ky")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testkym() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ky")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testlok() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("lo")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testlom() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("lo")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testltk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("lt")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testltm() throws IOException, ExtractionException { + 
YoutubeChannelExtractor extractor = getExtractor("m", new Localization("lt")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testlvk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("lv")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testlvm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("lv")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testmkk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("mk")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testmkm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("mk")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testmnk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("mn")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testmnm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("mn")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testmrk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("mr")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testmrm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new 
Localization("mr")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testmyk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("my")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testmym() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("my")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testnek() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ne")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testnem() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ne")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testnlk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("nl")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testnlm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("nl")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testnok() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("no")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testnom() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("no")); + ut(extractor); + assertEquals(countMillion, 
extractor.getSubscriberCount()); + } + + @Test + public void testpak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("pa")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testpam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("pa")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testplk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("pl")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testplm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("pl")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testptk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("pt")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testptm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("pt")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testrok() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ro")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testrom() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ro")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testruk() 
throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ru")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testrum() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ru")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testsik() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("si")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testsim() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("si")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testskk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sk")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testskm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sk")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testslk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sl")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testslm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sl")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testsqk() throws IOException, ExtractionException { + YoutubeChannelExtractor 
extractor = getExtractor("k", new Localization("sq")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testsqm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sq")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testsrk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sr")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testsrm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sr")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testsrLatnk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sr", "Latn")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testsrLatnm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sr", "Latn")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testsvk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sv")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testsvm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sv")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testswk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new 
Localization("sw")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testswm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sw")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testtak() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ta")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testtam() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ta")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testtek() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("te")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testtem() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("te")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testthk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("th")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testthm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("th")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testtrk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("tr")); + ut(extractor); + assertEquals(countThousand, 
extractor.getSubscriberCount()); + } + + @Test + public void testtrm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("tr")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testukk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("uk")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testukm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("uk")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testurk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ur")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testurm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ur")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testuzk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("uz")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testuzm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("uz")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testvik() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("vi")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testvim() 
throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("vi")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testzhCnk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zh", "CN")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testzhCnm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zh", "CN")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testzhHkk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zh", "HK")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testzhHkm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zh", "HK")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testzhTwk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zh", "TW")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testzhTwm() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zh", "TW")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void testzuk() throws IOException, ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zu")); + ut(extractor); + assertEquals(countThousand, extractor.getSubscriberCount()); + } + + @Test + public void testzum() throws IOException, 
ExtractionException { + YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zu")); + ut(extractor); + assertEquals(countMillion, extractor.getSubscriberCount()); + } + + @Test + public void build() throws IOException, ExtractionException, InterruptedException { + Localization current = YoutubeService.SUPPORTED_LANGUAGES.get(79); + buildthousand(current); + buildmillion(current); + System.out.println(); + Thread.sleep(500); + System.out.println(); + } +} + +*/ \ No newline at end of file diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java index 5788674458..151216c8db 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java @@ -4,6 +4,10 @@ import org.junit.Test; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; + import static org.junit.Assert.assertEquals; public class UtilsTest { @@ -15,4 +19,10 @@ public void testMixedNumberWordToLong() throws JsonParserException, ParsingExcep assertEquals(10.5e6, Utils.mixedNumberWordToLong("10,5M"), 0.0); assertEquals(1.5e9, Utils.mixedNumberWordToLong("1,5B"), 0.0); } + + public static void createFile(String path, String content) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(path)); + writer.write(content); + writer.close(); + } } From f5d1952ced4b8f64941a440cef7ec8f5134bd207 Mon Sep 17 00:00:00 2001 From: bopol Date: Sun, 16 Feb 2020 18:29:20 +0100 Subject: [PATCH 04/15] added comment for explaination --- .../extractors/YoutubeAbbreviationSubCountMap.java | 12 ++++++------ .../youtube/extractors/YoutubeChannelExtractor.java | 4 ---- .../youtube/extractors/YoutubeStreamExtractor.java | 1 + .../org/schabi/newpipe/extractor/utils/Utils.java | 2 +- 
.../youtube/YoutubeChannelExtractorTest.java | 1 - .../services/youtube/YoutubeSubscriberTest.java | 10 ++++++---- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java index 3195caaf70..32912155dd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java @@ -6,12 +6,12 @@ public class YoutubeAbbreviationSubCountMap { //should be safe until someone has 1 billion subscribers on YouTube public static final HashMap abbreviationSubscribersCount = new HashMap<>(); - public static String englishMillionAbbreviation = "M"; - public static String englishThousandAbbreviation = "K"; - public static String tenThousandAbbreviation = "万"; - public static String hundredThousandAbbreviation = "ল"; - public static String tenMillionAbbreviation = "ক"; - public static String hundredMillionAbbreviation = "億"; + public static final String englishMillionAbbreviation = "M"; + public static final String englishThousandAbbreviation = "K"; + public static final String tenThousandAbbreviation = "万"; + public static final String hundredThousandAbbreviation = "ল"; + public static final String tenMillionAbbreviation = "ক"; + public static final String hundredMillionAbbreviation = "億"; static { abbreviationSubscribersCount.put(englishThousandAbbreviation, englishThousandAbbreviation); //az, iw, en diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index b6b47ccb14..19286bc609 100644 
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -54,10 +54,6 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private Document doc; - public Document getDoc() { - return doc; - } - public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 02de18bcf8..0e2212a11d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -184,6 +184,7 @@ public String getThumbnailUrl() throws ParsingException { public Description getDescription() throws ParsingException { assertPageFetched(); try { + //JSON first because formatting is better, see https://github.com/TeamNewPipe/NewPipeExtractor/pull/257#discussion_r379828770 return new Description(getString(playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getObject("description"), "simpleText"), Description.PLAIN_TEXT); } catch (Exception e) { try { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 303ae7d964..cf5ca2f266 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -37,7 +37,7 @@ public static String removeNonDigitCharacters(String toRemove) { *

    Remove a number from a string.

    *

    Examples:

    *
      - *
    • "123" -> "123"
    • + *
    • "123" -> ""
    • *
    • "1.23K" -> "K"
    • *
    • "1.23 M" -> " M"
    • *
    diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index fbb4647ab6..e032842ccc 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -489,7 +489,6 @@ public void testSubscriberCount() throws Exception { } } - public static class RandomChannel implements BaseChannelExtractorTest { private static YoutubeChannelExtractor extractor; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index 83e97a42e8..12c033342e 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -35,7 +35,11 @@ /* Commenting the whole file because otherwise it will slow down too much the CI test. -And also, often one up to three tests fail if you launch the whole tests, because some requests fail. +And also, often one up to three tests fail if you launch the whole tests, because some requests fail +(it could be reCAPTCHAs) but they, as of today (2020-02-16) success if you run each one. + +To run the test (to investigate maybe future problems), +You should create public Document getDoc() { return this.doc;} method in YoutubeChannelExtractor. 
*/ /* @@ -1166,7 +1170,7 @@ public void testzum() throws IOException, ExtractionException { @Test public void build() throws IOException, ExtractionException, InterruptedException { - Localization current = YoutubeService.SUPPORTED_LANGUAGES.get(79); + Localization current = YouTube.getSupportedLocalizations().get(79); buildthousand(current); buildmillion(current); System.out.println(); @@ -1174,5 +1178,3 @@ public void build() throws IOException, ExtractionException, InterruptedExceptio System.out.println(); } } - -*/ \ No newline at end of file From 0fd7484502bc815d30385192b2de3cc786849c9b Mon Sep 17 00:00:00 2001 From: bopol Date: Sun, 16 Feb 2020 21:52:55 +0100 Subject: [PATCH 05/15] Refactor AbbreviationHashMap, add javadoc for it and comment some other files related --- .../AbbreviationHashMap.java} | 100 +++++++++++------- .../schabi/newpipe/extractor/utils/Utils.java | 10 +- .../youtube/YoutubeSubscriberTest.java | 22 ++-- 3 files changed, 76 insertions(+), 56 deletions(-) rename extractor/src/main/java/org/schabi/newpipe/extractor/{services/youtube/extractors/YoutubeAbbreviationSubCountMap.java => utils/AbbreviationHashMap.java} (90%) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java similarity index 90% rename from extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java rename to extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java index 32912155dd..d0ca785159 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeAbbreviationSubCountMap.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java @@ -1,13 +1,36 @@ -package org.schabi.newpipe.extractor.services.youtube.extractors; +package 
org.schabi.newpipe.extractor.utils; import java.util.HashMap; -public class YoutubeAbbreviationSubCountMap { + +/** + * Map matching abbreviations with their English equivalents + * + * @author B0pol + *

    + * By using this map, you can replace the abbreviations used for numbers in the 80 languages supported by YouTube + * with their English equivalents. + *

    + *

    + * Some languages use more abbreviations for numbers: east-asian languages have abbreviations for ten thousand + * and hundred million, and indo-arabic languages have abbreviations for a hundred thousand and ten million. + * We therefore replace ten thousand with {@link #tenThousandAbbreviation}, + * hundred thousand with {@link #hundredThousandAbbreviation}, + * ten million with {@link #tenMillionAbbreviation}, + * hundred million with {@link #hundredMillionAbbreviation}. + *

    + *

    + * The languages that use each abbreviation are listed, by language code, in the comment that follows its entry. + * @see Wikipedia page of language codes + *

    + */ +public class AbbreviationHashMap { //should be safe until someone has 1 billion subscribers on YouTube public static final HashMap abbreviationSubscribersCount = new HashMap<>(); public static final String englishMillionAbbreviation = "M"; public static final String englishThousandAbbreviation = "K"; + public static final String tenThousandAbbreviation = "万"; public static final String hundredThousandAbbreviation = "ল"; public static final String tenMillionAbbreviation = "ক"; @@ -16,91 +39,93 @@ public class YoutubeAbbreviationSubCountMap { static { abbreviationSubscribersCount.put(englishThousandAbbreviation, englishThousandAbbreviation); //az, iw, en abbreviationSubscribersCount.put(englishMillionAbbreviation, englishMillionAbbreviation); //iw, en - abbreviationSubscribersCount.put(tenThousandAbbreviation, tenThousandAbbreviation); - abbreviationSubscribersCount.put(hundredMillionAbbreviation, hundredMillionAbbreviation); abbreviationSubscribersCount.put(tenMillionAbbreviation, tenMillionAbbreviation); - abbreviationSubscribersCount.put(hundredMillionAbbreviation, hundredMillionAbbreviation); abbreviationSubscribersCount.put(" k", englishThousandAbbreviation); //af - abbreviationSubscribersCount.put(" m", englishMillionAbbreviation); //af, is abbreviationSubscribersCount.put(" ሺ", englishThousandAbbreviation); //am - abbreviationSubscribersCount.put(" ሜትር", englishMillionAbbreviation); //am abbreviationSubscribersCount.put(" ألف", englishThousandAbbreviation); //ar - abbreviationSubscribersCount.put(" مليون", englishMillionAbbreviation); //ar - abbreviationSubscribersCount.put(" mln", englishMillionAbbreviation); //az, et, lt, nl, pl, sq, uz abbreviationSubscribersCount.put(" тыс", englishThousandAbbreviation); //be, ru - abbreviationSubscribersCount.put(" млн", englishMillionAbbreviation); //be, bg, kk, ky, ru, uk abbreviationSubscribersCount.put(" хил", englishThousandAbbreviation); //bg abbreviationSubscribersCount.put(" হা", 
englishThousandAbbreviation); //bn abbreviationSubscribersCount.put(" hilj", englishThousandAbbreviation); //bs, sr - abbreviationSubscribersCount.put(" mil", englishMillionAbbreviation); //bs, cs, hr, ro, sk, sr-Latn abbreviationSubscribersCount.put("m", englishThousandAbbreviation); //ca - abbreviationSubscribersCount.put(" M", englishMillionAbbreviation); //ca, es, eu, and many more abbreviationSubscribersCount.put(" tis", englishThousandAbbreviation); //cs, hr, sk, sl - abbreviationSubscribersCount.put(" mio", englishMillionAbbreviation); //da, sl - abbreviationSubscribersCount.put(" Mio", englishMillionAbbreviation); //de abbreviationSubscribersCount.put(" χιλ", englishThousandAbbreviation); //el - abbreviationSubscribersCount.put(" εκ", englishMillionAbbreviation); //el abbreviationSubscribersCount.put(" tuh", englishThousandAbbreviation); //et abbreviationSubscribersCount.put(" هزار", englishThousandAbbreviation); //fa - abbreviationSubscribersCount.put(" میلیون", englishMillionAbbreviation); //fa abbreviationSubscribersCount.put(" t", englishThousandAbbreviation); //fi - abbreviationSubscribersCount.put(" milj", englishMillionAbbreviation); //fi, lv abbreviationSubscribersCount.put(" હજાર", englishThousandAbbreviation); //gu abbreviationSubscribersCount.put(" हज़ार", englishThousandAbbreviation); //hi abbreviationSubscribersCount.put(" E", englishThousandAbbreviation); //hu abbreviationSubscribersCount.put(" հզր", englishThousandAbbreviation); //hy - abbreviationSubscribersCount.put(" մլն", englishMillionAbbreviation); //hy abbreviationSubscribersCount.put(" rb", englishThousandAbbreviation); //id - abbreviationSubscribersCount.put(" jt", englishMillionAbbreviation); //id abbreviationSubscribersCount.put(" þ", englishThousandAbbreviation); //is - abbreviationSubscribersCount.put(" Mln", englishMillionAbbreviation); //it abbreviationSubscribersCount.put(" ათ", englishThousandAbbreviation); //ka - abbreviationSubscribersCount.put(" მლნ", 
englishMillionAbbreviation); //ka abbreviationSubscribersCount.put(" мың", englishThousandAbbreviation); //kk abbreviationSubscribersCount.put("ពាន់", englishThousandAbbreviation); //km abbreviationSubscribersCount.put(" ពាន់", englishThousandAbbreviation); //km - abbreviationSubscribersCount.put(" លាន", englishMillionAbbreviation); //km abbreviationSubscribersCount.put("ಸಾ", englishThousandAbbreviation); //kn - abbreviationSubscribersCount.put("ಮಿ", englishMillionAbbreviation); //kn abbreviationSubscribersCount.put("천", englishThousandAbbreviation); //ko abbreviationSubscribersCount.put(" миң", englishThousandAbbreviation); //ky abbreviationSubscribersCount.put(" ກີບ", englishThousandAbbreviation); //lo abbreviationSubscribersCount.put(" ພັນ", englishThousandAbbreviation); //lo - abbreviationSubscribersCount.put(" ລ້ານ", englishMillionAbbreviation); //lo abbreviationSubscribersCount.put(" tūkst", englishThousandAbbreviation); //lt, lv abbreviationSubscribersCount.put(" илј", englishThousandAbbreviation); //mk - abbreviationSubscribersCount.put(" мил", englishMillionAbbreviation); //mk, sr abbreviationSubscribersCount.put(" мянга", englishThousandAbbreviation); //mn - abbreviationSubscribersCount.put(" сая", englishMillionAbbreviation); //mn abbreviationSubscribersCount.put(" ह", englishThousandAbbreviation); //mr abbreviationSubscribersCount.put("ထောင်", englishThousandAbbreviation); //my - abbreviationSubscribersCount.put("သန်း", englishMillionAbbreviation); //my abbreviationSubscribersCount.put(" हजार", englishThousandAbbreviation); //ne abbreviationSubscribersCount.put("k", englishThousandAbbreviation); //no - abbreviationSubscribersCount.put(" mill", englishMillionAbbreviation); //no abbreviationSubscribersCount.put(" ਹਜ਼ਾਰ", englishThousandAbbreviation); //pa abbreviationSubscribersCount.put(" tys", englishThousandAbbreviation); //pl - abbreviationSubscribersCount.put(" mi", englishMillionAbbreviation); //pt abbreviationSubscribersCount.put(" K", 
englishThousandAbbreviation); //ro abbreviationSubscribersCount.put("ද", englishThousandAbbreviation); //si - abbreviationSubscribersCount.put("මි", englishMillionAbbreviation); //si abbreviationSubscribersCount.put(" mijë", englishThousandAbbreviation); //sq abbreviationSubscribersCount.put(" хиљ", englishThousandAbbreviation); //sr-Latn - abbreviationSubscribersCount.put(" mn", englishMillionAbbreviation); //sv abbreviationSubscribersCount.put("elfu ", englishThousandAbbreviation); //sw abbreviationSubscribersCount.put("ஆ", englishThousandAbbreviation); //ta - abbreviationSubscribersCount.put("மி", englishMillionAbbreviation); //ta abbreviationSubscribersCount.put("వే", englishThousandAbbreviation); //te - abbreviationSubscribersCount.put("మి", englishMillionAbbreviation); //te abbreviationSubscribersCount.put(" พัน", englishThousandAbbreviation); //th - abbreviationSubscribersCount.put(" ล้าน", englishMillionAbbreviation); //th abbreviationSubscribersCount.put(" B", englishThousandAbbreviation); //tr - abbreviationSubscribersCount.put(" Mn", englishMillionAbbreviation); //tr abbreviationSubscribersCount.put(" тис", englishThousandAbbreviation); //uk abbreviationSubscribersCount.put(" ہزار", englishThousandAbbreviation); //ur abbreviationSubscribersCount.put(" ming", englishThousandAbbreviation); //uz abbreviationSubscribersCount.put(" N", englishThousandAbbreviation); //vi + + abbreviationSubscribersCount.put(" m", englishMillionAbbreviation); //af, is + abbreviationSubscribersCount.put(" م", englishMillionAbbreviation); //an + abbreviationSubscribersCount.put("ሜ", englishMillionAbbreviation); //am + abbreviationSubscribersCount.put(" ሜትር", englishMillionAbbreviation); //am + abbreviationSubscribersCount.put(" مليون", englishMillionAbbreviation); //ar + abbreviationSubscribersCount.put(" mln", englishMillionAbbreviation); //az, et, lt, nl, pl, sq, uz + abbreviationSubscribersCount.put(" млн", englishMillionAbbreviation); //be, bg, kk, ky, ru, uk + 
abbreviationSubscribersCount.put(" mil", englishMillionAbbreviation); //bs, cs, hr, ro, sk, sr-Latn + abbreviationSubscribersCount.put(" M", englishMillionAbbreviation); //ca, es, eu, and many more + abbreviationSubscribersCount.put(" mio", englishMillionAbbreviation); //da, sl + abbreviationSubscribersCount.put(" Mio", englishMillionAbbreviation); //de + abbreviationSubscribersCount.put(" εκ", englishMillionAbbreviation); //el + abbreviationSubscribersCount.put(" میلیون", englishMillionAbbreviation); //fa + abbreviationSubscribersCount.put(" milj", englishMillionAbbreviation); //fi, lv + abbreviationSubscribersCount.put(" մլն", englishMillionAbbreviation); //hy + abbreviationSubscribersCount.put(" jt", englishMillionAbbreviation); //id + abbreviationSubscribersCount.put(" Mln", englishMillionAbbreviation); //it + abbreviationSubscribersCount.put(" მლნ", englishMillionAbbreviation); //ka + abbreviationSubscribersCount.put(" លាន", englishMillionAbbreviation); //km + abbreviationSubscribersCount.put("ಮಿ", englishMillionAbbreviation); //kn + abbreviationSubscribersCount.put(" ລ້ານ", englishMillionAbbreviation); //lo + abbreviationSubscribersCount.put(" М", englishMillionAbbreviation); //mk. 
It isn't a space but a + // narrow non-breaking space, ie U+202F + abbreviationSubscribersCount.put(" мил", englishMillionAbbreviation); //mk, sr + abbreviationSubscribersCount.put(" сая", englishMillionAbbreviation); //mn + abbreviationSubscribersCount.put("သန်း", englishMillionAbbreviation); //my + abbreviationSubscribersCount.put(" mill", englishMillionAbbreviation); //no + abbreviationSubscribersCount.put(" mi", englishMillionAbbreviation); //pt + abbreviationSubscribersCount.put("මි", englishMillionAbbreviation); //si + abbreviationSubscribersCount.put(" mn", englishMillionAbbreviation); //sv + abbreviationSubscribersCount.put("మి", englishMillionAbbreviation); //te + abbreviationSubscribersCount.put("மி", englishMillionAbbreviation); //ta + abbreviationSubscribersCount.put(" ล้าน", englishMillionAbbreviation); //th + abbreviationSubscribersCount.put(" Mn", englishMillionAbbreviation); //tr abbreviationSubscribersCount.put(" Tr", englishMillionAbbreviation); //vi abbreviationSubscribersCount.put("만", tenThousandAbbreviation); //ko @@ -127,10 +152,5 @@ public class YoutubeAbbreviationSubCountMap { abbreviationSubscribersCount.put("億", hundredMillionAbbreviation); //ja, zh-TW abbreviationSubscribersCount.put("억", hundredMillionAbbreviation); //ko abbreviationSubscribersCount.put("亿", hundredMillionAbbreviation); //zh-CN - - abbreviationSubscribersCount.put(" م", englishMillionAbbreviation); //an - abbreviationSubscribersCount.put("ሜ", englishMillionAbbreviation); //am - abbreviationSubscribersCount.put(" М", englishMillionAbbreviation); //mk, narrow non-breaking space, ie U+202F - } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index cf5ca2f266..45d457fa01 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -9,7 +9,7 @@ import 
java.net.URLDecoder; import java.util.List; -import static org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeAbbreviationSubCountMap.abbreviationSubscribersCount; +import static org.schabi.newpipe.extractor.utils.AbbreviationHashMap.abbreviationSubscribersCount; public class Utils { @@ -76,15 +76,15 @@ public static long mixedNumberWordToLong(String numberWord) throws NumberFormatE switch (multiplier.toUpperCase()) { case "K": return (long) (count * 1e3); - case "万": //10K + case "万": //10K, used by east-asian languages return (long) (count * 1e4); - case "ল": //100K + case "ল": //100K, used by indo-arabic languages return (long) (count * 1e5); case "M": return (long) (count * 1e6); - case "ক": //10M + case "ক": //10M, used by indo-arabic languages return (long) (count * 1e7); - case "億": //100M + case "億": //100M, used by east-asian languages return (long) (count * 1e8); case "B": return (long) (count * 1e9); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index 12c033342e..79b654ac63 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -1,3 +1,4 @@ +/* package org.schabi.newpipe.extractor.services.youtube; import org.jsoup.nodes.Document; @@ -17,20 +18,18 @@ import static org.schabi.newpipe.extractor.utils.Utils.removeNumber; /** - * Test for {@link } + * Test for {@link YoutubeChannelExtractor} * and specifically YoutubeChannelExtractor.getSubscriberCount() * in all the languages supported by YouTube. 
* Takes a long time because we need to test make 146 requests to YouTube * DON'T RUN ON MOBILE DATA - */ - -/* -pattern for functions name: -testlangcodeRegionabbreviation -eg: -testenk = english thousand -testfrCam = French (Canada) million -testzhTwk = Chinese (Taiwan) thousand + *

    + * pattern for functions name: + * testlangcodeRegionabbreviation (Region is optional) + * eg: + * testenk = english thousand + * testfrCam = French (Canada) million + * testzhTwk = Chinese (Taiwan) thousand */ /* @@ -39,7 +38,7 @@ (it could be reCAPTCHAs) but they, as of today (2020-02-16) success if you run each one. To run the test (to investigate maybe future problems), -You should create public Document getDoc() { return this.doc;} method in YoutubeChannelExtractor. +You should temporarily create « public Document getDoc() { return this.doc;} » method in YoutubeChannelExtractor. */ /* @@ -1178,3 +1177,4 @@ public void build() throws IOException, ExtractionException, InterruptedExceptio System.out.println(); } } +*/ \ No newline at end of file From e51cd2a79be44a072753007d41e1c92316c3fdda Mon Sep 17 00:00:00 2001 From: bopol Date: Mon, 17 Feb 2020 12:01:48 +0100 Subject: [PATCH 06/15] address suggestino on AbbreviationHelper and related --- .../localization/AbbreviationHelper.java | 151 +++++++++++++++++ .../extractor/utils/AbbreviationHashMap.java | 156 ------------------ .../schabi/newpipe/extractor/utils/Utils.java | 27 +-- .../youtube/YoutubeSubscriberTest.java | 9 +- 4 files changed, 173 insertions(+), 170 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java delete mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java new file mode 100644 index 0000000000..3f581b1b56 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java @@ -0,0 +1,151 @@ +package org.schabi.newpipe.extractor.localization; + +import java.util.HashMap; + + +/** + * Map matching abbreviations with their English equivalents + * + * 
Created by B0pol on 2020-02-16. + * + *

    + * By using this map, you can replace the abbreviations used for numbers in the 80 languages supported by YouTube + * with their English equivalents. + *

    + *

    + * Some language use more abbreviations for numbers: east-asian languages have abbreviations for ten thousand, + * and hundred million, indo-arabic languages have abbreviations for a hundred thousand and ten million, + * then we replace ten thousand by {@link #tenThousandAbbreviation}, + * hundred thousand by {@link #hundredThousandAbbreviation}, + * ten million by {@link #tenMillionAbbreviation}, + * hundred million by {@link #hundredMillionAbbreviation}. + *

    + *

    + * The languages using the abbreviation is commented with the language code at the left. + * @see Wikipedia page of language codes + *

    + */ +public class AbbreviationHelper { + + //should be safe until someone has 1 billion subscribers on YouTube + public static final HashMap abbreviationSubscribersCount = new HashMap<>(); + public static final String englishMillionAbbreviation = "M"; + public static final String englishThousandAbbreviation = "K"; + + public static final String tenThousandAbbreviation = "万"; + public static final String hundredThousandAbbreviation = "ল"; + public static final String tenMillionAbbreviation = "ক"; + public static final String hundredMillionAbbreviation = "億"; + + static { + abbreviationSubscribersCount.put(englishThousandAbbreviation, englishThousandAbbreviation); //az, iw, en, ro + abbreviationSubscribersCount.put(englishMillionAbbreviation, englishMillionAbbreviation); //iw, en, ca, es, eu, and many more + abbreviationSubscribersCount.put(tenMillionAbbreviation, tenMillionAbbreviation); + + abbreviationSubscribersCount.put("k", englishThousandAbbreviation); //af, no + abbreviationSubscribersCount.put("ሺ", englishThousandAbbreviation); //am + abbreviationSubscribersCount.put("ألف", englishThousandAbbreviation); //ar + abbreviationSubscribersCount.put("тыс", englishThousandAbbreviation); //be, ru + abbreviationSubscribersCount.put("хил", englishThousandAbbreviation); //bg + abbreviationSubscribersCount.put("হা", englishThousandAbbreviation); //bn + abbreviationSubscribersCount.put("hilj", englishThousandAbbreviation); //bs, sr + abbreviationSubscribersCount.put("tis", englishThousandAbbreviation); //cs, hr, sk, sl + abbreviationSubscribersCount.put("χιλ", englishThousandAbbreviation); //el + abbreviationSubscribersCount.put("tuh", englishThousandAbbreviation); //et + abbreviationSubscribersCount.put("هزار", englishThousandAbbreviation); //fa + abbreviationSubscribersCount.put("t", englishThousandAbbreviation); //fi + abbreviationSubscribersCount.put("હજાર", englishThousandAbbreviation); //gu + abbreviationSubscribersCount.put("हज़ार", 
englishThousandAbbreviation); //hi + abbreviationSubscribersCount.put("E", englishThousandAbbreviation); //hu + abbreviationSubscribersCount.put("հզր", englishThousandAbbreviation); //hy + abbreviationSubscribersCount.put("rb", englishThousandAbbreviation); //id + abbreviationSubscribersCount.put("þ", englishThousandAbbreviation); //is + abbreviationSubscribersCount.put("ათ", englishThousandAbbreviation); //ka + abbreviationSubscribersCount.put("мың", englishThousandAbbreviation); //kk + abbreviationSubscribersCount.put("ពាន់", englishThousandAbbreviation); //km + abbreviationSubscribersCount.put("ಸಾ", englishThousandAbbreviation); //kn + abbreviationSubscribersCount.put("천", englishThousandAbbreviation); //ko + abbreviationSubscribersCount.put("миң", englishThousandAbbreviation); //ky + abbreviationSubscribersCount.put("ກີບ", englishThousandAbbreviation); //lo + abbreviationSubscribersCount.put("ພັນ", englishThousandAbbreviation); //lo + abbreviationSubscribersCount.put("tūkst", englishThousandAbbreviation); //lt, lv + abbreviationSubscribersCount.put("илј", englishThousandAbbreviation); //mk + abbreviationSubscribersCount.put("мянга", englishThousandAbbreviation); //mn + abbreviationSubscribersCount.put("ह", englishThousandAbbreviation); //mr + abbreviationSubscribersCount.put("ထောင်", englishThousandAbbreviation); //my + abbreviationSubscribersCount.put("हजार", englishThousandAbbreviation); //ne + abbreviationSubscribersCount.put("ਹਜ਼ਾਰ", englishThousandAbbreviation); //pa + abbreviationSubscribersCount.put("tys", englishThousandAbbreviation); //pl + abbreviationSubscribersCount.put("ද", englishThousandAbbreviation); //si + abbreviationSubscribersCount.put("mijë", englishThousandAbbreviation); //sq + abbreviationSubscribersCount.put("хиљ", englishThousandAbbreviation); //sr-Latn + abbreviationSubscribersCount.put("elfu", englishThousandAbbreviation); //sw + abbreviationSubscribersCount.put("ஆ", englishThousandAbbreviation); //ta + 
abbreviationSubscribersCount.put("వే", englishThousandAbbreviation); //te + abbreviationSubscribersCount.put("พัน", englishThousandAbbreviation); //th + abbreviationSubscribersCount.put("B", englishThousandAbbreviation); //tr + abbreviationSubscribersCount.put("тис", englishThousandAbbreviation); //uk + abbreviationSubscribersCount.put("ہزار", englishThousandAbbreviation); //ur + abbreviationSubscribersCount.put("ming", englishThousandAbbreviation); //uz + abbreviationSubscribersCount.put("N", englishThousandAbbreviation); //vi + + abbreviationSubscribersCount.put("m", englishMillionAbbreviation); //af, is + abbreviationSubscribersCount.put(" م", englishMillionAbbreviation); //an + abbreviationSubscribersCount.put("ሜ", englishMillionAbbreviation); //am + abbreviationSubscribersCount.put("ሜትር", englishMillionAbbreviation); //am + abbreviationSubscribersCount.put("مليون", englishMillionAbbreviation); //ar + abbreviationSubscribersCount.put("mln", englishMillionAbbreviation); //az, et, lt, nl, pl, sq, uz + abbreviationSubscribersCount.put("млн", englishMillionAbbreviation); //be, bg, kk, ky, ru, uk + abbreviationSubscribersCount.put("mil", englishMillionAbbreviation); //bs, cs, hr, ro, sk, sr-Latn + abbreviationSubscribersCount.put("mio", englishMillionAbbreviation); //da, sl + abbreviationSubscribersCount.put("Mio", englishMillionAbbreviation); //de + abbreviationSubscribersCount.put("εκ", englishMillionAbbreviation); //el + abbreviationSubscribersCount.put("میلیون", englishMillionAbbreviation); //fa + abbreviationSubscribersCount.put("milj", englishMillionAbbreviation); //fi, lv + abbreviationSubscribersCount.put("մլն", englishMillionAbbreviation); //hy + abbreviationSubscribersCount.put("jt", englishMillionAbbreviation); //id + abbreviationSubscribersCount.put("Mln", englishMillionAbbreviation); //it + abbreviationSubscribersCount.put("მლნ", englishMillionAbbreviation); //ka + abbreviationSubscribersCount.put("លាន", englishMillionAbbreviation); //km + 
abbreviationSubscribersCount.put("ಮಿ", englishMillionAbbreviation); //kn + abbreviationSubscribersCount.put("ລ້ານ", englishMillionAbbreviation); //lo + abbreviationSubscribersCount.put("М", englishMillionAbbreviation); //mk + abbreviationSubscribersCount.put("мил", englishMillionAbbreviation); //mk, sr + abbreviationSubscribersCount.put("сая", englishMillionAbbreviation); //mn + abbreviationSubscribersCount.put("သန်း", englishMillionAbbreviation); //my + abbreviationSubscribersCount.put("mill", englishMillionAbbreviation); //no + abbreviationSubscribersCount.put("mi", englishMillionAbbreviation); //pt + abbreviationSubscribersCount.put("මි", englishMillionAbbreviation); //si + abbreviationSubscribersCount.put("mn", englishMillionAbbreviation); //sv + abbreviationSubscribersCount.put("మి", englishMillionAbbreviation); //te + abbreviationSubscribersCount.put("மி", englishMillionAbbreviation); //ta + abbreviationSubscribersCount.put("ล้าน", englishMillionAbbreviation); //th + abbreviationSubscribersCount.put("Mn", englishMillionAbbreviation); //tr + abbreviationSubscribersCount.put("Tr", englishMillionAbbreviation); //vi + + abbreviationSubscribersCount.put("만", tenThousandAbbreviation); //ko + abbreviationSubscribersCount.put("万", tenThousandAbbreviation); //ja, zh-CN + abbreviationSubscribersCount.put("萬", tenThousandAbbreviation); //zh-TW + + abbreviationSubscribersCount.put("লা", hundredThousandAbbreviation); //bn + abbreviationSubscribersCount.put("લાખ", hundredThousandAbbreviation); //gu + abbreviationSubscribersCount.put("लाख", hundredThousandAbbreviation); //hi, mr, ne + abbreviationSubscribersCount.put("ਲੱਖ", hundredThousandAbbreviation); //pa + abbreviationSubscribersCount.put("لاکھ", hundredThousandAbbreviation); //ur + abbreviationSubscribersCount.put("သိန်း", hundredThousandAbbreviation); //my + abbreviationSubscribersCount.put("แสน", hundredThousandAbbreviation); //th + + abbreviationSubscribersCount.put("কো", tenMillionAbbreviation); //bn + 
abbreviationSubscribersCount.put("કરોડ", tenMillionAbbreviation); //gu + abbreviationSubscribersCount.put("क॰", tenMillionAbbreviation); //hi + abbreviationSubscribersCount.put("कोटी", tenMillionAbbreviation); //mr + abbreviationSubscribersCount.put("ကုဋေ", tenMillionAbbreviation); //my + abbreviationSubscribersCount.put("करोड", tenMillionAbbreviation); //ne + abbreviationSubscribersCount.put("ਕਰੋੜ", tenMillionAbbreviation); //pa + abbreviationSubscribersCount.put("کروڑ", tenMillionAbbreviation); //ur + + abbreviationSubscribersCount.put("億", hundredMillionAbbreviation); //ja, zh-TW + abbreviationSubscribersCount.put("억", hundredMillionAbbreviation); //ko + abbreviationSubscribersCount.put("亿", hundredMillionAbbreviation); //zh-CN + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java deleted file mode 100644 index d0ca785159..0000000000 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/AbbreviationHashMap.java +++ /dev/null @@ -1,156 +0,0 @@ -package org.schabi.newpipe.extractor.utils; - -import java.util.HashMap; - - -/** - * Map matching abbreviations with their English equivalents - * - * @author B0pol - *

    - * By using this map, you can replace the abbreviations used for numbers in the 80 languages supported by YouTube - * With their English equivalent. - *

    - *

    - * Some language use more abbreviations for numbers: east-asian languages have abbreviations for ten thousand, - * and hundred million, indo-arabic languages have abbreviations for a hundred thousand and ten million, - * then we replace ten thousand by {@link #tenThousandAbbreviation}, - * hundred thousand by {@link #hundredThousandAbbreviation}, - * ten million by {@link #tenMillionAbbreviation}, - * hundred million by {@link #hundredMillionAbbreviation}. - *

    - *

    - * The languages using the abbreviation is commented with the language code at the left. - * @see Wikipedia page of language codes - *

    - */ -public class AbbreviationHashMap { - - //should be safe until someone has 1 billion subscribers on YouTube - public static final HashMap abbreviationSubscribersCount = new HashMap<>(); - public static final String englishMillionAbbreviation = "M"; - public static final String englishThousandAbbreviation = "K"; - - public static final String tenThousandAbbreviation = "万"; - public static final String hundredThousandAbbreviation = "ল"; - public static final String tenMillionAbbreviation = "ক"; - public static final String hundredMillionAbbreviation = "億"; - - static { - abbreviationSubscribersCount.put(englishThousandAbbreviation, englishThousandAbbreviation); //az, iw, en - abbreviationSubscribersCount.put(englishMillionAbbreviation, englishMillionAbbreviation); //iw, en - abbreviationSubscribersCount.put(tenMillionAbbreviation, tenMillionAbbreviation); - - abbreviationSubscribersCount.put(" k", englishThousandAbbreviation); //af - abbreviationSubscribersCount.put(" ሺ", englishThousandAbbreviation); //am - abbreviationSubscribersCount.put(" ألف", englishThousandAbbreviation); //ar - abbreviationSubscribersCount.put(" тыс", englishThousandAbbreviation); //be, ru - abbreviationSubscribersCount.put(" хил", englishThousandAbbreviation); //bg - abbreviationSubscribersCount.put(" হা", englishThousandAbbreviation); //bn - abbreviationSubscribersCount.put(" hilj", englishThousandAbbreviation); //bs, sr - abbreviationSubscribersCount.put("m", englishThousandAbbreviation); //ca - abbreviationSubscribersCount.put(" tis", englishThousandAbbreviation); //cs, hr, sk, sl - abbreviationSubscribersCount.put(" χιλ", englishThousandAbbreviation); //el - abbreviationSubscribersCount.put(" tuh", englishThousandAbbreviation); //et - abbreviationSubscribersCount.put(" هزار", englishThousandAbbreviation); //fa - abbreviationSubscribersCount.put(" t", englishThousandAbbreviation); //fi - abbreviationSubscribersCount.put(" હજાર", englishThousandAbbreviation); //gu - 
abbreviationSubscribersCount.put(" हज़ार", englishThousandAbbreviation); //hi - abbreviationSubscribersCount.put(" E", englishThousandAbbreviation); //hu - abbreviationSubscribersCount.put(" հզր", englishThousandAbbreviation); //hy - abbreviationSubscribersCount.put(" rb", englishThousandAbbreviation); //id - abbreviationSubscribersCount.put(" þ", englishThousandAbbreviation); //is - abbreviationSubscribersCount.put(" ათ", englishThousandAbbreviation); //ka - abbreviationSubscribersCount.put(" мың", englishThousandAbbreviation); //kk - abbreviationSubscribersCount.put("ពាន់", englishThousandAbbreviation); //km - abbreviationSubscribersCount.put(" ពាន់", englishThousandAbbreviation); //km - abbreviationSubscribersCount.put("ಸಾ", englishThousandAbbreviation); //kn - abbreviationSubscribersCount.put("천", englishThousandAbbreviation); //ko - abbreviationSubscribersCount.put(" миң", englishThousandAbbreviation); //ky - abbreviationSubscribersCount.put(" ກີບ", englishThousandAbbreviation); //lo - abbreviationSubscribersCount.put(" ພັນ", englishThousandAbbreviation); //lo - abbreviationSubscribersCount.put(" tūkst", englishThousandAbbreviation); //lt, lv - abbreviationSubscribersCount.put(" илј", englishThousandAbbreviation); //mk - abbreviationSubscribersCount.put(" мянга", englishThousandAbbreviation); //mn - abbreviationSubscribersCount.put(" ह", englishThousandAbbreviation); //mr - abbreviationSubscribersCount.put("ထောင်", englishThousandAbbreviation); //my - abbreviationSubscribersCount.put(" हजार", englishThousandAbbreviation); //ne - abbreviationSubscribersCount.put("k", englishThousandAbbreviation); //no - abbreviationSubscribersCount.put(" ਹਜ਼ਾਰ", englishThousandAbbreviation); //pa - abbreviationSubscribersCount.put(" tys", englishThousandAbbreviation); //pl - abbreviationSubscribersCount.put(" K", englishThousandAbbreviation); //ro - abbreviationSubscribersCount.put("ද", englishThousandAbbreviation); //si - abbreviationSubscribersCount.put(" mijë", 
englishThousandAbbreviation); //sq - abbreviationSubscribersCount.put(" хиљ", englishThousandAbbreviation); //sr-Latn - abbreviationSubscribersCount.put("elfu ", englishThousandAbbreviation); //sw - abbreviationSubscribersCount.put("ஆ", englishThousandAbbreviation); //ta - abbreviationSubscribersCount.put("వే", englishThousandAbbreviation); //te - abbreviationSubscribersCount.put(" พัน", englishThousandAbbreviation); //th - abbreviationSubscribersCount.put(" B", englishThousandAbbreviation); //tr - abbreviationSubscribersCount.put(" тис", englishThousandAbbreviation); //uk - abbreviationSubscribersCount.put(" ہزار", englishThousandAbbreviation); //ur - abbreviationSubscribersCount.put(" ming", englishThousandAbbreviation); //uz - abbreviationSubscribersCount.put(" N", englishThousandAbbreviation); //vi - - abbreviationSubscribersCount.put(" m", englishMillionAbbreviation); //af, is - abbreviationSubscribersCount.put(" م", englishMillionAbbreviation); //an - abbreviationSubscribersCount.put("ሜ", englishMillionAbbreviation); //am - abbreviationSubscribersCount.put(" ሜትር", englishMillionAbbreviation); //am - abbreviationSubscribersCount.put(" مليون", englishMillionAbbreviation); //ar - abbreviationSubscribersCount.put(" mln", englishMillionAbbreviation); //az, et, lt, nl, pl, sq, uz - abbreviationSubscribersCount.put(" млн", englishMillionAbbreviation); //be, bg, kk, ky, ru, uk - abbreviationSubscribersCount.put(" mil", englishMillionAbbreviation); //bs, cs, hr, ro, sk, sr-Latn - abbreviationSubscribersCount.put(" M", englishMillionAbbreviation); //ca, es, eu, and many more - abbreviationSubscribersCount.put(" mio", englishMillionAbbreviation); //da, sl - abbreviationSubscribersCount.put(" Mio", englishMillionAbbreviation); //de - abbreviationSubscribersCount.put(" εκ", englishMillionAbbreviation); //el - abbreviationSubscribersCount.put(" میلیون", englishMillionAbbreviation); //fa - abbreviationSubscribersCount.put(" milj", englishMillionAbbreviation); //fi, lv - 
abbreviationSubscribersCount.put(" մլն", englishMillionAbbreviation); //hy - abbreviationSubscribersCount.put(" jt", englishMillionAbbreviation); //id - abbreviationSubscribersCount.put(" Mln", englishMillionAbbreviation); //it - abbreviationSubscribersCount.put(" მლნ", englishMillionAbbreviation); //ka - abbreviationSubscribersCount.put(" លាន", englishMillionAbbreviation); //km - abbreviationSubscribersCount.put("ಮಿ", englishMillionAbbreviation); //kn - abbreviationSubscribersCount.put(" ລ້ານ", englishMillionAbbreviation); //lo - abbreviationSubscribersCount.put(" М", englishMillionAbbreviation); //mk. It isn't a space but a - // narrow non-breaking space, ie U+202F - abbreviationSubscribersCount.put(" мил", englishMillionAbbreviation); //mk, sr - abbreviationSubscribersCount.put(" сая", englishMillionAbbreviation); //mn - abbreviationSubscribersCount.put("သန်း", englishMillionAbbreviation); //my - abbreviationSubscribersCount.put(" mill", englishMillionAbbreviation); //no - abbreviationSubscribersCount.put(" mi", englishMillionAbbreviation); //pt - abbreviationSubscribersCount.put("මි", englishMillionAbbreviation); //si - abbreviationSubscribersCount.put(" mn", englishMillionAbbreviation); //sv - abbreviationSubscribersCount.put("మి", englishMillionAbbreviation); //te - abbreviationSubscribersCount.put("மி", englishMillionAbbreviation); //ta - abbreviationSubscribersCount.put(" ล้าน", englishMillionAbbreviation); //th - abbreviationSubscribersCount.put(" Mn", englishMillionAbbreviation); //tr - abbreviationSubscribersCount.put(" Tr", englishMillionAbbreviation); //vi - - abbreviationSubscribersCount.put("만", tenThousandAbbreviation); //ko - abbreviationSubscribersCount.put("万", tenThousandAbbreviation); //ja, zh-CN - abbreviationSubscribersCount.put("萬", tenThousandAbbreviation); //zh-TW - - abbreviationSubscribersCount.put(" লা", hundredThousandAbbreviation); //bn - abbreviationSubscribersCount.put(" લાખ", hundredThousandAbbreviation); //gu - 
abbreviationSubscribersCount.put(" लाख", hundredThousandAbbreviation); //hi, mr, ne - abbreviationSubscribersCount.put(" ਲੱਖ", hundredThousandAbbreviation); //pa - abbreviationSubscribersCount.put(" لاکھ", hundredThousandAbbreviation); //ur - abbreviationSubscribersCount.put("သိန်း", hundredThousandAbbreviation); //my - abbreviationSubscribersCount.put(" แสน", hundredThousandAbbreviation); //th - - abbreviationSubscribersCount.put(" কো", tenMillionAbbreviation); //bn - abbreviationSubscribersCount.put(" કરોડ", tenMillionAbbreviation); //gu - abbreviationSubscribersCount.put(" क॰", tenMillionAbbreviation); //hi - abbreviationSubscribersCount.put(" कोटी", tenMillionAbbreviation); //mr - abbreviationSubscribersCount.put("ကုဋေ", tenMillionAbbreviation); //my - abbreviationSubscribersCount.put(" करोड", tenMillionAbbreviation); //ne - abbreviationSubscribersCount.put(" ਕਰੋੜ", tenMillionAbbreviation); //pa - abbreviationSubscribersCount.put(" کروڑ", tenMillionAbbreviation); //ur - - abbreviationSubscribersCount.put("億", hundredMillionAbbreviation); //ja, zh-TW - abbreviationSubscribersCount.put("억", hundredMillionAbbreviation); //ko - abbreviationSubscribersCount.put("亿", hundredMillionAbbreviation); //zh-CN - } -} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 45d457fa01..38a12ff708 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -9,7 +9,7 @@ import java.net.URLDecoder; import java.util.List; -import static org.schabi.newpipe.extractor.utils.AbbreviationHashMap.abbreviationSubscribersCount; +import static org.schabi.newpipe.extractor.localization.AbbreviationHelper.abbreviationSubscribersCount; public class Utils { @@ -94,30 +94,37 @@ public static long mixedNumberWordToLong(String numberWord) throws NumberFormatE } //does the same as the function 
above, but for the 80 languages supported by YouTube. - public static long mixedNumberWordToLong(String numberWord, Localization loc) throws NumberFormatException, ParsingException { + public static long mixedNumberWordToLong(String numberWord, Localization loc) throws ParsingException { + numberWord = numberWord.replaceAll("(\\s| | )", ""); //remove whitespaces String langCode = loc.getLanguageCode(); String abbreviation = removeNumber(numberWord); - //special case for portugal, " mil" is the abbreviation for thousand, but is Million for many other languages - if (langCode.equals("pt") && abbreviation.equals(" mil")) { - numberWord = numberWord.replace(" mil", "K"); + //special case for portugal, "mil" is the abbreviation for thousand, but is Million for many other languages + if (langCode.equals("pt") && abbreviation.equals("mil")) { + numberWord = numberWord.replace("mil", "K"); + } else if (langCode.equals("ca") && abbreviation.equals("m")) { //same for catalan but for "m" + numberWord = numberWord.replace("m", "K"); } //special case for languages written right to left - else if (langCode.equals("sw") && abbreviation.equals("elfu ")) { - numberWord = moveAtRight("elfu ", numberWord); + else if (langCode.equals("sw") && abbreviation.equals("elfu")) { + numberWord = moveAtRight("elfu", numberWord); } else if (langCode.equals("si")) { numberWord = moveAtRight(abbreviation, numberWord); } - try { //special cases where it gives a number directly for some languages, or with a dot or a comma, or space - String maybeAlreadyNumber = numberWord.replaceAll("([ .,])", ""); //dot, comma or narrow non-breaking space, ie U+202Fw + try { //special cases where it gives a number directly for some languages, or with a dot or a comma + String maybeAlreadyNumber = numberWord.replaceAll("([.,])", ""); return Long.parseLong(maybeAlreadyNumber); } catch (NumberFormatException e) { //the number had an abbreviation, so it will be handled below } if (!langCode.equals("en")) { - 
numberWord = numberWord.replace(abbreviation, abbreviationSubscribersCount.get(abbreviation)); + try { + numberWord = numberWord.replace(abbreviation, abbreviationSubscribersCount.get(abbreviation)); + } catch (NullPointerException e) { + throw new ParsingException("The abbreviation \"" + abbreviation + "\" is missing in AbbreviationHelper map"); + } } return mixedNumberWordToLong(numberWord); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index 79b654ac63..e41bf28598 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -1,9 +1,9 @@ -/* package org.schabi.newpipe.extractor.services.youtube; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.NewPipe; @@ -33,7 +33,7 @@ */ /* -Commenting the whole file because otherwise it will slow down too much the CI test. +Ignoring the test because otherwise it will slow down too much the CI test. And also, often one up to three tests fail if you launch the whole tests, because some requests fail (it could be reCAPTCHAs) but they, as of today (2020-02-16) success if you run each one. @@ -41,7 +41,7 @@ To run the test (to investigate maybe future problems), You should temporarily create « public Document getDoc() { return this.doc;} » method in YoutubeChannelExtractor. 
*/ -/* +@Ignore public class YoutubeSubscriberTest { private static final String channelThousand = "https://www.youtube.com/channel/UC_Fh8kvtkVPkeihBs42jGcA"; @@ -73,6 +73,7 @@ public static String getSubscriberCount(YoutubeChannelExtractor extractor) { } public static String getAbbreviation(String count) { + count = count.replaceAll("(\\s| | )", ""); return removeNumber(count); } @@ -1167,6 +1168,7 @@ public void testzum() throws IOException, ExtractionException { assertEquals(countMillion, extractor.getSubscriberCount()); } + @Ignore @Test public void build() throws IOException, ExtractionException, InterruptedException { Localization current = YouTube.getSupportedLocalizations().get(79); @@ -1177,4 +1179,3 @@ public void build() throws IOException, ExtractionException, InterruptedExceptio System.out.println(); } } -*/ \ No newline at end of file From 62effa08384dc1a2ac23b39cbc294324a976d761 Mon Sep 17 00:00:00 2001 From: bopol Date: Mon, 17 Feb 2020 12:24:13 +0100 Subject: [PATCH 07/15] resolve merge conflicts --- .../newpipe/extractor/localization/AbbreviationHelper.java | 3 ++- .../youtube/extractors/YoutubeChannelExtractor.java | 7 ++++--- .../services/youtube/YoutubeChannelExtractorTest.java | 3 ++- .../extractor/services/youtube/YoutubeSubscriberTest.java | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java index 3f581b1b56..131715516f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java @@ -5,7 +5,7 @@ /** * Map matching abbreviations with their English equivalents - * + *

    * Created by B0pol on 2020-02-16. * *

    @@ -22,6 +22,7 @@ *

    *

    * The languages using the abbreviation is commented with the language code at the left. + * * @see Wikipedia page of language codes *

    */ diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 19286bc609..c2c04f4c3b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -6,16 +6,13 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; -import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; @@ -52,6 +49,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { /*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/"; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; + public Document getDoc() { + return doc; + } + private Document doc; public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java 
b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index d0c396f399..fbb4647ab6 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -7,6 +7,7 @@ import org.schabi.newpipe.extractor.ServiceList; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.services.BaseChannelExtractorTest; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; @@ -488,7 +489,7 @@ public void testSubscriberCount() throws Exception { } } - + public static class RandomChannel implements BaseChannelExtractorTest { private static YoutubeChannelExtractor extractor; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index e41bf28598..a3aea369c4 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -41,7 +41,7 @@ To run the test (to investigate maybe future problems), You should temporarily create « public Document getDoc() { return this.doc;} » method in YoutubeChannelExtractor. 
*/ -@Ignore +//@Ignore public class YoutubeSubscriberTest { private static final String channelThousand = "https://www.youtube.com/channel/UC_Fh8kvtkVPkeihBs42jGcA"; From 9b612034c490dcca8d557f088fa32ab8486fc3d7 Mon Sep 17 00:00:00 2001 From: bopol Date: Mon, 17 Feb 2020 14:53:07 +0100 Subject: [PATCH 08/15] 3 more abbreviations found, improve ut method --- .../localization/AbbreviationHelper.java | 3 + .../youtube/YoutubeSubscriberTest.java | 70 +++++++++++++++++-- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java index 131715516f..9825a790c9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java @@ -102,6 +102,7 @@ public class AbbreviationHelper { abbreviationSubscribersCount.put("Mio", englishMillionAbbreviation); //de abbreviationSubscribersCount.put("εκ", englishMillionAbbreviation); //el abbreviationSubscribersCount.put("میلیون", englishMillionAbbreviation); //fa + abbreviationSubscribersCount.put("م", englishMillionAbbreviation); //fa abbreviationSubscribersCount.put("milj", englishMillionAbbreviation); //fi, lv abbreviationSubscribersCount.put("մլն", englishMillionAbbreviation); //hy abbreviationSubscribersCount.put("jt", englishMillionAbbreviation); //id @@ -127,6 +128,8 @@ public class AbbreviationHelper { abbreviationSubscribersCount.put("만", tenThousandAbbreviation); //ko abbreviationSubscribersCount.put("万", tenThousandAbbreviation); //ja, zh-CN abbreviationSubscribersCount.put("萬", tenThousandAbbreviation); //zh-TW + abbreviationSubscribersCount.put("သောင်း", tenThousandAbbreviation); //my + abbreviationSubscribersCount.put("หมื่น", tenThousandAbbreviation); //th abbreviationSubscribersCount.put("লা", 
hundredThousandAbbreviation); //bn abbreviationSubscribersCount.put("લાખ", hundredThousandAbbreviation); //gu diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index a3aea369c4..9d266867a6 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -15,6 +15,7 @@ import static org.junit.Assert.assertEquals; import static org.schabi.newpipe.extractor.ServiceList.YouTube; +import static org.schabi.newpipe.extractor.localization.AbbreviationHelper.abbreviationSubscribersCount; import static org.schabi.newpipe.extractor.utils.Utils.removeNumber; /** @@ -41,10 +42,33 @@ To run the test (to investigate maybe future problems), You should temporarily create « public Document getDoc() { return this.doc;} » method in YoutubeChannelExtractor. 
*/ -//@Ignore +/* +Tested channels: +https://www.youtube.com/user/NeekoMonster, 1K +https://www.youtube.com/channel/UCZ8PPBXJmEW4UV53zm3pC_w, 8.64K +https://www.youtube.com/channel/UCgd-Ztt8sr_J7nSdz25FQgA, 51.6K +https://www.youtube.com/channel/UC_Fh8kvtkVPkeihBs42jGcA, 104K +https://invidio.us/channel/TroncheEnBiais, 181K +https://invidio.us/channel/UCsTK8xMZKkrbTeWc8REbn8Q, 651K +https://invidio.us/channel/UCYpRDnhk5H8h16jpS84uqsA, 744K + +https://invidio.us/channel/UClOeGHFiUlegRJFGhkMxoHg, 1.2M +https://www.youtube.com/user/lemondealenversvideo, 3M +https://www.youtube.com/channel/UC-J-KZfRV8c13fOCkhXdLiQ, 12.M +https://invidio.us/channel/BANGTANTV, 25M +https://www.youtube.com/user/zeemusiccompany, 50,5M +https://invidio.us/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw, 103M +https://www.youtube.com/user/tseries, 129M + +https://www.youtube.com/user/EminemVEVO/, disabled +The test fails (because of the ut function), but actually it's ok. +see testDisabled() + */ + +@Ignore public class YoutubeSubscriberTest { - private static final String channelThousand = "https://www.youtube.com/channel/UC_Fh8kvtkVPkeihBs42jGcA"; + private static final String channelThousand = "https://invidio.us/channel/UCq-8pBMM3I40QlrhM9ExXJQ "; private static final String channelMillion = "https://www.youtube.com/channel/UC-J-KZfRV8c13fOCkhXdLiQ"; private static long countMillion; private static long countThousand; @@ -81,6 +105,21 @@ public static String getAbbreviation(YoutubeChannelExtractor extractor) { return getAbbreviation(getSubscriberCount(extractor)); } + @Test + public void testDisabled() throws ExtractionException, IOException { + NewPipe.init(DownloaderTestImpl.getInstance(), Localization.DEFAULT); + YoutubeChannelExtractor en = (YoutubeChannelExtractor) YouTube.getChannelExtractor("https://www.youtube.com/user/EminemVEVO/"); + en.fetchPage(); + System.out.println(en.getSubscriberCount() + " " + en.getExtractorLocalization()); + + NewPipe.init(DownloaderTestImpl.getInstance(), 
Localization.fromLocalizationCode("ar")); + YoutubeChannelExtractor fr = (YoutubeChannelExtractor) YouTube.getChannelExtractor("https://www.youtube.com/user/EminemVEVO/"); + fr.fetchPage(); + System.out.println(fr.getSubscriberCount() + " " + fr.getExtractorLocalization()); + + assertEquals(fr.getSubscriberCount(), en.getSubscriberCount()); + } + public YoutubeChannelExtractor getExtractor(Localization loc, String channelUrl) throws ExtractionException, IOException { NewPipe.init(DownloaderTestImpl.getInstance(), loc); YoutubeChannelExtractor extractor = (YoutubeChannelExtractor) YouTube @@ -109,8 +148,29 @@ public YoutubeChannelExtractor getExtractorThousand(Localization loc) throws Ext public void ut(YoutubeChannelExtractor extractor) { String subscriberCount = getSubscriberCount(extractor); System.out.println(extractor.getExtractorLocalization() + ": " + subscriberCount); - System.out.println(getAbbreviation(subscriberCount)); -// System.out.println("abbreviation =\"" + getAbbreviation(getSubscriberCount(extractor)) + "\""); + String abbreviation = getAbbreviation(subscriberCount); + try { + abbreviation = abbreviation.replace(abbreviation, abbreviationSubscribersCount.get(abbreviation)); + } catch (NullPointerException e) { + if (!abbreviation.isEmpty()) { + try { + throw new Exception("this should be a real failed test. Abbreviation=\"" + abbreviation + "\"" + + "Localization :" + extractor.getExtractorLocalization()); + } catch (Exception ex) { + ex.printStackTrace(); //do that instead of adding "throws Exception" to signature, because otherwise I would have + //to edit the 146 signatures. 
+ } + } else { + //see if it's not one of the languages giving a number directly + try { //special cases where it gives a number directly for some languages, or with a dot or a comma + String maybeAlreadyNumber = subscriberCount.replaceAll("([ ., ])", ""); + long count = Long.parseLong(maybeAlreadyNumber); + } catch (NumberFormatException i) { + System.out.println("The abbreviation is empty, this is probably a failed request" + + "Localization :" + extractor.getExtractorLocalization()); + } + } + } } public void buildthousand(Localization loc) throws IOException, ExtractionException { @@ -150,12 +210,14 @@ public void buildmillion(Localization loc) throws IOException, ExtractionExcepti @Test public void testafk() throws IOException, ExtractionException { YoutubeChannelExtractor extractor = getExtractor("k", new Localization("af")); + ut(extractor); assertEquals(countThousand, extractor.getSubscriberCount()); } @Test public void testafm() throws IOException, ExtractionException { YoutubeChannelExtractor extractor = getExtractor("m", new Localization("af")); + ut(extractor); assertEquals(countMillion, extractor.getSubscriberCount()); } From 88f9ab97d3aa3033cba8d6d50a8ffc62d7641a84 Mon Sep 17 00:00:00 2001 From: bopol Date: Mon, 17 Feb 2020 22:32:29 +0100 Subject: [PATCH 09/15] Refactored YouTubeSubscriberTest --- .../localization/AbbreviationHelper.java | 2 + .../extractors/YoutubeChannelExtractor.java | 4 - .../schabi/newpipe/extractor/utils/Utils.java | 6 +- .../youtube/YoutubeSubcriberTest.java | 209 +++ .../youtube/YoutubeSubscriberTest.java | 1243 ----------------- 5 files changed, 216 insertions(+), 1248 deletions(-) create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java delete mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java index 9825a790c9..f0a7c9edcf 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/AbbreviationHelper.java @@ -63,6 +63,7 @@ public class AbbreviationHelper { abbreviationSubscribersCount.put("þ", englishThousandAbbreviation); //is abbreviationSubscribersCount.put("ათ", englishThousandAbbreviation); //ka abbreviationSubscribersCount.put("мың", englishThousandAbbreviation); //kk + abbreviationSubscribersCount.put("м", englishThousandAbbreviation); //kk abbreviationSubscribersCount.put("ពាន់", englishThousandAbbreviation); //km abbreviationSubscribersCount.put("ಸಾ", englishThousandAbbreviation); //kn abbreviationSubscribersCount.put("천", englishThousandAbbreviation); //ko @@ -114,6 +115,7 @@ public class AbbreviationHelper { abbreviationSubscribersCount.put("М", englishMillionAbbreviation); //mk abbreviationSubscribersCount.put("мил", englishMillionAbbreviation); //mk, sr abbreviationSubscribersCount.put("сая", englishMillionAbbreviation); //mn + abbreviationSubscribersCount.put("J", englishMillionAbbreviation); //ms abbreviationSubscribersCount.put("သန်း", englishMillionAbbreviation); //my abbreviationSubscribersCount.put("mill", englishMillionAbbreviation); //no abbreviationSubscribersCount.put("mi", englishMillionAbbreviation); //pt diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index c2c04f4c3b..dc9ccc5ee3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -49,10 +49,6 @@ public 
class YoutubeChannelExtractor extends ChannelExtractor { /*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/"; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; - public Document getDoc() { - return doc; - } - private Document doc; public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 38a12ff708..b595d99bc2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -93,9 +93,13 @@ public static long mixedNumberWordToLong(String numberWord) throws NumberFormatE } } + public static String cleanWhiteSpaces(String s) { + return s.replaceAll("(\\s| | )", ""); + } + //does the same as the function above, but for the 80 languages supported by YouTube. 
public static long mixedNumberWordToLong(String numberWord, Localization loc) throws ParsingException { - numberWord = numberWord.replaceAll("(\\s| | )", ""); //remove whitespaces + numberWord = cleanWhiteSpaces(numberWord); String langCode = loc.getLanguageCode(); String abbreviation = removeNumber(numberWord); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java new file mode 100644 index 0000000000..9b9362952e --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java @@ -0,0 +1,209 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; +import org.junit.Ignore; +import org.junit.Test; +import org.schabi.newpipe.DownloaderTestImpl; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.downloader.Downloader; +import org.schabi.newpipe.extractor.downloader.Response; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.localization.Localization; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; +import static org.schabi.newpipe.extractor.localization.AbbreviationHelper.abbreviationSubscribersCount; +import static org.schabi.newpipe.extractor.utils.Utils.cleanWhiteSpaces; +import static org.schabi.newpipe.extractor.utils.Utils.removeNumber; + +/** + * A 
class that tests abbreviations and subscriber counts for all the languages YouTube supports. + */ +@Ignore("Should be ran manually from time to time, as it's too time consuming.") +public class YoutubeSubcriberTest { + + private static final String url = "https://www.youtube.com/feed/guide_builder"; + private static final int PAUSE_DURATION_EXTRACTORS = 250; + private static final int PAUSE_DURATION_ABBREVIATIONS = 125; + + public static String getAbbreviation(String count) { + return removeNumber(cleanWhiteSpaces(count)); + } + + public static void assertEqualsWithEnglish(String channelUrl) throws ExtractionException, IOException, InterruptedException { + NewPipe.init(DownloaderTestImpl.getInstance(), new Localization("en")); + YoutubeChannelExtractor extractorEnglish = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelUrl); + extractorEnglish.fetchPage(); + long englishSubCount = extractorEnglish.getSubscriberCount(); + Localization localization; + for (int z = 0; z < YouTube.getSupportedLocalizations().size(); z++) { + localization = YouTube.getSupportedLocalizations().get(z); + System.out.println("Current localization: " + localization); + NewPipe.init(DownloaderTestImpl.getInstance(), localization); + YoutubeChannelExtractor extractor = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelUrl); + extractor.fetchPage(); + + long subcriberCount = extractor.getSubscriberCount(); + assertEquals("language that failed:" + localization.toString() + ".\nWe", englishSubCount, subcriberCount); + Thread.sleep(PAUSE_DURATION_EXTRACTORS); + } + } + + public static void assertEqualsWithEnglish(String channelUrl, Localization loc) throws ExtractionException, IOException { + //for only one language + NewPipe.init(DownloaderTestImpl.getInstance(), new Localization("en")); + YoutubeChannelExtractor extractorEnglish = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelUrl); + extractorEnglish.fetchPage(); + long englishSubCount = 
extractorEnglish.getSubscriberCount(); + + NewPipe.init(DownloaderTestImpl.getInstance(), loc); + YoutubeChannelExtractor extractor = (YoutubeChannelExtractor) YouTube + .getChannelExtractor(channelUrl); + extractor.fetchPage(); + assertEquals(englishSubCount, extractor.getSubscriberCount()); + } + + public static void assertEqualsWithEnglish(String channelUrl, String languageCode) throws ExtractionException, IOException { + assertEqualsWithEnglish(channelUrl, new Localization(languageCode)); + + } + + public void runTest(Document doc, Localization localisation) throws ParsingException { + String currentSubscriberCountString; + String currentChannelName; + + Elements elements = doc.select(".yt-subscriber-count"); + for (int i = 0; i < elements.size(); i++) { + currentSubscriberCountString = doc.select(".yt-subscriber-count").get(i).attr("title"); + currentChannelName = doc.select(".yt-ui-ellipsis.yt-ui-ellipsis-2.yt-uix-sessionlink").get(i).attr("title"); + String abbreviation = getAbbreviation(currentSubscriberCountString); + try { + abbreviation = abbreviation.replace(abbreviation, abbreviationSubscribersCount.get(abbreviation)); + } catch (NullPointerException e) { + if (!abbreviation.isEmpty()) { + throw new ParsingException("This should be a real failed test. 
Abbreviation=\"" + abbreviation + "\"" + + "\nLocalization : " + localisation + + "\nOriginal string gathered from YouTube =\"" + currentSubscriberCountString + "\"" + + "\nTitle of the channel (probably wrong):" + currentChannelName); + } else { + //see if it's not one of the languages giving a number directly + try { //special cases where it gives a number directly for some languages, or with a dot or a comma + String maybeAlreadyNumber = currentSubscriberCountString.replaceAll("([ ., ])", ""); + long count = Long.parseLong(maybeAlreadyNumber); + } catch (NumberFormatException x) { + System.err.println("The abbreviation is empty, this is probably a failed request" + + "Localization :" + localisation); + } + } + } + } + } + + /* + ======================== + TESTS FOR ABBREVIATIONS + ======================== + */ + + @Test + public void testOneLanguageAbbreviations() throws IOException, ReCaptchaException, ParsingException, InterruptedException { + Localization loc = new Localization("ms"); + //change the value of loc if you wanna test a specific language. + + NewPipe.init(DownloaderTestImpl.getInstance(), loc); + Downloader dl = NewPipe.getDownloader(); + Response response = dl.get(url); + Document doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + + /* + Uncomment this if you want to view the html file in your browser, search for the subscriber count given by the Exception. + You'll get the real channel name (because the one given by Exception is often desynchronised). + run with your browser, and you'll see how much subscribers the channel have + then you can know the abbreviation given by the exception correspond to (eg a million, a thousand, 10 thousand…) + add it in the AbbreviationHelper.java map. 
+ */ +// String pathToYTBTests = "src/test/java/org/schabi/newpipe/extractor/services/youtube/"; +// createFile(pathToYTBTests +"DELETEME_failTestYTBsubscriber" + loc.toString() + ".html", doc.toString()); + runTest(doc, loc); + } + + @Test + public void testAllLanguagesAbbreviations() throws IOException, ReCaptchaException, InterruptedException, ParsingException { + List docs = new ArrayList<>(); + int totalCount = 0; + Localization localization; + + for (int z = 0; z < YouTube.getSupportedLocalizations().size(); z++) { + localization = YouTube.getSupportedLocalizations().get(z); + System.out.println("Current localization: " + localization); + NewPipe.init(DownloaderTestImpl.getInstance(), localization); + Downloader dl = NewPipe.getDownloader(); + Response response = dl.get(url); + Document doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + docs.add(doc); + runTest(doc, localization); + totalCount += doc.select(".yt-subscriber-count").size(); + Thread.sleep(PAUSE_DURATION_ABBREVIATIONS); //slowed down a bit to decrease reCAPTCHAs rate and false negatives + } + System.out.println("docs size: " + docs.size()); + System.out.println("total count (should be around 112*80=8960)" + totalCount); + } + + /* + ======================== + TESTS WITH THE EXTRACTOR + There are often false positives (the test with all languages often fail, but if you try the failed language + it will be ok. Increase PAUSE_DURATION_EXTRACTORS to prevent false positives. 
+ ======================== + */ + + @Test + public void testDisabled() throws IOException, ExtractionException, InterruptedException { + //every languages should give -1 + Localization localization; + for (int z = 0; z < YouTube.getSupportedLocalizations().size(); z++) { + localization = YouTube.getSupportedLocalizations().get(z); + System.out.println("Current localization: " + localization); + NewPipe.init(DownloaderTestImpl.getInstance(), localization); + YoutubeChannelExtractor extractor = (YoutubeChannelExtractor) YouTube + .getChannelExtractor("https://www.youtube.com/user/EminemVEVO/"); + extractor.fetchPage(); + + long subcriberCount = extractor.getSubscriberCount(); + assertEquals("language that failed:" + localization.toString() + ".\nWe", -1, subcriberCount); + Thread.sleep(PAUSE_DURATION_EXTRACTORS); + } + } + + @Test + public void testOneLanguageExtractor() throws ExtractionException, IOException { + assertEqualsWithEnglish("https://www.youtube.com/channel/UCPNxhDvTcytIdvwXWAm43cA", "ml"); + } + + @Test + public void testHighestSubsOnYoutube() throws ExtractionException, IOException, InterruptedException { + assertEqualsWithEnglish("https://www.youtube.com/user/tseries"); + } + + @Test + public void testKurzgesagt() throws InterruptedException, ExtractionException, IOException { + assertEqualsWithEnglish("https://www.youtube.com/user/Kurzgesagt"); + } + + @Test + public void testSelenaGomez() throws InterruptedException, ExtractionException, IOException { + assertEqualsWithEnglish("https://www.youtube.com/channel/UCPNxhDvTcytIdvwXWAm43cA"); + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java deleted file mode 100644 index 9d266867a6..0000000000 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ /dev/null @@ -1,1243 +0,0 @@ -package 
org.schabi.newpipe.extractor.services.youtube; - -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; -import org.schabi.newpipe.DownloaderTestImpl; -import org.schabi.newpipe.extractor.NewPipe; -import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.localization.Localization; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; - -import java.io.IOException; - -import static org.junit.Assert.assertEquals; -import static org.schabi.newpipe.extractor.ServiceList.YouTube; -import static org.schabi.newpipe.extractor.localization.AbbreviationHelper.abbreviationSubscribersCount; -import static org.schabi.newpipe.extractor.utils.Utils.removeNumber; - -/** - * Test for {@link YoutubeChannelExtractor} - * and specifically YoutubeChannelExtractor.getSubscriberCount() - * in all the languages supported by YouTube. - * Takes a long time because we need to test make 146 requests to YouTube - * DON'T RUN ON MOBILE DATA - *

    - * pattern for functions name: - * testlangcodeRegionabbreviation (Region is optional) - * eg: - * testenk = english thousand - * testfrCam = French (Canada) million - * testzhTwk = Chinese (Taiwan) thousand - */ - -/* -Ignoring the test because otherwise it will slow down too much the CI test. -And also, often one up to three tests fail if you launch the whole tests, because some requests fail -(it could be reCAPTCHAs) but they, as of today (2020-02-16) success if you run each one. - -To run the test (to investigate maybe future problems), -You should temporarily create « public Document getDoc() { return this.doc;} » method in YoutubeChannelExtractor. - */ - -/* -Tested channels: -https://www.youtube.com/user/NeekoMonster, 1K -https://www.youtube.com/channel/UCZ8PPBXJmEW4UV53zm3pC_w, 8.64K -https://www.youtube.com/channel/UCgd-Ztt8sr_J7nSdz25FQgA, 51.6K -https://www.youtube.com/channel/UC_Fh8kvtkVPkeihBs42jGcA, 104K -https://invidio.us/channel/TroncheEnBiais, 181K -https://invidio.us/channel/UCsTK8xMZKkrbTeWc8REbn8Q, 651K -https://invidio.us/channel/UCYpRDnhk5H8h16jpS84uqsA, 744K - -https://invidio.us/channel/UClOeGHFiUlegRJFGhkMxoHg, 1.2M -https://www.youtube.com/user/lemondealenversvideo, 3M -https://www.youtube.com/channel/UC-J-KZfRV8c13fOCkhXdLiQ, 12.M -https://invidio.us/channel/BANGTANTV, 25M -https://www.youtube.com/user/zeemusiccompany, 50,5M -https://invidio.us/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw, 103M -https://www.youtube.com/user/tseries, 129M - -https://www.youtube.com/user/EminemVEVO/, disabled -The test fails (because of the ut function), but actually it's ok. 
-see testDisabled() - */ - -@Ignore -public class YoutubeSubscriberTest { - - private static final String channelThousand = "https://invidio.us/channel/UCq-8pBMM3I40QlrhM9ExXJQ "; - private static final String channelMillion = "https://www.youtube.com/channel/UC-J-KZfRV8c13fOCkhXdLiQ"; - private static long countMillion; - private static long countThousand; - - @BeforeClass - public static void setUp() throws Exception { - NewPipe.init(DownloaderTestImpl.getInstance(), Localization.DEFAULT); - - YoutubeChannelExtractor extractorMillion = (YoutubeChannelExtractor) YouTube - .getChannelExtractor(channelMillion); - extractorMillion.fetchPage(); - countMillion = extractorMillion.getSubscriberCount(); - - YoutubeChannelExtractor extractorThousand = (YoutubeChannelExtractor) YouTube - .getChannelExtractor(channelThousand); - extractorThousand.fetchPage(); - countThousand = extractorThousand.getSubscriberCount(); - } - - public static String getSubscriberCount(YoutubeChannelExtractor extractor) { - //fetches and return number abbreviation - //eg 26,8 k - Document doc = extractor.getDoc(); - Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first(); - return el.attr("title"); - } - - public static String getAbbreviation(String count) { - count = count.replaceAll("(\\s| | )", ""); - return removeNumber(count); - } - - public static String getAbbreviation(YoutubeChannelExtractor extractor) { - return getAbbreviation(getSubscriberCount(extractor)); - } - - @Test - public void testDisabled() throws ExtractionException, IOException { - NewPipe.init(DownloaderTestImpl.getInstance(), Localization.DEFAULT); - YoutubeChannelExtractor en = (YoutubeChannelExtractor) YouTube.getChannelExtractor("https://www.youtube.com/user/EminemVEVO/"); - en.fetchPage(); - System.out.println(en.getSubscriberCount() + " " + en.getExtractorLocalization()); - - NewPipe.init(DownloaderTestImpl.getInstance(), Localization.fromLocalizationCode("ar")); - 
YoutubeChannelExtractor fr = (YoutubeChannelExtractor) YouTube.getChannelExtractor("https://www.youtube.com/user/EminemVEVO/"); - fr.fetchPage(); - System.out.println(fr.getSubscriberCount() + " " + fr.getExtractorLocalization()); - - assertEquals(fr.getSubscriberCount(), en.getSubscriberCount()); - } - - public YoutubeChannelExtractor getExtractor(Localization loc, String channelUrl) throws ExtractionException, IOException { - NewPipe.init(DownloaderTestImpl.getInstance(), loc); - YoutubeChannelExtractor extractor = (YoutubeChannelExtractor) YouTube - .getChannelExtractor(channelUrl); - extractor.fetchPage(); - return extractor; - } - - public YoutubeChannelExtractor getExtractor(String type, Localization loc) throws ExtractionException, IOException { - if (type.equals("k")) { - return getExtractorThousand(loc); - } else if (type.equals("m")) { - return getExtractorMillion(loc); - } - return null; - } - - public YoutubeChannelExtractor getExtractorMillion(Localization loc) throws ExtractionException, IOException { - return getExtractor(loc, channelMillion); - } - - public YoutubeChannelExtractor getExtractorThousand(Localization loc) throws ExtractionException, IOException { - return getExtractor(loc, channelThousand); - } - - public void ut(YoutubeChannelExtractor extractor) { - String subscriberCount = getSubscriberCount(extractor); - System.out.println(extractor.getExtractorLocalization() + ": " + subscriberCount); - String abbreviation = getAbbreviation(subscriberCount); - try { - abbreviation = abbreviation.replace(abbreviation, abbreviationSubscribersCount.get(abbreviation)); - } catch (NullPointerException e) { - if (!abbreviation.isEmpty()) { - try { - throw new Exception("this should be a real failed test. 
Abbreviation=\"" + abbreviation + "\"" + - "Localization :" + extractor.getExtractorLocalization()); - } catch (Exception ex) { - ex.printStackTrace(); //do that instead of adding "throws Exception" to signature, because otherwise I would have - //to edit the 146 signatures. - } - } else { - //see if it's not one of the languages giving a number directly - try { //special cases where it gives a number directly for some languages, or with a dot or a comma - String maybeAlreadyNumber = subscriberCount.replaceAll("([ ., ])", ""); - long count = Long.parseLong(maybeAlreadyNumber); - } catch (NumberFormatException i) { - System.out.println("The abbreviation is empty, this is probably a failed request" + - "Localization :" + extractor.getExtractorLocalization()); - } - } - } - } - - public void buildthousand(Localization loc) throws IOException, ExtractionException { - String languageCode = loc.getLanguageCode(); - System.out.println(); - YoutubeChannelExtractor current = getExtractor("k", loc); - ut(current); - String abr = getAbbreviation(getSubscriberCount(current)); - System.out.println(" abbreviationSubscribersCount.put(\"" + abr + "\", englishThousandAbbreviation); //" + - languageCode); - String s = " @Test\n" + - " public void test" + languageCode + "k() throws IOException, ExtractionException {\n" + - " YoutubeChannelExtractor extractor = getExtractor(\"k\", new Localization(\"" + languageCode + - "\"));\n" + " ut(extractor);\n" + - " assertEquals(countThousand, extractor.getSubscriberCount());\n" + - " }"; - System.out.println(s + "\n"); - } - - public void buildmillion(Localization loc) throws IOException, ExtractionException { - String languageCode = loc.getLanguageCode(); - String s = " @Test\n" + - " public void test" + languageCode + "m() throws IOException, ExtractionException {\n" + - " YoutubeChannelExtractor extractor = getExtractor(\"m\", new Localization(\"" + languageCode + - "\"));\n" + " ut(extractor);\n" + - " assertEquals(countMillion, 
extractor.getSubscriberCount());\n" + - " }"; - System.out.println(s); - System.out.println(); - YoutubeChannelExtractor current = getExtractor("m", loc); - ut(current); - String abr = getAbbreviation(getSubscriberCount(current)); - System.out.println(" abbreviationSubscribersCount.put(\"" + abr + "\", englishMillionAbbreviation); //" + - languageCode); - } - - @Test - public void testafk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("af")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testafm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("af")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testamk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("am")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testamm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("am")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testark() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ar")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testarm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ar")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testazk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("az")); - ut(extractor); - 
assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testazm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("az")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testbek() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("be")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testbem() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("be")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testbgk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("bg")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testbgm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("bg")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testbnk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("bn")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testbnm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("bn")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testbsk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("bs")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test 
- public void testbsm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("bs")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testcak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ca")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testcam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ca")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testcsk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("cs")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testcsm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("cs")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testdak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("da")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testdam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("da")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testdek() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("de")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testdem() throws IOException, ExtractionException { - 
YoutubeChannelExtractor extractor = getExtractor("m", new Localization("de")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testelk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("el")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testelm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("el")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testes419k() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("es", "419")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testesUSk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("es", "US")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testesUSm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("es", "US")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testes419m() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("es", "419")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - - @Test - public void testesk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("es")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testesm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = 
getExtractor("m", new Localization("es")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testetk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("et")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testetm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("et")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testeuk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("eu")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testeum() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("eu")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testfak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fa")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testfam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fa")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testfik() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fi")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testfim() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fi")); - ut(extractor); - 
assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testfrk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fr")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testfrm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fr")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testfrCak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("fr", "CA")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testfrCam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("fr", "CA")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testglk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("gl")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testglm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("gl")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testguk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("gu")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testgum() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("gu")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); 
- } - - @Test - public void testhik() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hi")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testhim() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hi")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testhrk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hr")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testhrm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hr")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testhuk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hu")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testhum() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hu")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testhyk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("hy")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testhym() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("hy")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testidk() throws IOException, 
ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("id")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testidm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("id")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testisk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("is")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testism() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("is")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testitk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("it")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testitm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("it")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testiwk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("iw")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testiwm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("iw")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testjak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = 
getExtractor("k", new Localization("ja")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testjam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ja")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testkak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ka")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testkam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ka")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testkmk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("km")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testkmm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("km")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testknk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("kn")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testknm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("kn")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testkok() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ko")); - ut(extractor); - 
assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testkom() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ko")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testkyk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ky")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testkym() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ky")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testlok() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("lo")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testlom() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("lo")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testltk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("lt")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testltm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("lt")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testlvk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("lv")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test 
- public void testlvm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("lv")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testmkk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("mk")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testmkm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("mk")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testmnk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("mn")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testmnm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("mn")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testmrk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("mr")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testmrm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("mr")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testmyk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("my")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testmym() throws IOException, ExtractionException { - 
YoutubeChannelExtractor extractor = getExtractor("m", new Localization("my")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testnek() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ne")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testnem() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ne")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testnlk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("nl")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testnlm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("nl")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testnok() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("no")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testnom() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("no")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testpak() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("pa")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testpam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new 
Localization("pa")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testplk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("pl")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testplm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("pl")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testptk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("pt")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testptm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("pt")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testrok() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ro")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testrom() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ro")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testruk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ru")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testrum() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ru")); - ut(extractor); - assertEquals(countMillion, 
extractor.getSubscriberCount()); - } - - @Test - public void testsik() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("si")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testsim() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("si")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testskk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sk")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testskm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sk")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testslk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sl")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testslm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sl")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testsqk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sq")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testsqm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sq")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testsrk() 
throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sr")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testsrm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sr")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testsrLatnk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sr", "Latn")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testsrLatnm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sr", "Latn")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testsvk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sv")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testsvm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sv")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testswk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("sw")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testswm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("sw")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testtak() throws IOException, ExtractionException { - 
YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ta")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testtam() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ta")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testtek() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("te")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testtem() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("te")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testthk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("th")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testthm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("th")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testtrk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("tr")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testtrm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("tr")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testukk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new 
Localization("uk")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testukm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("uk")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testurk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("ur")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testurm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("ur")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testuzk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("uz")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testuzm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("uz")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testvik() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("vi")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testvim() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("vi")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testzhCnk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zh", "CN")); - ut(extractor); - assertEquals(countThousand, 
extractor.getSubscriberCount()); - } - - @Test - public void testzhCnm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zh", "CN")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testzhHkk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zh", "HK")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testzhHkm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zh", "HK")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testzhTwk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zh", "TW")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testzhTwm() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zh", "TW")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Test - public void testzuk() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("k", new Localization("zu")); - ut(extractor); - assertEquals(countThousand, extractor.getSubscriberCount()); - } - - @Test - public void testzum() throws IOException, ExtractionException { - YoutubeChannelExtractor extractor = getExtractor("m", new Localization("zu")); - ut(extractor); - assertEquals(countMillion, extractor.getSubscriberCount()); - } - - @Ignore - @Test - public void build() throws IOException, ExtractionException, InterruptedException { - Localization current = YouTube.getSupportedLocalizations().get(79); - buildthousand(current); - buildmillion(current); - 
System.out.println(); - Thread.sleep(500); - System.out.println(); - } -} From ff6c6a899eb4ce10316386d9fd892b02a5f3c43c Mon Sep 17 00:00:00 2001 From: bopol Date: Mon, 17 Feb 2020 23:02:28 +0100 Subject: [PATCH 10/15] small reformatting --- .../org/schabi/newpipe/extractor/utils/Utils.java | 13 ++++++++++--- ...ubcriberTest.java => YoutubeSubscriberTest.java} | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) rename extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/{YoutubeSubcriberTest.java => YoutubeSubscriberTest.java} (98%) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index b595d99bc2..525805797d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -93,13 +93,20 @@ public static long mixedNumberWordToLong(String numberWord) throws NumberFormatE } } - public static String cleanWhiteSpaces(String s) { + public static String removeWhiteSpaces(String s) { return s.replaceAll("(\\s| | )", ""); } - //does the same as the function above, but for the 80 languages supported by YouTube. + /** + * Does the same as {@link #mixedNumberWordToLong(String)}, but for the 80 languages supported by YouTube. 
+ * + * @param numberWord string to be converted to a long + * @param loc: a {@link Localization} + * @return a long + * @throws ParsingException + */ public static long mixedNumberWordToLong(String numberWord, Localization loc) throws ParsingException { - numberWord = cleanWhiteSpaces(numberWord); + numberWord = removeWhiteSpaces(numberWord); String langCode = loc.getLanguageCode(); String abbreviation = removeNumber(numberWord); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java similarity index 98% rename from extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java rename to extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index 9b9362952e..355f003140 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubcriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -22,21 +22,21 @@ import static org.junit.Assert.assertEquals; import static org.schabi.newpipe.extractor.ServiceList.YouTube; import static org.schabi.newpipe.extractor.localization.AbbreviationHelper.abbreviationSubscribersCount; -import static org.schabi.newpipe.extractor.utils.Utils.cleanWhiteSpaces; import static org.schabi.newpipe.extractor.utils.Utils.removeNumber; +import static org.schabi.newpipe.extractor.utils.Utils.removeWhiteSpaces; /** * A class that tests abbreviations and subscriber counts for all the languages YouTube supports. 
*/ @Ignore("Should be ran manually from time to time, as it's too time consuming.") -public class YoutubeSubcriberTest { +public class YoutubeSubscriberTest { private static final String url = "https://www.youtube.com/feed/guide_builder"; private static final int PAUSE_DURATION_EXTRACTORS = 250; private static final int PAUSE_DURATION_ABBREVIATIONS = 125; public static String getAbbreviation(String count) { - return removeNumber(cleanWhiteSpaces(count)); + return removeNumber(removeWhiteSpaces(count)); } public static void assertEqualsWithEnglish(String channelUrl) throws ExtractionException, IOException, InterruptedException { From e52a86d0ea375f402226e39adcbd5e19753002b4 Mon Sep 17 00:00:00 2001 From: bopol Date: Wed, 19 Feb 2020 15:28:47 +0100 Subject: [PATCH 11/15] improve createFile func --- .../newpipe/extractor/utils/UtilsTest.java | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java index 151216c8db..9a2264f941 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java @@ -1,12 +1,17 @@ package org.schabi.newpipe.extractor.utils; +import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParserException; +import com.grack.nanojson.JsonWriter; import org.junit.Test; import org.schabi.newpipe.extractor.exceptions.ParsingException; import java.io.BufferedWriter; +import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; import static org.junit.Assert.assertEquals; @@ -21,8 +26,29 @@ public void testMixedNumberWordToLong() throws JsonParserException, ParsingExcep } public static void createFile(String path, String content) throws IOException { + String[] dirs = path.split("/"); + if 
(dirs.length > 1) { + String pathWithoutFileName = path.replace(dirs[dirs.length - 1], ""); + if (!Files.exists(Paths.get(pathWithoutFileName))) { //create dirs if they don't exist + new File(pathWithoutFileName).mkdirs(); + } + } + writeFile(path, content); + } + + //lower level createFile. Doesn't create directories and takes only a String + public static void writeFile(String path, String content) throws IOException { BufferedWriter writer = new BufferedWriter(new FileWriter(path)); writer.write(content); + writer.flush(); writer.close(); } -} + + public static String jsonObjToString(JsonObject object) { + return JsonWriter.string(object); + } + + public static void createFile(String path, JsonObject content) throws IOException { + createFile(path, jsonObjToString(content)); + } +} \ No newline at end of file From 8e0c7254d17500264b19c119df09a1eb15cac8c2 Mon Sep 17 00:00:00 2001 From: bopol Date: Wed, 19 Feb 2020 16:35:05 +0100 Subject: [PATCH 12/15] refactor mixedWordtolong&improve YTBSubscriberTest It now doesn't fail the whole test if one language fails, but shows an error on the console. You may want to check individually the language that failed after the test, with testOneLanguageExtractor(). For mixedNumberWordToLong, using powers of ten may lead to a small rounding error.
--- .../schabi/newpipe/extractor/utils/Utils.java | 14 +++++----- .../youtube/YoutubeSubscriberTest.java | 26 +++++++++++++------ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 525805797d..9ecddca6fd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -75,19 +75,19 @@ public static long mixedNumberWordToLong(String numberWord) throws NumberFormatE .replace(",", ".")); switch (multiplier.toUpperCase()) { case "K": - return (long) (count * 1e3); + return (long) (count * 1000); case "万": //10K, used by east-asian languages - return (long) (count * 1e4); + return (long) (count * 10_000); case "ল": //100K, used by indo-arabic languages - return (long) (count * 1e5); + return (long) (count * 100_000); case "M": - return (long) (count * 1e6); + return (long) (count * 1_000_000); case "ক": //10M, used by indo-arabic languages - return (long) (count * 1e7); + return (long) (count * 10_000_000); case "億": //100M, used by east-asian languages - return (long) (count * 1e8); + return (long) (count * 100_000_000); case "B": - return (long) (count * 1e9); + return (long) (count * 1_000_000_000); default: return (long) (count); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index 355f003140..63de817eca 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -55,7 +55,12 @@ public static void assertEqualsWithEnglish(String channelUrl) throws ExtractionE extractor.fetchPage(); long 
subcriberCount = extractor.getSubscriberCount(); - assertEquals("language that failed:" + localization.toString() + ".\nWe", englishSubCount, subcriberCount); + if (subcriberCount == -1) { + System.err.println("Subscriber count for " + localization.toString() + " was -1;\n" + + "If the channel doesn't have the subscribers disabled, it was probably a failed request"); + } else { + assertEquals("Language that failed:" + localization.toString() + ".\nWe", englishSubCount, subcriberCount); + } Thread.sleep(PAUSE_DURATION_EXTRACTORS); } } @@ -182,28 +187,33 @@ public void testDisabled() throws IOException, ExtractionException, InterruptedE extractor.fetchPage(); long subcriberCount = extractor.getSubscriberCount(); - assertEquals("language that failed:" + localization.toString() + ".\nWe", -1, subcriberCount); + assertEquals("Language that failed: " + localization.toString() + "\n We ", -1, subcriberCount); Thread.sleep(PAUSE_DURATION_EXTRACTORS); } } + //don't use invidious links, they take more time and the tests fail more + private static final String highestSubsUrl = "https://www.youtube.com/user/tseries"; + private static final String selenaGomezUrl = "https://www.youtube.com/channel/UCPNxhDvTcytIdvwXWAm43cA"; + private static final String franjoUrl = "https://www.youtube.com/channel/UC53gfTiWvslLPNuoDcoxmVg"; + @Test public void testOneLanguageExtractor() throws ExtractionException, IOException { - assertEqualsWithEnglish("https://www.youtube.com/channel/UCPNxhDvTcytIdvwXWAm43cA", "ml"); + assertEqualsWithEnglish(franjoUrl, "ms"); } @Test public void testHighestSubsOnYoutube() throws ExtractionException, IOException, InterruptedException { - assertEqualsWithEnglish("https://www.youtube.com/user/tseries"); + assertEqualsWithEnglish(highestSubsUrl); } @Test - public void testKurzgesagt() throws InterruptedException, ExtractionException, IOException { - assertEqualsWithEnglish("https://www.youtube.com/user/Kurzgesagt"); + public void testSelenaGomez() throws 
InterruptedException, ExtractionException, IOException { + assertEqualsWithEnglish(selenaGomezUrl); } @Test - public void testSelenaGomez() throws InterruptedException, ExtractionException, IOException { - assertEqualsWithEnglish("https://www.youtube.com/channel/UCPNxhDvTcytIdvwXWAm43cA"); + public void testFranjo() throws InterruptedException, ExtractionException, IOException { + assertEqualsWithEnglish(franjoUrl); } } From 4e91921e6b1649570105c5a10adffaea308e2ec0 Mon Sep 17 00:00:00 2001 From: bopol Date: Wed, 19 Feb 2020 17:42:59 +0100 Subject: [PATCH 13/15] debug branch --- .../extractor/services/youtube/YoutubeSubscriberTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java index 63de817eca..fb5b04a876 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriberTest.java @@ -186,8 +186,8 @@ public void testDisabled() throws IOException, ExtractionException, InterruptedE .getChannelExtractor("https://www.youtube.com/user/EminemVEVO/"); extractor.fetchPage(); - long subcriberCount = extractor.getSubscriberCount(); - assertEquals("Language that failed: " + localization.toString() + "\n We ", -1, subcriberCount); + long subscriberCount = extractor.getSubscriberCount(); + assertEquals("Language that failed: " + localization.toString() + "\n We ", -1, subscriberCount); Thread.sleep(PAUSE_DURATION_EXTRACTORS); } } From ea68770ee1d5ed8367081257274137ddb5371938 Mon Sep 17 00:00:00 2001 From: bopol Date: Fri, 21 Feb 2020 14:55:54 +0100 Subject: [PATCH 14/15] fix travis CI + some typos --- .../linkhandler/ListLinkHandlerFactory.java | 2 +- .../SearchQueryHandlerFactory.java | 12 +++++------ 
.../schabi/newpipe/extractor/utils/Utils.java | 2 +- .../YoutubeStreamExtractorDefaultTest.java | 21 +++++++++++-------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java index 9ea478b02c..7b04078c9b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java @@ -73,7 +73,7 @@ public ListLinkHandler fromQuery(String id, * however it should not be overridden by the actual implementation. * * @param id - * @return the url coresponding to id without any filters applied + * @return the url corresponding to id without any filters applied */ public String getUrl(String id) throws ParsingException { return getUrl(id, new ArrayList(0), ""); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/SearchQueryHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/SearchQueryHandlerFactory.java index 50977e20c1..d46670c9ba 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/SearchQueryHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/SearchQueryHandlerFactory.java @@ -12,7 +12,7 @@ public abstract class SearchQueryHandlerFactory extends ListLinkHandlerFactory { /////////////////////////////////// @Override - public abstract String getUrl(String querry, List contentFilter, String sortFilter) throws ParsingException; + public abstract String getUrl(String query, List contentFilter, String sortFilter) throws ParsingException; public String getSearchString(String url) { return ""; @@ -28,21 +28,21 @@ public String getId(String url) { } @Override - public SearchQueryHandler fromQuery(String querry, + public 
SearchQueryHandler fromQuery(String query, List contentFilter, String sortFilter) throws ParsingException { - return new SearchQueryHandler(super.fromQuery(querry, contentFilter, sortFilter)); + return new SearchQueryHandler(super.fromQuery(query, contentFilter, sortFilter)); } - public SearchQueryHandler fromQuery(String querry) throws ParsingException { - return fromQuery(querry, new ArrayList(0), ""); + public SearchQueryHandler fromQuery(String query) throws ParsingException { + return fromQuery(query, new ArrayList(0), ""); } /** * It's not mandatory for NewPipe to handle the Url * * @param url - * @return + * @return if we should accept the url */ @Override public boolean onAcceptUrl(String url) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 9ecddca6fd..bf5ba34010 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -101,7 +101,7 @@ public static String removeWhiteSpaces(String s) { * Does the same as {@link #mixedNumberWordToLong(String)}, but for the 80 languages supported by YouTube. 
* * @param numberWord string to be converted to a long - * @param loc: a {@link Localization} + * @param loc a {@link Localization} * @return a long * @throws ParsingException */ diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index 7309621f8a..bcc8b39dca 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -250,15 +250,18 @@ public void testGetDescription() throws ParsingException { @Test public void testGetFullLinksInDescription() throws ParsingException { - assertTrue(extractor.getDescription().getContent().contains("https://www.youtube.com/watch?v=X7FLCHVXpsA&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); - assertTrue(extractor.getDescription().getContent().contains("https://www.youtube.com/watch?v=Lqv6G0pDNnw&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); - assertTrue(extractor.getDescription().getContent().contains("https://www.youtube.com/watch?v=XxaRBPyrnBU&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); - assertTrue(extractor.getDescription().getContent().contains("https://www.youtube.com/watch?v=U-9tUEOFKNU&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); - - assertFalse(extractor.getDescription().getContent().contains("https://youtu.be/X7FLCHVXpsA?list=PL7...")); - assertFalse(extractor.getDescription().getContent().contains("https://youtu.be/Lqv6G0pDNnw?list=PL7...")); - assertFalse(extractor.getDescription().getContent().contains("https://youtu.be/XxaRBPyrnBU?list=PL7...")); - assertFalse(extractor.getDescription().getContent().contains("https://youtu.be/U-9tUEOFKNU?list=PL7...")); + Description description = extractor.getDescription(); + String content = 
description.getContent(); + if (description.getType() == Description.HTML) { + assertTrue(content.contains("https://www.youtube.com/watch?v=X7FLCHVXpsA&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + assertTrue(content.contains("https://www.youtube.com/watch?v=Lqv6G0pDNnw&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + assertTrue(content.contains("https://www.youtube.com/watch?v=XxaRBPyrnBU&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + assertTrue(content.contains("https://www.youtube.com/watch?v=U-9tUEOFKNU&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + } + assertFalse(content.contains("https://youtu.be/X7FLCHVXpsA?list=PL7...")); + assertFalse(content.contains("https://youtu.be/Lqv6G0pDNnw?list=PL7...")); + assertFalse(content.contains("https://youtu.be/XxaRBPyrnBU?list=PL7...")); + assertFalse(content.contains("https://youtu.be/U-9tUEOFKNU?list=PL7...")); } } From a60aa107ce7660642fcd336089c2fee7bbab3e75 Mon Sep 17 00:00:00 2001 From: bopol Date: Fri, 21 Feb 2020 16:34:17 +0100 Subject: [PATCH 15/15] actually fix testGetFullLinksInDescription see this: https://github.com/TeamNewPipe/NewPipeExtractor/pull/257#discussion_r382647356 --- .../stream/YoutubeStreamExtractorDefaultTest.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index bcc8b39dca..c31d9a06b5 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -253,11 +253,21 @@ public void testGetFullLinksInDescription() throws ParsingException { Description description = extractor.getDescription(); String content = description.getContent(); if 
(description.getType() == Description.HTML) { - assertTrue(content.contains("https://www.youtube.com/watch?v=X7FLCHVXpsA&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + //we should have full links + assertTrue(content.contains("https://www.youtube.com/watch?v=X7FLCHVXpsA&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); assertTrue(content.contains("https://www.youtube.com/watch?v=Lqv6G0pDNnw&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); assertTrue(content.contains("https://www.youtube.com/watch?v=XxaRBPyrnBU&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); assertTrue(content.contains("https://www.youtube.com/watch?v=U-9tUEOFKNU&list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + } else { + //type == PLAIN_TEXT, we should have shortened links + assertTrue(content.contains("https://youtu.be/X7FLCHVXpsA?list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + assertTrue(content.contains("https://youtu.be/Lqv6G0pDNnw?list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + assertTrue(content.contains("https://youtu.be/XxaRBPyrnBU?list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); + assertTrue(content.contains("https://youtu.be/U-9tUEOFKNU?list=PL7u4lWXQ3wfI_7PgX0C-VTiwLeu0S4v34")); } + + //we should NEVER have broken shortened links, which the HTML description could give us + // YoutubeStreamExtractor.parseHtmlAndGetFullLinks fixes these potentially broken links, so we test that function here. assertFalse(content.contains("https://youtu.be/X7FLCHVXpsA?list=PL7...")); assertFalse(content.contains("https://youtu.be/Lqv6G0pDNnw?list=PL7...")); assertFalse(content.contains("https://youtu.be/XxaRBPyrnBU?list=PL7..."));