diff --git a/.gitignore b/.gitignore index b61a59ef2..7a15b8f05 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ ripme.jar.update ripme.jar rip.properties history.json +.idea +*.iml diff --git a/src/main/java/com/rarchives/ripme/App.java b/src/main/java/com/rarchives/ripme/App.java index 408f9a8e7..e6d22aac6 100644 --- a/src/main/java/com/rarchives/ripme/App.java +++ b/src/main/java/com/rarchives/ripme/App.java @@ -3,6 +3,10 @@ import java.io.File; import java.io.FilenameFilter; import java.io.IOException; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.FileNotFoundException; + import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; @@ -138,26 +142,54 @@ public static void handleArguments(String[] args) { // change the default rips directory Utils.setConfigString("rips.directory", cl.getOptionValue('l')); } - if (cl.hasOption('u')) { - // User provided URL, rip it. + + if (cl.hasOption('f')) { + + String filename = cl.getOptionValue('f'); + try { - URL url = new URL(cl.getOptionValue('u').trim()); - rip(url); - List history = Utils.getConfigList("download.history"); - if (!history.contains(url.toExternalForm())) { - history.add(url.toExternalForm()); - Utils.setConfigList("download.history", Arrays.asList(history.toArray())); - if(!cl.hasOption("n")) { - Utils.saveConfig(); - } + + String url; + + BufferedReader br = new BufferedReader(new FileReader(filename)); + while((url = br.readLine()) != null) { + // loop through each url in the file and proces each url individually. + ripURL(url.trim(), cl.hasOption("n")); + } + + } catch (FileNotFoundException fne) { + logger.error("[!] File containing list of URLs not found. Cannot continue."); + } catch (IOException ioe) { + logger.error("[!] Failed reading file containing list of URLs. Cannot continue."); + } + + } + + if (cl.hasOption('u')) { + String url = cl.getOptionValue('u').trim(); + ripURL(url, cl.hasOption("n")); + } + } + + // this function will attempt to rip the provided url + public static void ripURL(String targetURL, boolean saveConfig) { + try { + URL url = new URL(targetURL); + rip(url); + List history = Utils.getConfigList("download.history"); + if (!history.contains(url.toExternalForm())) { + history.add(url.toExternalForm()); + Utils.setConfigList("download.history", Arrays.asList(history.toArray())); + if(saveConfig) { + Utils.saveConfig(); } - } catch (MalformedURLException e) { - logger.error("[!] Given URL is not valid. Expected URL format is http://domain.com/..."); - System.exit(-1); - } catch (Exception e) { - logger.error("[!] Error while ripping URL " + cl.getOptionValue('u'), e); - System.exit(-1); } + } catch (MalformedURLException e) { + logger.error("[!] Given URL is not valid. Expected URL format is http://domain.com/..."); + // System.exit(-1); + } catch (Exception e) { + logger.error("[!] Error while ripping URL " + targetURL, e); + // System.exit(-1); } } @@ -174,6 +206,7 @@ public static Options getOptions() { opts.addOption("4", "skip404", false, "Don't retry after a 404 (not found) error"); opts.addOption("l", "ripsdirectory", true, "Rips Directory (Default: ./rips)"); opts.addOption("n", "no-prop-file", false, "Do not create properties file."); + opts.addOption("f", "urls-file", true, "Rip URLs from a file."); return opts; } diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index 5b2698085..2d6143752 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -106,23 +106,9 @@ public boolean addURLToDownload(URL url, String prefix, String subdirectory, Str return false; } logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies); - String saveAs = url.toExternalForm(); - saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); - if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } - if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } - if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } - if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } File saveFileAs; try { - if (!subdirectory.equals("")) { - subdirectory = File.separator + subdirectory; - } - saveFileAs = new File( - workingDir.getCanonicalPath() - + subdirectory - + File.separator - + prefix - + saveAs); + saveFileAs = getSaveAsFile(url, prefix, subdirectory); } catch (IOException e) { logger.error("[!] Error creating save file path for URL '" + url + "':", e); return false; @@ -134,7 +120,26 @@ public boolean addURLToDownload(URL url, String prefix, String subdirectory, Str } return addURLToDownload(url, saveFileAs, referrer, cookies); } - + + protected File getSaveAsFile(URL url, String prefix, String subdirectory) throws IOException { + String saveAs = url.toExternalForm(); + saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } + if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } + if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } + if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } + File saveFileAs; + if (!subdirectory.equals("")) { + subdirectory = File.separator + subdirectory; + } + saveFileAs = new File( + workingDir.getCanonicalPath() + + subdirectory + + File.separator + + prefix + + saveAs); + return saveFileAs; + } /** * Queues file to be downloaded and saved. With options. diff --git a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java index e8dbc6e32..8369673ad 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java @@ -1,10 +1,13 @@ package com.rarchives.ripme.ripper; +import com.rarchives.ripme.utils.Http; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -12,6 +15,7 @@ import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; +import org.jsoup.nodes.Document; public abstract class AlbumRipper extends AbstractRipper { @@ -19,6 +23,8 @@ public abstract class AlbumRipper extends AbstractRipper { protected Map itemsCompleted = Collections.synchronizedMap(new HashMap()); protected Map itemsErrored = Collections.synchronizedMap(new HashMap()); + protected HashMap docs = new HashMap(); + public AlbumRipper(URL url) throws IOException { super(url); } @@ -82,6 +88,18 @@ public boolean addURLToDownload(URL url, File saveAs, String referrer, Map cookies) { + File saveFileAs; + try { + saveFileAs = getSaveAsFile(url, prefix, subdirectory); + } catch (IOException e) { + logger.error("[!] Error creating save file path for URL '" + url + "':", e); + return false; + } + return addURLToDownload(url, saveFileAs, referrer, cookies); + } + @Override public boolean addURLToDownload(URL url, File saveAs) { return addURLToDownload(url, saveAs, null, null); @@ -210,4 +228,29 @@ public String getStatusText() { .append(", Errored: " ).append(itemsErrored.size()); return sb.toString(); } + + protected Document downloadAndSaveHTML(URL url) throws IOException { + String urlString = url.toExternalForm(); + Document doc = docs.get(urlString); + if (doc == null) { + doc = Http.url(url).header("User-Agent", USER_AGENT).referrer(url).cookies(Utils.getCookies(getHost())).get(); + docs.put(urlString, doc); + } + String filename = urlToFilename(url); + if (getWorkingDir() != null) { + Files.write(Paths.get(getWorkingDir().getCanonicalPath() + File.separator + filename), doc.toString().getBytes()); + } + return doc; + } + + protected static String urlToFilename(URL url) { + String filename = url.toExternalForm().replaceFirst("^https?://.*/", "").replaceFirst("[#&:].*$", ""); + if (filename.contains("?") && filename.contains(".")) { + int periodIdx = filename.lastIndexOf('.'); + int questionMarkIdx = filename.indexOf('?'); + String params = filename.substring(questionMarkIdx + 1).replaceAll("=", "-").replaceAll("&", "_"); + filename = filename.substring(0, periodIdx) + "_" + params + filename.substring(periodIdx, questionMarkIdx); + } + return filename; + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 058a368c7..17acd29db 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -29,26 +29,42 @@ * @author losipher */ public class EroShareRipper extends AbstractHTMLRipper { - + public static String HOST = "eroshare"; + public EroShareRipper (URL url) throws IOException { super(url); } @Override public String getDomain() { - return "eroshare.com"; + return HOST + ".com"; } @Override public String getHost() { - return "eroshare"; + return HOST; } @Override public void downloadURL(URL url, int index){ addURLToDownload(url); } - + + private static String cleanURL(String url) { + if (url.length() == 0) { + return ""; + } + if (url.startsWith("//")) { + url = "https:" + url; + } else { + String urlLower = url.toLowerCase(); + if (!urlLower.startsWith("http://") && !urlLower.startsWith("https://")) { + url = "https://" + url; + } + } + return url; + } + @Override public List getURLsFromPage(Document doc){ List URLs = new ArrayList(); @@ -57,8 +73,10 @@ public List getURLsFromPage(Document doc){ for (Element img : imgs){ if (img.hasClass("album-image")){ String imageURL = img.attr("src"); - imageURL = "https:" + imageURL; - URLs.add(imageURL); + imageURL = cleanURL(imageURL); + if (imageURL.length() > 0) { + URLs.add(imageURL); + } } } //Videos @@ -67,7 +85,10 @@ public List getURLsFromPage(Document doc){ if (vid.hasClass("album-video")){ Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(videoURL); + videoURL = cleanURL(videoURL); + if (videoURL.length() > 0) { + URLs.add(videoURL); + } } } @@ -87,7 +108,7 @@ public Document getFirstPage() throws IOException { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); + Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?.*$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); @@ -109,8 +130,10 @@ public static List getURLs(URL url) throws IOException{ for (Element img : imgs){ if (img.hasClass("album-image")){ String imageURL = img.attr("src"); - imageURL = "https:" + imageURL; - URLs.add(new URL(imageURL)); + imageURL = cleanURL(imageURL); + if (imageURL.length() > 0) { + URLs.add(new URL(imageURL)); + } } } //Videos @@ -119,10 +142,44 @@ public static List getURLs(URL url) throws IOException{ if (vid.hasClass("album-video")){ Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(new URL(videoURL)); + if (videoURL.length() > 0) { + URLs.add(new URL(videoURL)); + } } } return URLs; } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + String title = HOST; + Document doc = null; + try { + doc = downloadAndSaveHTML(url); + } catch (IOException e) { + title += "_" + getGID(url); + logger.error("Exception retrieving url=" + url + ": " + e.getMessage()); + } + if (doc != null) { + // Find username. + Element element = doc.select(".album-info-container a.avatar").first(); + if (element != null) { + title += "_" + (element.attr("href").length() > 0 ? element.attr("href").replaceAll("^/u/", "") : ""); + } else { + logger.warn("No username was found in the contents of url=" + url); + } + title += "_" + getGID(url); + element = doc.select(".album-info-container .center-mobile").first(); + if (element != null) { + title += "-" + element.text().replaceAll(" ", "_").replaceAll("[^a-zA-Z0-9_\\[\\]()\\.-]+", ""); + } else { + logger.warn("No title was found in the contents of url=" + url); + } + } else { + logger.warn("No username or title could be retrieved for url=" + url); + } + title = title.replaceAll("_+$", ""); + return title; + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 4df2b0ff3..1e41406e6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -37,14 +37,12 @@ public String getDomain() { @Override public boolean canRip(URL url) { - return (url.getHost().endsWith("instagram.com") - || url.getHost().endsWith("statigr.am") - || url.getHost().endsWith("iconosquare.com/")); + return (url.getHost().endsWith("instagram.com")); } @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$"); + Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); @@ -54,106 +52,70 @@ public String getGID(URL url) throws MalformedURLException { @Override public URL sanitizeURL(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://instagram\\.com/p/([a-zA-Z0-9\\-_.]{1,}).*$"); + Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]{3,}).*$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { - // Link to photo, not the user account - try { - url = getUserPageFromImage(url); - } catch (Exception e) { - logger.error("[!] Failed to get user page from " + url, e); - throw new MalformedURLException("Failed to retrieve user page from " + url); - } - } - p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]{3,}).*$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return new URL("http://iconosquare.com/" + m.group(1)); - } - p = Pattern.compile("^.*iconosquare\\.com/([a-zA-Z0-9\\-_.]{3,}).*$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return new URL("http://iconosquare.com/" + m.group(1)); - } - p = Pattern.compile("^.*statigr\\.am/([a-zA-Z0-9\\-_.]{3,}).*$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return new URL("http://iconosquare.com/" + m.group(1)); - } - throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url); - } - - private URL getUserPageFromImage(URL url) throws IOException { - Document doc = Http.url(url).get(); - for (Element element : doc.select("meta[property='og:description']")) { - String content = element.attr("content"); - if (content.endsWith("'s photo on Instagram")) { - return new URL("http://iconosquare/" + content.substring(0, content.indexOf("'"))); - } + return new URL("http://instagram.com/" + m.group(1)); } + throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url); } private String getUserID(URL url) throws IOException { - this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = Http.url(url).get(); - for (Element element : doc.select("input[id=user_public]")) { - return element.attr("value"); + + Pattern p = Pattern.compile("^https?://instagram\\.com/([^/]+)"); + Matcher m = p.matcher(url.toExternalForm()); + if(m.matches()) { + return m.group(1); } + throw new IOException("Unable to find userID at " + this.url); } @Override public JSONObject getFirstPage() throws IOException { userID = getUserID(url); - String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id=" - + userID; - logger.info("Loading " + baseURL); + + String baseURL = "http://instagram.com/" + userID + "/media"; try { JSONObject result = Http.url(baseURL).getJSON(); return result; } catch (JSONException e) { - throw new IOException("Could not get instagram user via iconosquare", e); + throw new IOException("Could not get instagram user via: " + baseURL); } } @Override public JSONObject getNextPage(JSONObject json) throws IOException { - if (isThisATest()) { - return null; - } - JSONObject pagination = json.getJSONObject("pagination"); - String nextMaxID = ""; - JSONArray datas = json.getJSONArray("data"); - for (int i = 0; i < datas.length(); i++) { - JSONObject data = datas.getJSONObject(i); - if (data.has("id")) { - nextMaxID = data.getString("id"); - } - } - if (nextMaxID.equals("")) { - if (!pagination.has("next_max_id")) { - throw new IOException("No next_max_id found, stopping"); - } - nextMaxID = pagination.getString("next_max_id"); + + boolean nextPageAvailable; + try { + nextPageAvailable = json.getBoolean("more_available"); + } catch (Exception e) { + throw new IOException("No additional pages found"); } - String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id=" - + userID - + "&max_id=" + nextMaxID; - logger.info("Loading " + baseURL); - sleep(1000); - JSONObject nextJSON = Http.url(baseURL).getJSON(); - datas = nextJSON.getJSONArray("data"); - if (datas.length() == 0) { + + if(nextPageAvailable) { + JSONArray items = json.getJSONArray("items"); + JSONObject last_item = items.getJSONObject(items.length() - 1); + String nextMaxID = last_item.getString("id"); + + String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID; + logger.info("Loading " + baseURL); + sleep(1000); + + JSONObject nextJSON = Http.url(baseURL).getJSON(); + + return nextJSON; + } else { throw new IOException("No more images found"); } - return nextJSON; } @Override public List getURLsFromJSON(JSONObject json) { List imageURLs = new ArrayList(); - JSONArray datas = json.getJSONArray("data"); + JSONArray datas = json.getJSONArray("items"); for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); String imageURL; @@ -166,6 +128,7 @@ public List getURLsFromJSON(JSONObject json) { } imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-"); imageURL = imageURL.replaceAll("s640x640/", ""); + imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", ""); imageURLs.add(imageURL); if (isThisATest()) { break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java index c6f9cab54..88ede8a67 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java @@ -1,8 +1,12 @@ package com.rarchives.ripme.ripper.rippers; +import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -17,14 +21,15 @@ public class XhamsterRipper extends AlbumRipper { private static final String HOST = "xhamster"; + private static Pattern xhPattern = Pattern.compile("^https?://[a-z.]*" + HOST + "\\.com/photos/(?:gallery/([0-9]+).*|view/([0-9]+)-([0-9]+)\\.html(?:.*)?)$"); + public XhamsterRipper(URL url) throws IOException { super(url); } @Override public boolean canRip(URL url) { - Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/[0-9]+.*$"); - Matcher m = p.matcher(url.toExternalForm()); + Matcher m = xhPattern.matcher(url.toExternalForm()); return m.matches(); } @@ -35,22 +40,41 @@ public URL sanitizeURL(URL url) throws MalformedURLException { @Override public void rip() throws IOException { + if (isGallery(url)) { + ripGallery(); + } else { + ripPhoto(); + } + } + + private static boolean isGallery(URL url) { + Matcher m = xhPattern.matcher(url.toExternalForm()); + if (!m.matches()) { + return false; + } + return m.group(3) == null || m.group(3).length() == 0; // Is a gallery. + } + + private void ripPhoto() throws IOException { + Document doc = downloadAndSaveHTML(url); + for (Element element : doc.select("img#imgSized")) { + String image = cleanImageSrc(element.attr("src")); + addURLToDownload(new URL(image), "", "", url.toExternalForm(), Utils.getCookies(HOST)); + } + waitForThreads(); + } + + private void ripGallery() throws IOException { int index = 0; - String nextURL = this.url.toExternalForm(); + String nextURL = url.toExternalForm(); while (nextURL != null) { logger.info(" Retrieving " + nextURL); - Document doc = Http.url(nextURL).get(); + Document doc = downloadAndSaveHTML(new URL(nextURL)); for (Element thumb : doc.select("table.iListing div.img img")) { if (!thumb.hasAttr("src")) { continue; } - String image = thumb.attr("src"); - image = image.replaceAll( - "http://p[0-9]*\\.", - "http://up."); - image = image.replaceAll( - "_160\\.", - "_1000."); + String image = cleanImageSrc(thumb.attr("src")); index += 1; String prefix = ""; if (Utils.getConfigBoolean("download.save_order", true)) { @@ -73,6 +97,55 @@ public void rip() throws IOException { waitForThreads(); } + private String cleanImageSrc(String imageSrc) { + imageSrc = imageSrc.replaceAll("https?://p[0-9]*\\.", "https?://up."); + imageSrc = imageSrc.replaceAll("_160\\.", "_1000."); + return imageSrc; + } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + String title = HOST + "_"; + Document doc = null; + try { + doc = downloadAndSaveHTML(url); + } catch (IOException e) { + logger.error("Exception retrieving url=" + url + ": " + e.getMessage()); + title += getGID(url); + } + if (doc != null) { + // Find username. + Element link = doc.select("#galleryUser .item a").first(); + if (link != null) { + title += link.text() + "_"; + } else { + logger.warn("No username was found in the contents of url=" + url); + } + } else { + logger.warn("No username could be retrieved for url=" + url); + } + String galleryLink = url.toExternalForm(); + if (!isGallery(url) && doc != null) { + for (Element link : doc.select("#viewBox a")) { + if (link != null) { + String href = link.attr("href"); + if (href.length() > 0 && !href.startsWith("#")) { + galleryLink = href; + break; + } + } + } + if (galleryLink == url.toExternalForm()) { + logger.warn("No gallery title link was found for url=" + url); + } + } + title += galleryLink + .replaceFirst("^http.*/photos/(?:gallery/([^?#:&]+)|view/([^-]+)-).*$", "$1$2") + .replace('/', '-') + .replace(".html", ""); + return title; + } + @Override public String getHost() { return HOST; @@ -80,15 +153,14 @@ public String getHost() { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://([a-z0-9.]*?)xhamster\\.com/photos/gallery/([0-9]{1,})/.*\\.html"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(2); - } - throw new MalformedURLException( + String gid = url.toExternalForm().replaceFirst("^https?://(?:[a-z0-9.]*?)" + HOST + "\\.com/photos/(?:gallery/([0-9]{1,})/.*\\.html|view/([^-]+)-).*$", "$1$2"); + if (gid.length() == 0) { + throw new MalformedURLException( "Expected xhamster.com gallery formats: " - + "xhamster.com/photos/gallery/#####/xxxxx..html" - + " Got: " + url); + + "http://xhamster.com/photos/gallery/#####/xxxxx..html or http://xhamster.com/photos/view/####-####.html" + + " Got: " + url); + } + return gid; } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java index cb6d57257..6ec61a59c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java @@ -6,7 +6,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.utils.Utils; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.VideoRipper; @@ -39,28 +41,50 @@ public URL sanitizeURL(URL url) throws MalformedURLException { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://.*xhamster\\.com/movies/([0-9]+).*$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - - throw new MalformedURLException( + String gid = url.toExternalForm().replaceFirst("^https?://.*" + HOST + "\\.com/movies/([0-9]+)/.*$", "$1"); + if (gid.length() == 0) { + throw new MalformedURLException( "Expected xhamster format:" - + "xhamster.com/movies/####" - + " Got: " + url); + + "xhamster.com/movies/####" + + " Got: " + url); + + } + return gid; } @Override public void rip() throws IOException { - logger.info("Retrieving " + this.url); - Document doc = Http.url(url).get(); + logger.info("Retrieving " + url); + Document doc = Http.url(url).header("User-Agent", USER_AGENT).referrer("http://" + HOST + ".com/").cookies(Utils.getCookies(HOST)).get(); Elements videos = doc.select("a.mp4Thumb"); if (videos.size() == 0) { throw new IOException("Could not find Embed code at " + url); } String vidUrl = videos.attr("href"); - addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + addURLToDownload(new URL(vidUrl), getVideoName(), "", url.toExternalForm(), Utils.getCookies(HOST)); waitForThreads(); } -} \ No newline at end of file + + private String getVideoName() throws IOException { + String title = HOST + "_"; + try { + Document doc = Http.url(url).header("User-Agent", USER_AGENT).referrer(url).cookies(Utils.getCookies(HOST)).get(); + Element link = doc.select("#videoUser a").first(); + if (link != null) { + title += link.text() + "_"; + } + } catch (IOException e) { + logger.error("Exception retrieving url=" + url + ": " + e.getMessage()); + try { + title += getGID(url); + } catch (MalformedURLException malformedEx) { + throw new IOException(malformedEx.getMessage()); + } + } + title += url.toExternalForm() + .replaceFirst("^https?://.*" + HOST + "\\.com/movies/([0-9]+)/([^\\.]+).*$", "$1_$2_") + .replaceAll("_+", "_"); + return title; + } + +} diff --git a/src/main/java/com/rarchives/ripme/ui/ClipboardUtils.java b/src/main/java/com/rarchives/ripme/ui/ClipboardUtils.java index 883336a10..8b68a4b24 100644 --- a/src/main/java/com/rarchives/ripme/ui/ClipboardUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/ClipboardUtils.java @@ -10,6 +10,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import static com.rarchives.ripme.App.logger; + public class ClipboardUtils { private static AutoripThread autoripThread = new AutoripThread(); @@ -33,6 +35,9 @@ public static String getClipboardString() { .getDefaultToolkit() .getSystemClipboard() .getData(DataFlavor.stringFlavor); + } catch (IllegalStateException e) { + e.printStackTrace(); + logger.error("Caught and recovered from IllegalStateException: " + e.getMessage()); } catch (HeadlessException e) { e.printStackTrace(); } catch (UnsupportedFlavorException e) { diff --git a/src/main/java/com/rarchives/ripme/ui/QueueMenuMouseListener.java b/src/main/java/com/rarchives/ripme/ui/QueueMenuMouseListener.java index c487cc641..bca467ee1 100644 --- a/src/main/java/com/rarchives/ripme/ui/QueueMenuMouseListener.java +++ b/src/main/java/com/rarchives/ripme/ui/QueueMenuMouseListener.java @@ -1,9 +1,12 @@ package com.rarchives.ripme.ui; +import java.awt.datatransfer.StringSelection; +import java.awt.datatransfer.Clipboard; import java.awt.event.ActionEvent; import java.awt.event.InputEvent; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; +import java.awt.Toolkit; import java.util.Enumeration; import javax.swing.AbstractAction; @@ -16,7 +19,8 @@ public class QueueMenuMouseListener extends MouseAdapter { private JPopupMenu popup = new JPopupMenu(); - private Action removeSelected, + private Action copySelected, + removeSelected, clearQueue; private JList queueList; private DefaultListModel queueListModel; @@ -24,6 +28,24 @@ public class QueueMenuMouseListener extends MouseAdapter { @SuppressWarnings("serial") public QueueMenuMouseListener() { + copySelected = new AbstractAction("Copy Selected") { + @Override + public void actionPerformed(ActionEvent ae) { + StringBuffer selection = new StringBuffer(); + for (Object value : queueList.getSelectedValuesList()) { + if (selection.length() > 0) { + selection.append('\n'); + } + selection.append(value); + } + StringSelection stringSelection = new StringSelection(selection.toString()); + Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard(); + clipboard.setContents(stringSelection, stringSelection); + updateUI(); + } + }; + popup.add(copySelected); + removeSelected = new AbstractAction("Remove Selected") { @Override public void actionPerformed(ActionEvent ae) { diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index 946fce549..b223d194a 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -9,7 +9,9 @@ import java.net.URLDecoder; import java.util.ArrayList; import java.util.Enumeration; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.jar.JarEntry; import java.util.jar.JarFile; @@ -71,6 +73,11 @@ public class Utils { } } + private static HashMap> cookieCache; + static { + cookieCache = new HashMap>(); + } + /** * Get the root rips directory. * @return @@ -387,4 +394,21 @@ public static List between(String fullText, String start, String finish) } return result; } + + public static Map getCookies(String host) { + HashMap domainCookies = cookieCache.get(host); + if (domainCookies == null) { + domainCookies = new HashMap(); + String cookiesConfig = getConfigString("cookies." + host, ""); + for (String pair : cookiesConfig.split(" ")) { + pair = pair.trim(); + if (pair.contains("=")) { + String[] pieces = pair.split("=", 2); + domainCookies.put(pieces[0], pieces[1]); + } + } + cookieCache.put(host, domainCookies); + } + return domainCookies; + } } diff --git a/src/main/resources/rip.properties b/src/main/resources/rip.properties index 1a0ff01c7..a92d99f75 100644 --- a/src/main/resources/rip.properties +++ b/src/main/resources/rip.properties @@ -30,3 +30,7 @@ twitter.max_requests = 10 clipboard.autorip = false download.save_order = true + +cookies.xhamster = +# e.g. cookies.xhamster = USERNAME=sleaze UID=69696969 PWD=144354bc90792a91957df1ef962908c1 fingerprint=d65f704a8fef31b5327175e00f1eeb85 + diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BasicRippersTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BasicRippersTest.java index 39f575760..7e11accb8 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BasicRippersTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BasicRippersTest.java @@ -169,7 +169,7 @@ public void testImagevenueRip() throws IOException { } public void testImgboxRip() throws IOException { - AbstractRipper ripper = new ImgboxRipper(new URL("http://imgbox.com/g/sEMHfsqx4w")); + AbstractRipper ripper = new ImgboxRipper(new URL("http://imgbox.com/g/z7Bj2FjxJX")); testRipper(ripper); }