Skip to content

Eroshare naming enhancements and XhamsterRipper method refactoring. #358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c51dc1f
attempt at fixing issue #330 instagram api changes
Jun 24, 2016
373f1d8
Cookie support for Xhamster gallery ripper.
Sep 20, 2016
3648668
Cookie support for Xhamster video ripper.
Oct 7, 2016
705356b
Xhamster gallery pretty naming (with username and title) and index fi…
Oct 7, 2016
1115ebc
Xhamster pretty video naming with username and video title in filename.
Oct 7, 2016
0fb881b
Xhamster individual picture ripping capability.
Oct 7, 2016
87ade90
Fixed invalid URL in ImgboxRipper integration test.
Oct 8, 2016
a8125e5
Recover from IllegalStateException when polling clipboard.
Oct 1, 2016
108dc1a
Merge branch 'sleaze/clipboard-exception-recovery'
Oct 8, 2016
5032ac9
Merge branch 'sleaze/gitignore-intellij'
Oct 8, 2016
56e3969
Merge branch 'sleaze/restore-window-position'
Oct 8, 2016
a981ae3
Merge branch 'sleaze/xhamster-cookie-support'
Oct 8, 2016
92cb8c4
Merge branch 'sleaze/xhamster-pretty-structure'
Oct 8, 2016
ff7b26c
Merge branch 'sleaze/xhamster-single-picture-ripper'
Oct 8, 2016
093ccb3
Merge remote-tracking branch 'JoshKastang/issue-330'
Oct 8, 2016
20b91b9
Merge remote-tracking branch 'JoshKastang/issue-343'
Oct 8, 2016
f1be9de
Skip Xhamster HTML meta-data file writing when current working dir is…
Oct 10, 2016
d72f71c
Merge branch 'sleaze/xhamster-pretty-structure' into sleaze/xhamster-…
Oct 10, 2016
0194ef6
Merge branch 'sleaze/xhamster-single-picture-ripper'
Oct 10, 2016
6e9b57c
Download Queue: Right-click 'Copy Selected' functionality.
Oct 11, 2016
92f56c1
Merge branch 'sleaze/queue-copy-selected'
Oct 11, 2016
64fcfe7
Eroshare naming enhancements and XhamsterRipper method refactoring.
Oct 27, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ ripme.jar.update
ripme.jar
rip.properties
history.json
.idea
*.iml
2 changes: 1 addition & 1 deletion src/main/java/com/rarchives/ripme/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static void main(String[] args) throws MalformedURLException {
Utils.configureLogger();
System.setProperty("apple.laf.useScreenMenuBar", "true");
System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe");
logger = Logger.getLogger(App.class);
logger = Logger.getLogger(App.class);
logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion());

if (args.length > 0) {
Expand Down
37 changes: 21 additions & 16 deletions src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,23 +106,9 @@ public boolean addURLToDownload(URL url, String prefix, String subdirectory, Str
return false;
}
logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies);
String saveAs = url.toExternalForm();
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
File saveFileAs;
try {
if (!subdirectory.equals("")) {
subdirectory = File.separator + subdirectory;
}
saveFileAs = new File(
workingDir.getCanonicalPath()
+ subdirectory
+ File.separator
+ prefix
+ saveAs);
saveFileAs = getSaveAsFile(url, prefix, subdirectory);
} catch (IOException e) {
logger.error("[!] Error creating save file path for URL '" + url + "':", e);
return false;
Expand All @@ -134,7 +120,26 @@ public boolean addURLToDownload(URL url, String prefix, String subdirectory, Str
}
return addURLToDownload(url, saveFileAs, referrer, cookies);
}


/**
 * Builds the local file a download of {@code url} should be written to.
 *
 * <p>The file name is the part of the URL after its last {@code '/'}, cut at the
 * first {@code '?'}, {@code '#'}, {@code '&'} or {@code ':'} it contains. The result
 * is placed under the working directory, inside {@code subdirectory} when that is
 * non-empty, with {@code prefix} prepended to the file name.
 *
 * @param url          URL whose last path segment provides the file name
 * @param prefix       text prepended to the file name
 * @param subdirectory directory below the working directory, or {@code ""} for none
 * @return the file to save the download as
 * @throws IOException if the working directory's canonical path cannot be resolved
 */
protected File getSaveAsFile(URL url, String prefix, String subdirectory) throws IOException {
    String external = url.toExternalForm();
    String name = external.substring(external.lastIndexOf('/') + 1);

    // Truncate at each delimiter in turn, in the same order as before.
    for (char stop : new char[] {'?', '#', '&', ':'}) {
        int at = name.indexOf(stop);
        if (at >= 0) {
            name = name.substring(0, at);
        }
    }

    String subdirPart = subdirectory.equals("") ? subdirectory : File.separator + subdirectory;

    StringBuilder path = new StringBuilder(workingDir.getCanonicalPath());
    path.append(subdirPart)
        .append(File.separator)
        .append(prefix)
        .append(name);
    return new File(path.toString());
}

/**
* Queues file to be downloaded and saved. With options.
Expand Down
43 changes: 43 additions & 0 deletions src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
package com.rarchives.ripme.ripper;

import com.rarchives.ripme.utils.Http;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;

public abstract class AlbumRipper extends AbstractRipper {

protected Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
protected Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
protected Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());

protected HashMap<String, Document> docs = new HashMap<String, Document>();

public AlbumRipper(URL url) throws IOException {
super(url);
}
Expand Down Expand Up @@ -82,6 +88,18 @@ public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<Strin
return true;
}

/**
 * Queues {@code url} for download, deriving the destination file from the URL,
 * {@code prefix} and {@code subdirectory} via {@link #getSaveAsFile}.
 *
 * @return {@code false} if the destination path could not be built; otherwise the
 *         result of the {@code File}-based overload
 */
@Override
public boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String,String> cookies) {
    final File destination;
    try {
        destination = getSaveAsFile(url, prefix, subdirectory);
    } catch (IOException ioe) {
        // Without a destination there is nothing to queue; report and give up on this URL.
        logger.error("[!] Error creating save file path for URL '" + url + "':", ioe);
        return false;
    }
    return addURLToDownload(url, destination, referrer, cookies);
}

@Override
public boolean addURLToDownload(URL url, File saveAs) {
return addURLToDownload(url, saveAs, null, null);
Expand Down Expand Up @@ -210,4 +228,29 @@ public String getStatusText() {
.append(", Errored: " ).append(itemsErrored.size());
return sb.toString();
}

/**
 * Returns the HTML document for {@code url}, fetching it at most once per ripper
 * instance, and writes a copy of it into the working directory.
 *
 * <p>Fetched documents are cached in {@code docs}, keyed by the URL's external form.
 * The copy is written on every call when a working directory is set and the URL
 * yields a non-empty file name (see {@link #urlToFilename}).
 *
 * @param url page to download
 * @return the parsed document (cached or freshly fetched)
 * @throws IOException if the page cannot be fetched or the copy cannot be written
 */
protected Document downloadAndSaveHTML(URL url) throws IOException {
    String urlString = url.toExternalForm();
    Document doc = docs.get(urlString);
    if (doc == null) {
        doc = Http.url(url).header("User-Agent", USER_AGENT).referrer(url).cookies(Utils.getCookies(getHost())).get();
        docs.put(urlString, doc);
    }
    String filename = urlToFilename(url);
    File workingDirectory = getWorkingDir();
    // A URL ending in '/' yields an empty file name; writing to it would target the
    // working directory itself and fail, discarding a document we already have.
    if (workingDirectory != null && !filename.isEmpty()) {
        // Fixed charset so the saved copy does not depend on the platform default.
        byte[] html = doc.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        Files.write(Paths.get(workingDirectory.getCanonicalPath(), filename), html);
    }
    return doc;
}

/**
 * Derives a file name from the last path segment of {@code url}.
 *
 * <p>Everything up to and including the last {@code '/'} is dropped, then everything
 * from the first {@code '#'}, {@code '&'} or {@code ':'} onwards. If the remainder has
 * a query string and contains a period, the query (with {@code '='} replaced by
 * {@code '-'} and {@code '&'} by {@code '_'}) is moved in front of the extension:
 * {@code page.php?id=5} becomes {@code page_id-5.php}. When the part before the
 * {@code '?'} has no extension, the query is simply appended after an underscore.
 * A name with a query but no period at all is returned unchanged.
 *
 * @param url URL to derive the name from
 * @return the derived file name; empty when the URL ends in {@code '/'}
 */
protected static String urlToFilename(URL url) {
    String filename = url.toExternalForm().replaceFirst("^https?://.*/", "").replaceFirst("[#&:].*$", "");
    int questionMarkIdx = filename.indexOf('?');
    if (questionMarkIdx >= 0 && filename.contains(".")) {
        String params = filename.substring(questionMarkIdx + 1).replaceAll("=", "-").replaceAll("&", "_");
        String path = filename.substring(0, questionMarkIdx);
        // Look for the extension only before the '?': a period inside the query
        // (e.g. "?v=1.2") previously produced an inverted substring range and threw.
        int periodIdx = path.lastIndexOf('.');
        if (periodIdx >= 0) {
            filename = path.substring(0, periodIdx) + "_" + params + path.substring(periodIdx);
        } else {
            filename = path + "_" + params;
        }
    }
    return filename;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,42 @@
* @author losipher
*/
public class EroShareRipper extends AbstractHTMLRipper {

public static String HOST = "eroshare";

public EroShareRipper (URL url) throws IOException {
super(url);
}

@Override
public String getDomain() {
return "eroshare.com";
return HOST + ".com";
}

@Override
public String getHost() {
return "eroshare";
return HOST;
}

@Override
public void downloadURL(URL url, int index){
addURLToDownload(url);
}


/**
 * Turns a possibly scheme-less URL string into an absolute one.
 *
 * <p>An empty string is returned as-is. A protocol-relative URL ({@code //host/...})
 * gets {@code https:} prepended. Anything else that does not already start with
 * {@code http://} or {@code https://} (compared case-insensitively) gets
 * {@code https://} prepended.
 *
 * @param url raw URL text taken from the page
 * @return the URL with a scheme, or {@code ""} for empty input
 */
private static String cleanURL(String url) {
    if (url.isEmpty()) {
        return "";
    }
    if (url.startsWith("//")) {
        return "https:" + url;
    }
    final String lowered = url.toLowerCase();
    final boolean hasScheme = lowered.startsWith("http://") || lowered.startsWith("https://");
    return hasScheme ? url : "https://" + url;
}

@Override
public List<String> getURLsFromPage(Document doc){
List<String> URLs = new ArrayList<String>();
Expand All @@ -57,8 +73,10 @@ public List<String> getURLsFromPage(Document doc){
for (Element img : imgs){
if (img.hasClass("album-image")){
String imageURL = img.attr("src");
imageURL = "https:" + imageURL;
URLs.add(imageURL);
imageURL = cleanURL(imageURL);
if (imageURL.length() > 0) {
URLs.add(imageURL);
}
}
}
//Videos
Expand All @@ -67,7 +85,10 @@ public List<String> getURLsFromPage(Document doc){
if (vid.hasClass("album-video")){
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
URLs.add(videoURL);
videoURL = cleanURL(videoURL);
if (videoURL.length() > 0) {
URLs.add(videoURL);
}
}
}

Expand All @@ -87,7 +108,7 @@ public Document getFirstPage() throws IOException {

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?.*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
Expand All @@ -109,8 +130,10 @@ public static List<URL> getURLs(URL url) throws IOException{
for (Element img : imgs){
if (img.hasClass("album-image")){
String imageURL = img.attr("src");
imageURL = "https:" + imageURL;
URLs.add(new URL(imageURL));
imageURL = cleanURL(imageURL);
if (imageURL.length() > 0) {
URLs.add(new URL(imageURL));
}
}
}
//Videos
Expand All @@ -119,10 +142,44 @@ public static List<URL> getURLs(URL url) throws IOException{
if (vid.hasClass("album-video")){
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
URLs.add(new URL(videoURL));
if (videoURL.length() > 0) {
URLs.add(new URL(videoURL));
}
}
}

return URLs;
}

/**
 * Builds the album title as {@code HOST[_username]_GID[-title]}.
 *
 * <p>The username and title are read from the album page. When the page cannot be
 * downloaded the title falls back to {@code HOST_GID}. Parts that are missing or
 * empty are left out together with their separator, and trailing underscores are
 * stripped from the result.
 *
 * @param url album URL
 * @return the title to use for this album
 * @throws MalformedURLException if {@code getGID} rejects {@code url}
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
    String title = HOST;
    Document doc = null;
    try {
        doc = downloadAndSaveHTML(url);
    } catch (IOException e) {
        title += "_" + getGID(url);
        // Pass the exception itself so the stack trace is not lost.
        logger.error("Exception retrieving url=" + url + ": " + e.getMessage(), e);
    }
    if (doc != null) {
        // Find username.
        Element element = doc.select(".album-info-container a.avatar").first();
        String username = element == null ? "" : element.attr("href").replaceAll("^/u/", "");
        if (!username.isEmpty()) {
            title += "_" + username;
        } else {
            // An empty username used to leave a doubled "_" in front of the GID.
            logger.warn("No username was found in the contents of url=" + url);
        }
        title += "_" + getGID(url);
        // Find album title, reduced to characters that are safe in a file name.
        element = doc.select(".album-info-container .center-mobile").first();
        String albumName = element == null
                ? ""
                : element.text().replaceAll(" ", "_").replaceAll("[^a-zA-Z0-9_\\[\\]()\\.-]+", "");
        if (!albumName.isEmpty()) {
            title += "-" + albumName;
        } else {
            // An empty album name used to leave a dangling "-" at the end.
            logger.warn("No title was found in the contents of url=" + url);
        }
    } else {
        logger.warn("No username or title could be retrieved for url=" + url);
    }
    title = title.replaceAll("_+$", "");
    return title;
}
}
Loading