2828 * Simplified ripper, designed for ripping from sites by parsing HTML.
2929 */
3030public abstract class AbstractHTMLRipper extends AbstractRipper {
31-
31+
3232 private final Map <URL , File > itemsPending = Collections .synchronizedMap (new HashMap <>());
3333 private final Map <URL , Path > itemsCompleted = Collections .synchronizedMap (new HashMap <>());
3434 private final Map <URL , String > itemsErrored = Collections .synchronizedMap (new HashMap <>());
@@ -60,11 +60,15 @@ protected Document getCachedFirstPage() throws IOException, URISyntaxException {
/**
 * Base implementation: ignores {@code doc} and always returns {@code null}.
 * NOTE(review): presumably {@code null} means "no further pages" (rip() loops
 * while its current document is non-null) and paginated rippers override
 * this method -- confirm against rip().
 *
 * @param doc the current page (unused by this implementation)
 * @return always {@code null} in this implementation
 * @throws IOException declared in the signature; never thrown by this implementation
 * @throws URISyntaxException declared in the signature; never thrown by this implementation
 */
6060 public Document getNextPage (Document doc ) throws IOException , URISyntaxException {
6161 return null ;
6262 }
63- protected abstract List <String > getURLsFromPage (Document page ) throws UnsupportedEncodingException ;
63+
/**
 * Extracts a list of URL strings from the given page; implemented by subclasses.
 * NOTE(review): presumably these are the strings rip() later converts with
 * {@code new URI(...).toURL()} before downloading -- confirm against rip().
 *
 * @param page the parsed document to extract from
 * @return list of URL strings found on the page
 * @throws UnsupportedEncodingException declared for implementations to use
 * @throws URISyntaxException declared for implementations to use
 */
64+ protected abstract List <String > getURLsFromPage (Document page ) throws UnsupportedEncodingException , URISyntaxException ;
65+
/**
 * Base implementation: always throws, signalling that description extraction
 * is not implemented in this class.
 * NOTE(review): presumably rippers that support descriptions override this -- confirm.
 *
 * @param doc the page to extract descriptions from (unused by this implementation)
 * @return never returns normally in this implementation
 * @throws IOException always, with the message "getDescriptionsFromPage not implemented"
 */
6466 protected List <String > getDescriptionsFromPage (Document doc ) throws IOException {
6567 throw new IOException ("getDescriptionsFromPage not implemented" ); // TODO(review): open question -- keep throwing here, or make this method abstract instead?
6668 }
69+
/**
 * Handles a single URL; implemented by subclasses.
 * rip() calls this once per image URL string it iterates, after converting
 * the string with {@code new URI(imageURL).toURL()}.
 *
 * @param url   the URL to process
 * @param index running counter that rip() increments before each call
 */
6770 protected abstract void downloadURL (URL url , int index );
71+
/**
 * Base implementation: always returns {@code null}.
 * NOTE(review): presumably rippers that download through a thread pool
 * override this, and callers must tolerate {@code null} -- confirm.
 *
 * @return always {@code null} in this implementation
 */
6872 protected DownloadThreadPool getThreadPool () {
6973 return null ;
7074 }
@@ -130,7 +134,7 @@ public void rip() throws IOException, URISyntaxException {
130134 List <String > doclocation = new ArrayList <>();
131135
132136 LOGGER .info ("Got doc location " + doc .location ());
133-
137+
134138 while (doc != null ) {
135139
136140 LOGGER .info ("Processing a doc..." );
@@ -167,7 +171,7 @@ public void rip() throws IOException, URISyntaxException {
167171 for (String imageURL : imageURLs ) {
168172 index += 1 ;
169173 LOGGER .debug ("Found image url #" + index + ": '" + imageURL + "'" );
170- downloadURL (new URL (imageURL ), index );
174+ downloadURL (new URI (imageURL ). toURL ( ), index );
171175 if (isStopped () || isThisATest ()) {
172176 break ;
173177 }
@@ -182,19 +186,26 @@ public void rip() throws IOException, URISyntaxException {
182186 if (isStopped () || isThisATest ()) {
183187 break ;
184188 }
189+
185190 textindex += 1 ;
186191 LOGGER .debug ("Getting description from " + textURL );
187192 String [] tempDesc = getDescription (textURL ,doc );
193+
188194 if (tempDesc != null ) {
189- if (Utils .getConfigBoolean ("file.overwrite" , false ) || !(new File (
190- workingDir .getCanonicalPath ()
191- + ""
192- + File .separator
193- + getPrefix (index )
194- + (tempDesc .length > 1 ? tempDesc [1 ] : fileNameFromURL (new URL (textURL )))
195- + ".txt" ).exists ())) {
195+ URL url = new URI (textURL ).toURL ();
196+ String filename = fileNameFromURL (url );
197+
198+ boolean fileExists = new File (
199+ workingDir .getCanonicalPath ()
200+ + ""
201+ + File .separator
202+ + getPrefix (index )
203+ + (tempDesc .length > 1 ? tempDesc [1 ] : filename )
204+ + ".txt" ).exists ();
205+
206+ if (Utils .getConfigBoolean ("file.overwrite" , false ) || !fileExists ) {
196207 LOGGER .debug ("Got description from " + textURL );
197- saveText (new URL ( textURL ) , "" , tempDesc [0 ], textindex , (tempDesc .length > 1 ? tempDesc [1 ] : fileNameFromURL ( new URL ( textURL )) ));
208+ saveText (url , "" , tempDesc [0 ], textindex , (tempDesc .length > 1 ? tempDesc [1 ] : filename ));
198209 sleep (descSleepTime ());
199210 } else {
200211 LOGGER .debug ("Description from " + textURL + " already exists." );
@@ -225,12 +236,12 @@ public void rip() throws IOException, URISyntaxException {
225236 }
226237 waitForThreads ();
227238 }
228-
239+
229240 /**
230241 * Gets the file name from the URL
231- * @param url
242+ * @param url
232243 * URL that you want to get the filename from
233- * @return
244+ * @return
234245 * Filename of the URL
235246 */
236247 private String fileNameFromURL (URL url ) {
@@ -244,7 +255,7 @@ private String fileNameFromURL(URL url) {
244255 return saveAs ;
245256 }
246257 /**
247- *
258+ *
248259 * @param url
249260 * Target URL
250261 * @param subdirectory
@@ -253,7 +264,7 @@ private String fileNameFromURL(URL url) {
253264 * Text you want to save
254265 * @param index
255266 * Index in something like an album
256- * @return
267+ * @return
257268 * True if ripped successfully
258269 * False if failed
259270 */
@@ -295,12 +306,12 @@ private boolean saveText(URL url, String subdirectory, String text, int index, S
295306 }
296307 return true ;
297308 }
298-
309+
299310 /**
300311 * Gets prefix based on where in the index it is
301- * @param index
312+ * @param index
302313 * The index in question
303- * @return
314+ * @return
304315 * Returns prefix for a file. (?)
305316 */
306317 protected String getPrefix (int index ) {
@@ -313,9 +324,9 @@ protected String getPrefix(int index) {
313324
314325 /*
315326 * ------ Methods copied from AlbumRipper. ------
316- * This removes AlbumnRipper's usage from this class.
327+ * This removes AlbumRipper's usage from this class.
317328 */
318-
329+
/**
 * Always returns {@code false} in this class.
 * NOTE(review): presumably controls whether a URL that was already seen may
 * be queued again -- confirm against the callers.
 *
 * @return {@code false}
 */
319330 protected boolean allowDuplicates () {
320331 return false ;
321332 }
0 commit comments