11package com .rarchives .ripme .ripper .rippers ;
22
3- import com .rarchives .ripme .ripper .AbstractHTMLRipper ;
4- import com .rarchives .ripme .utils .Http ;
53import java .io .IOException ;
64import java .net .MalformedURLException ;
75import java .net .URI ;
119import java .util .List ;
1210import java .util .regex .Matcher ;
1311import java .util .regex .Pattern ;
12+
1413import org .jsoup .nodes .Document ;
1514import org .jsoup .nodes .Element ;
1615
16+ import com .rarchives .ripme .ripper .AbstractHTMLRipper ;
17+ import com .rarchives .ripme .utils .Http ;
1718
1819public class MrCongRipper extends AbstractHTMLRipper {
19-
2020 private Document currDoc ;
2121 private int lastPageNum ;
2222 private int currPageNum ;
@@ -29,43 +29,48 @@ public MrCongRipper(URL url) throws IOException {
2929
3030 @ Override
3131 public String getHost () {
32- return "mrcong " ;
32+ return "misskon " ;
3333 }
3434
3535 @ Override
3636 public String getDomain () {
37- return "mrcong.com" ;
37+ // NOTE: This was previously mrcong.com, which now redirects to
38+ // misskon.com. Some resources still refer to mrcong.com
39+ // but all the top level URLs are now misskon.com
40+ return "misskon.com" ;
3841 }
3942
4043 @ Override
4144 public String getGID (URL url ) throws MalformedURLException {
4245 System .out .println (url .toExternalForm ());
43- Pattern p = Pattern .compile ("^https?://mrcong\\ .com/(\\ S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$" );
44- Pattern p2 = Pattern .compile ("^https?://mrcong\\ .com/tag/(\\ S*)/$" ); //Added 6-10-21
46+ Pattern p = Pattern .compile (
47+ "^https?://(?:[a-z]+\\ .)?misskon\\ .com/([-0-9a-zA-Z]+)(?:/?|/[0-9]+/?)?$" );
48+ Pattern p2 = Pattern .compile ("^https?://misskon\\ .com/tag/(\\ S*)/$" );
4549 Matcher m = p .matcher (url .toExternalForm ());
46- Matcher m2 = p2 .matcher (url .toExternalForm ()); //6-10-21
50+ Matcher m2 = p2 .matcher (url .toExternalForm ());
4751 if (m .matches ()) {
4852 return m .group (1 );
49- }
50- else if (m2 .matches ()) { //Added 6-10-21
53+ } else if (m2 .matches ()) {
5154 tagPage = true ;
5255 System .out .println ("tagPage = TRUE" );
5356 return m2 .group (1 );
5457 }
5558
56- throw new MalformedURLException ("Expected mrcong.com URL format: "
57- + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead" );
59+ throw new MalformedURLException ("Expected misskon.com URL format: "
60+ + "misskon.com/GALLERY_NAME (or /PAGE_NUMBER/) - got " + url
61+ + " instead" );
5862 }
5963
6064 @ Override
61- public Document getFirstPage () throws IOException { //returns the root gallery page regardless of actual page number
65+ public Document getFirstPage () throws IOException {
66+ // returns the root gallery page regardless of actual page number
6267 // "url" is an instance field of the superclass
6368 String rootUrlStr ;
6469 URL rootUrl ;
6570
66- if (!tagPage ) {
71+ if (!tagPage ) {
6772 rootUrlStr = url .toExternalForm ().replaceAll ("(|/|/[0-9]+/?)$" , "/" );
68- } else { //6-10-21
73+ } else { // 6-10-21
6974 rootUrlStr = url .toExternalForm ().replaceAll ("(page/[0-9]+/)$" , "page/1/" );
7075 }
7176
@@ -81,51 +86,56 @@ public Document getFirstPage() throws IOException { //returns the root gallery p
8186 public Document getNextPage (Document doc ) throws IOException {
8287 int pageNum = currPageNum ;
8388 String urlStr ;
84- if (!tagPage ) {
89+ if (!tagPage ) {
8590 if (pageNum == 1 && lastPageNum > 1 ) {
8691 urlStr = url .toExternalForm ().concat ((pageNum + 1 ) + "" );
8792 System .out .printf ("Old Str: %s New Str: %s\n " , url .toExternalForm (), urlStr );
8893 } else if (pageNum < lastPageNum ) {
8994 urlStr = url .toExternalForm ().replaceAll ("(/([0-9]*)/?)$" , ("/" + (pageNum + 1 ) + "/" ));
9095 System .out .printf ("Old Str: %s New Str: %s\n " , url .toString (), urlStr );
9196 } else {
92- //System.out.printf("Error: Page number provided goes past last valid page number\n");
97+ // System.out.printf("Error: Page number provided goes past last valid page
98+ // number\n");
9399 throw (new IOException ("Error: Page number provided goes past last valid page number\n " ));
94100 }
95- } else { //6-10-21
96- //if (pageNum == 1 && lastPageNum >= 1) {
97- if (pageNum == 1 && lastPageNum > 1 ) { //6-10-21
101+ } else { // 6-10-21
102+ // if (pageNum == 1 && lastPageNum >= 1) {
103+ if (pageNum == 1 && lastPageNum > 1 ) { // 6-10-21
98104 urlStr = url .toExternalForm ().concat ("page/" + (pageNum + 1 ) + "" );
99105 System .out .printf ("Old Str: %s New Str: %s\n " , url .toExternalForm (), urlStr );
100106 } else if (pageNum < lastPageNum ) {
101107 urlStr = url .toExternalForm ().replaceAll ("(page/([0-9]*)/?)$" , ("page/" + (pageNum + 1 ) + "/" ));
102108 System .out .printf ("Old Str: %s New Str: %s\n " , url .toString (), urlStr );
103109 } else {
104- //System.out.printf("Error: Page number provided goes past last valid page number\n");
110+ // System.out.printf("Error: Page number provided goes past last valid page
111+ // number\n");
105112 System .out .print ("Error: There is no next page!\n " );
106113 return null ;
107- //throw (new IOException("Error: Page number provided goes past last valid page number\n"));
114+ // throw (new IOException("Error: Page number provided goes past last valid page
115+ // number\n"));
108116 }
109117 }
110118
111119 url = URI .create (urlStr ).toURL ();
112120 currDoc = Http .url (url ).get ();
113- currPageNum ++;//hi
121+ currPageNum ++;// hi
114122 return currDoc ;
115123 }
116124
117125 private int getMaxPageNumber (Document doc ) {
118- if (!tagPage ) {
126+ if (!tagPage ) {
119127 try {
120- lastPageNum = Integer .parseInt (doc .select ("div.page-link > a" ).last ().text ()); //gets the last possible page for the gallery
121- } catch (Exception e ) {
128+ // gets the last possible page for the gallery
129+ lastPageNum = Integer .parseInt (doc .select ("div.page-link > a" ).last ().text ());
130+ } catch (Exception e ) {
122131 return 1 ;
123132 }
124133 } else {
125134 try {
126- lastPageNum = Integer .parseInt (doc .select ("div.pagination > a" ).last ().text ()); //gets the last possible page for the gallery
135+ // gets the last possible page for the gallery
136+ lastPageNum = Integer .parseInt (doc .select ("div.pagination > a" ).last ().text ());
127137 System .out .println ("The last page found for " + url + " was " + lastPageNum );
128- } catch (Exception e ) {
138+ } catch (Exception e ) {
129139 return 1 ;
130140 }
131141 }
@@ -134,9 +144,9 @@ private int getMaxPageNumber(Document doc) {
134144 }
135145
136146 private int getCurrentPageNum (Document doc ) {
137- int currPage ; //6-10-21
147+ int currPage ; // 6-10-21
138148
139- if (!tagPage ) {
149+ if (!tagPage ) {
140150 currPage = Integer .parseInt (doc .select ("div.page-link > span" ).first ().text ());
141151 } else {
142152 currPage = Integer .parseInt (doc .select ("div.pagination > span" ).first ().text ());
@@ -148,45 +158,25 @@ private int getCurrentPageNum(Document doc) {
148158 }
149159
150160 @ Override
151- public List <String > getURLsFromPage (Document doc ) { //gets the urls of the images
161+ public List <String > getURLsFromPage (Document doc ) { // gets the urls of the images
152162 List <String > result = new ArrayList <>();
153163
154- if (!tagPage ) {
164+ if (!tagPage ) {
155165 for (Element el : doc .select ("p > img" )) {
156- String imageSource = el .attr ("src" );
166+ String imageSource = el .attr ("data- src" );
157167 result .add (imageSource );
158168 }
159169
160170 System .out .println ("\n 1.)Printing List: " + result + "\n " );
161- } else { //6-10-21
162- //List<String> gallery_set_list = new ArrayList<>();
163-
171+ } else {
164172 for (Element el : doc .select ("h2 > a" )) {
165173 String pageSource = el .attr ("href" );
166- if (!pageSource .equals ("https://mrcong .com/" )) {
174+ if (!pageSource .equals ("https://misskon .com/" )) {
167175 result .add (pageSource );
168176 System .out .println ("\n " + pageSource + " has been added to the list." );
169177 }
170178 }
171179
172- /*for (String el2 : gallery_set_list) {
173- try {
174- URL temp_urL = URI.create(el2).toURL();
175- MrCongRipper mcr = new MrCongRipper(temp_urL);
176- System.out.println("URL being ripped: " + mcr.url.toString());
177- result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
178-
179- Document nextPg = mcr.getNextPage(mcr.currDoc);
180- while(nextPg != null) {
181- result.addAll(mcr.getURLsFromPage(nextPg));
182- nextPg = mcr.getNextPage(mcr.currDoc);
183- }
184- } catch (IOException e) {
185- e.printStackTrace();
186- }
187-
188- }*/
189-
190180 System .out .println ("\n 2.)Printing List: " + result + "\n " );
191181 }
192182
@@ -195,21 +185,20 @@ public List<String> getURLsFromPage(Document doc) { //gets the urls of the image
195185
196186 @ Override
197187 public void downloadURL (URL url , int index ) {
198- //addURLToDownload(url, getPrefix(index));
199-
200- if (!tagPage ) {
188+ if (!tagPage ) {
201189 addURLToDownload (url , getPrefix (index ));
202190 } else {
203191 try {
204192 List <String > ls = this .getURLsFromPage (this .currDoc );
205193 Document np = this .getNextPage (this .currDoc );
206194
207- while (np != null ) { //Creates a list of all sets to download
195+ // Creates a list of all sets to download
196+ while (np != null ) {
208197 ls .addAll (this .getURLsFromPage (np ));
209198 np = this .getNextPage (np );
210199 }
211200
212- for (String urlStr : ls ) {
201+ for (String urlStr : ls ) {
213202 MrCongRipper mcr = new MrCongRipper (URI .create (urlStr ).toURL ());
214203 mcr .setup ();
215204 mcr .rip ();
@@ -220,4 +209,4 @@ public void downloadURL(URL url, int index) {
220209 }
221210 }
222211 }
223- }
212+ }
0 commit comments