Skip to content

Commit d4c2b09

Browse files
committed
Better support for extracting RSS fields from the description
1 parent 81b2593 commit d4c2b09

File tree

3 files changed

+43
-14
lines changed

3 files changed

+43
-14
lines changed

core/src/com/biglybt/core/metasearch/Result.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ public Map toJSONMap() {
489489
}
490490
}
491491

492-
protected static String removeHTMLTags(String input) {
492+
public static String removeHTMLTags(String input) {
493493
if ( input == null ){
494494
return( null );
495495
}

core/src/com/biglybt/core/metasearch/impl/web/rss/RSSEngine.java

+39-12
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.biglybt.core.metasearch.impl.web.WebResult;
3636
import com.biglybt.core.util.ByteFormatter;
3737
import com.biglybt.core.util.Debug;
38+
import com.biglybt.core.util.RegExUtil;
3839
import com.biglybt.core.util.SystemTime;
3940
import com.biglybt.core.util.UrlUtils;
4041
import com.biglybt.pif.utils.StaticUtilities;
@@ -48,8 +49,10 @@
4849
RSSEngine
4950
extends WebEngine
5051
{
51-
private Pattern seed_leecher_pat = Pattern.compile("([0-9]+)\\s+(seed|leecher)s", Pattern.CASE_INSENSITIVE);
52-
private Pattern size_pat = Pattern.compile("([0-9\\.]+)\\s+(B|KB|KiB|MB|MiB|GB|GiB|TB|TiB)", Pattern.CASE_INSENSITIVE);
52+
private Pattern seed_leecher_pat1 = Pattern.compile("([0-9]+)" + RegExUtil.PAT_WHITE_SPACE + "(seed|seeder|leech|leecher)s", Pattern.CASE_INSENSITIVE );
53+
private Pattern seed_leecher_pat2 = Pattern.compile("(seed|seeder|leech|leecher)s" + RegExUtil.PAT_WHITE_SPACE + "([0-9]+)", Pattern.CASE_INSENSITIVE );
54+
55+
private Pattern size_pat = Pattern.compile("([0-9\\.]+)" + RegExUtil.PAT_WHITE_SPACE + "(B|KB|KiB|MB|MiB|GB|GiB|TB|TiB)", Pattern.CASE_INSENSITIVE );
5356

5457
public static EngineImpl
5558
importFromBEncodedMap(
@@ -641,26 +644,50 @@
641644

642645
desc = desc.replaceAll( "\\(s\\)", "s" );
643646

644-
desc = desc.replaceAll( "seeders", "seeds" );
647+
desc = desc.replace( ":", " " );
648+
649+
desc = Result.removeHTMLTags( desc );
650+
651+
Matcher m = seed_leecher_pat1.matcher( desc );
645652

646-
Matcher m = seed_leecher_pat.matcher( desc );
653+
if ( m.find()){
654+
655+
do{
656+
String num = m.group(1);
647657

648-
while( m.find()){
658+
String type = m.group(2);
649659

650-
String num = m.group(1);
660+
if ( type.toLowerCase().charAt(0) == 's' ){
651661

652-
String type = m.group(2);
662+
result.setNbSeedsFromHTML( num );
653663

654-
if ( type.toLowerCase().charAt(0) == 's' ){
664+
}else{
655665

656-
result.setNbSeedsFromHTML( num );
666+
result.setNbPeersFromHTML( num );
667+
}
668+
}while( m.find());
669+
670+
}else{
671+
672+
m = seed_leecher_pat2.matcher( desc );
657673

658-
}else{
674+
while( m.find()){
675+
676+
String num = m.group(2);
659677

660-
result.setNbPeersFromHTML( num );
678+
String type = m.group(1);
679+
680+
if ( type.toLowerCase().charAt(0) == 's' ){
681+
682+
result.setNbSeedsFromHTML( num );
683+
684+
}else{
685+
686+
result.setNbPeersFromHTML( num );
687+
}
661688
}
662689
}
663-
690+
664691
m = size_pat.matcher( desc );
665692

666693
if ( m.find()){

core/src/com/biglybt/core/util/RegExUtil.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
public static final Pattern PAT_SPLIT_DOT = Pattern.compile("\\.");
3636
public static final Pattern PAT_SPLIT_SPACE = Pattern.compile(" ");
3737
public static final Pattern PAT_SPLIT_SLASH_N = Pattern.compile("\n");
38-
38+
39+
public static final String PAT_WHITE_SPACE = "(?:\\s|\\p{Z})+"; // includes non-breaking space char
40+
3941
private static final ThreadLocal<Map<String,Object[]>> tls =
4042
new ThreadLocal<Map<String,Object[]>>()
4143
{

0 commit comments

Comments
 (0)