Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -279,15 +279,15 @@ public void run() {
if (!someoneIsWorking) {
if (!shuttingDown) {
long queueLength = frontier.getQueueLength();
-if (queueLength > 0) {
+if (! frontier.isFinished() && queueLength > 0) {
continue;
}
logger.info(
"No thread is working and no more URLs are in queue waiting for another 10 seconds to make " +
"sure...");
sleep(10);
queueLength = frontier.getQueueLength();
-if (queueLength > 0) {
+if (! frontier.isFinished() && queueLength > 0) {
continue;
}
}
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/edu/uci/ics/crawler4j/frontier/Frontier.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class Frontier extends Configurable {
protected boolean isFinished = false;

protected long scheduledPages;
+protected long fetchedPages = 0;

protected Counters counters;

Expand Down Expand Up @@ -82,7 +83,7 @@ public Frontier(Environment env, CrawlConfig config) {
}

public void scheduleAll(List<WebURL> urls) {
-int maxPagesToFetch = config.getMaxPagesToFetch();
+int maxPagesToFetch = -1; //config.getMaxPagesToFetch();
synchronized (mutex) {
int newScheduledPage = 0;
for (WebURL url : urls) {
Expand All @@ -108,7 +109,7 @@ public void scheduleAll(List<WebURL> urls) {
}

public void schedule(WebURL url) {
-int maxPagesToFetch = config.getMaxPagesToFetch();
+int maxPagesToFetch = -1; //config.getMaxPagesToFetch();
synchronized (mutex) {
try {
if (maxPagesToFetch < 0 || scheduledPages < maxPagesToFetch) {
Expand All @@ -123,8 +124,11 @@ public void schedule(WebURL url) {
}

public void getNextURLs(int max, List<WebURL> result) {
+int maxPagesToFetch = config.getMaxPagesToFetch();
while (true) {
synchronized (mutex) {
+if (maxPagesToFetch > 0 && fetchedPages > maxPagesToFetch)
+finish();
if (isFinished) {
return;
}
Expand Down Expand Up @@ -160,6 +164,7 @@ public void getNextURLs(int max, List<WebURL> result) {
}

public void setProcessed(WebURL webURL) {
+fetchedPages++;
counters.increment(Counters.ReservedCounterNames.PROCESSED_PAGES);
if (inProcessPages != null) {
if (!inProcessPages.removeURL(webURL)) {
Expand Down