Skip to content

Commit a331470

Browse files
committed
Allow callers to differentiate between queue sizes in Frontier: WORK_QUEUE,
IN_PROGRESS_QUEUE, or both combined (WORK_QUEUE | IN_PROGRESS_QUEUE)
1 parent b5826dc commit a331470

1 file changed

Lines changed: 22 additions & 3 deletions

File tree

src/main/java/edu/uci/ics/crawler4j/frontier/Frontier.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ public class Frontier extends Configurable {
3838

3939
private static final String DATABASE_NAME = "PendingURLsDB";
4040
private static final int IN_PROCESS_RESCHEDULE_BATCH_SIZE = 100;
41+
42+
/** Identifier for the InProgress queue: pages in progress by a thread */
43+
public static final int IN_PROGRESS_QUEUE = 1;
44+
/** Identifier for the WorkQueue: pages not yet claimed by any thread */
45+
public static final int WORK_QUEUE = 2;
46+
/** convenience identifier for both queues: IN_PROGRESS_QUEUE | WORK_QUEUE */
47+
public static final int BOTH_QUEUES = IN_PROGRESS_QUEUE | WORK_QUEUE;
48+
4149
protected WorkQueues workQueues;
4250

4351
protected InProcessPagesDB inProcessPages;
@@ -169,11 +177,22 @@ public void setProcessed(WebURL webURL) {
169177
}
170178

171179
public long getQueueLength() {
172-
return workQueues.getLength();
180+
return getQueueLength(WORK_QUEUE);
181+
}
182+
183+
public long getQueueLength(int type) {
184+
synchronized (mutex) {
185+
int length = 0;
186+
if ((type & WORK_QUEUE) == WORK_QUEUE)
187+
length += workQueues.getLength();
188+
if ((type & IN_PROGRESS_QUEUE) == IN_PROGRESS_QUEUE)
189+
length += inProcessPages.getLength();
190+
return length;
191+
}
173192
}
174193

175194
public long getNumberOfAssignedPages() {
176-
return inProcessPages.getLength();
195+
return getQueueLength(IN_PROGRESS_QUEUE);
177196
}
178197

179198
public long getNumberOfProcessedPages() {
@@ -198,4 +217,4 @@ public void finish() {
198217
waitingList.notifyAll();
199218
}
200219
}
201-
}
220+
}

0 commit comments

Comments
 (0)