Skip to content

Commit

Permalink
Merge pull request #101 from klockla/testcase_94
Browse files Browse the repository at this point in the history
Added test case for discussion #94
  • Loading branch information
jnioche authored Sep 20, 2024
2 parents 350bd1b + 054bd6b commit 2be58f8
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,27 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import crawlercommons.urlfrontier.Urlfrontier.AckMessage;
import crawlercommons.urlfrontier.Urlfrontier.DiscoveredURLItem;
import crawlercommons.urlfrontier.Urlfrontier.ListUrlParams;
import crawlercommons.urlfrontier.Urlfrontier.StringList;
import crawlercommons.urlfrontier.Urlfrontier.URLInfo;
import crawlercommons.urlfrontier.Urlfrontier.URLItem;
import crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest;
import crawlercommons.urlfrontier.service.memory.MemoryFrontierService;
import io.grpc.stub.StreamObserver;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.Order;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;
import org.slf4j.LoggerFactory;

@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
class MemoryFrontierServiceTest {

private static final org.slf4j.Logger LOG =
Expand All @@ -32,6 +41,7 @@ static void setup() {
}

@Test
@Order(1)
void testGetStatusDiscovered() {

String crawlId = "crawl_id";
Expand Down Expand Up @@ -77,6 +87,7 @@ public void onCompleted() {
}

@Test
@Order(2)
void testGetStatusCompleted() {

String crawlId = "crawl_id";
Expand Down Expand Up @@ -110,7 +121,7 @@ public void onError(Throwable t) {

@Override
public void onCompleted() {
LOG.info("completed testGetStatusKnown");
LOG.info("completed testGetStatusCompleted");
}
};

Expand All @@ -121,6 +132,7 @@ public void onCompleted() {
}

@Test
@Order(3)
void testNotFound() {
String crawlId = "crawl_id";
String url = "https://www.example3.com";
Expand Down Expand Up @@ -169,6 +181,7 @@ private void logURLItem(URLItem item) {
}

@Test
@Order(4)
void testGetStatusToRefetch() {

String crawlId = "crawl_id";
Expand Down Expand Up @@ -205,7 +218,7 @@ public void onError(Throwable t) {

@Override
public void onCompleted() {
LOG.info("completed testGetStatusKnown");
LOG.info("completed testGetStatusToRefetch");
}
};

Expand All @@ -216,6 +229,7 @@ public void onCompleted() {
}

@Test
@Order(5)
void testListAllURLs() {

ListUrlParams params =
Expand Down Expand Up @@ -254,6 +268,7 @@ public void onCompleted() {
}

@Test
@Order(6)
void testListURLsinglequeue() {

ListUrlParams params =
Expand Down Expand Up @@ -296,6 +311,7 @@ public void onCompleted() {
}

@Test
@Order(7)
void testMemoryIterator() {
int nbQueues = 0;
int nbUrls = 0;
Expand All @@ -317,6 +333,7 @@ void testMemoryIterator() {
}

@Test
@Order(8)
void testMemoryIteratorSingleQueue() {
int nbQueues = 0;
int nbUrls = 0;
Expand All @@ -340,4 +357,133 @@ void testMemoryIteratorSingleQueue() {
assertEquals(1, nbQueues);
assertEquals(3, nbUrls);
}

@Test
@Order(99)
void testNoRescheduleCompleted() {

String crawlId = "crawl_id";
String url2 = "https://www.mysite.com/completed";
String key2 = "queue_mysite";
StringList sl2 = StringList.newBuilder().addValues("md2").build();

crawlercommons.urlfrontier.Urlfrontier.URLItem.Builder builder1 = URLItem.newBuilder();

StreamObserver<URLItem> statusObserver =
new StreamObserver<>() {

@Override
public void onNext(URLItem value) {
// receives confirmation that the value has been received
logURLItem(value);

// Internally, MemoryFrontierService does not make a distinction
// between discovered and known which have to be re-fetched
if (value.hasKnown()) {
assertEquals(0, value.getKnown().getRefetchableFromDate());
}
}

@Override
public void onError(Throwable t) {
t.printStackTrace();
}

@Override
public void onCompleted() {
LOG.info("completed testNoRescheduleCompleted 1/2");
}
};

// First check that we have the URL as Known URL with a refetch date of 0
URLStatusRequest request =
URLStatusRequest.newBuilder().setCrawlID(crawlId).setUrl(url2).setKey(key2).build();

memoryFrontierService.getURLStatus(request, statusObserver);

// PutURL for the same URL with Discovered status
URLInfo info2 =
URLInfo.newBuilder()
.setUrl(url2)
.setCrawlID(crawlId)
.setKey(key2)
.putMetadata("meta1", sl2)
.build();

DiscoveredURLItem disco2 = DiscoveredURLItem.newBuilder().setInfo(info2).build();
builder1.clear();
builder1.setDiscovered(disco2);
builder1.setID(crawlId + "_" + url2);

final AtomicBoolean completed = new AtomicBoolean(false);
final AtomicInteger acked = new AtomicInteger(0);
final AtomicInteger failed = new AtomicInteger(0);
final AtomicInteger skipped = new AtomicInteger(0);
final AtomicInteger ok = new AtomicInteger(0);
StreamObserver<crawlercommons.urlfrontier.Urlfrontier.AckMessage> responseObserver =
new StreamObserver<>() {

@Override
public void onNext(crawlercommons.urlfrontier.Urlfrontier.AckMessage value) {
// receives confirmation that the value has been received
acked.addAndGet(1);
if (value.getStatus().equals(AckMessage.Status.SKIPPED)) {
skipped.getAndIncrement();
LOG.info("PutURL skipped");
} else if (value.getStatus().equals(AckMessage.Status.FAIL)) {
failed.getAndIncrement();
LOG.info("PutURL failed");
} else if (value.getStatus().equals(AckMessage.Status.OK)) {
ok.getAndIncrement();
LOG.info("PutURL OK");
}
}

@Override
public void onError(Throwable t) {
completed.set(true);
t.printStackTrace();
}

@Override
public void onCompleted() {
completed.set(true);
LOG.info("Completed putURL");
}
};

StreamObserver<URLItem> streamObserver = memoryFrontierService.putURLs(responseObserver);
streamObserver.onNext(builder1.build());
streamObserver.onCompleted();

assertEquals(1, skipped.get());

StreamObserver<URLItem> statusObserver2 =
new StreamObserver<>() {

@Override
public void onNext(URLItem value) {
// receives confirmation that the value has been received
logURLItem(value);

// Internally, MemoryFrontierService does not make a distinction
// between discovered and known which have to be re-fetched
if (value.hasKnown()) {
assertEquals(0, value.getKnown().getRefetchableFromDate());
}
}

@Override
public void onError(Throwable t) {
t.printStackTrace();
}

@Override
public void onCompleted() {
LOG.info("completed testNoRescheduleCompleted 2/2");
}
};

memoryFrontierService.getURLStatus(request, statusObserver2);
}
}
Loading

0 comments on commit 2be58f8

Please sign in to comment.