Skip to content

Commit 4f33b6a

Browse files
committed
test: issue HTTP requests with a valid user agent
Some suites (e.g., rdf-streaming-updater, rdf-spark-tools) were failing because they accessed https://schema.wikimedia.org with an invalid user agent, which resulted in an HTTP 403 response. This patch updates the default WDQS UA string and ensures all requests use it.
Bug: T406064
Change-Id: If294b2ea71b441778e7533c235e9523817f3f300
1 parent 59de2a9 commit 4f33b6a

File tree

6 files changed

+34
-6
lines changed

6 files changed

+34
-6
lines changed

rdf-spark-tools/src/test/scala/org/wikidata/query/rdf/updater/reconcile/ReconciliationSenderUnitTest.scala

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@ package org.wikidata.query.rdf.updater.reconcile
33
import java.io.IOException
44
import java.net.URI
55
import java.time.Instant
6-
76
import java.util.{UUID, function}
87
import scala.collection.JavaConverters._
98
import scala.language.postfixOps
109
import scala.util.{Failure, Success, Try}
11-
1210
import com.fasterxml.jackson.databind.node.ArrayNode
1311
import org.apache.commons.io.IOUtils
1412
import org.apache.http.HttpVersion
@@ -20,7 +18,7 @@ import org.scalamock.matchers.ArgCapture.CaptureAll
2018
import org.scalamock.scalatest.MockFactory
2119
import org.scalatest.flatspec.AnyFlatSpec
2220
import org.scalatest.matchers.should.Matchers
23-
import org.wikidata.query.rdf.tool.{EntityId, MapperUtils}
21+
import org.wikidata.query.rdf.tool.{EntityId, HttpClientUtils, MapperUtils}
2422
import org.wikidata.query.rdf.tool.change.events.{EventInfo, EventsMeta, ReconcileEvent}
2523
import org.wikidata.query.rdf.tool.change.events.ReconcileEvent.Action
2624
import org.wikimedia.eventutilities.core.event.{EventSchemaLoader, EventSchemaValidator}
@@ -35,7 +33,11 @@ class ReconciliationSenderUnitTest extends AnyFlatSpec with Matchers with MockFa
3533
private val domain: String = "mydomain"
3634
private val mystream: String = "mystream"
3735
private val defaultLoader: function.Function[URI, Array[Byte]] = new function.Function[URI, Array[Byte]]() {
38-
override def apply(u: URI): Array[Byte] = IOUtils.toByteArray(u)
36+
override def apply(u: URI): Array[Byte] = {
37+
val connection = u.toURL.openConnection()
38+
connection.setRequestProperty("User-Agent", HttpClientUtils.WDQS_DEFAULT_UA)
39+
IOUtils.toByteArray(connection.getInputStream)
40+
}
3941
}
4042
private val resLoader: ResourceLoader = ResourceLoader.builder()
4143
.setDefaultLoader(defaultLoader)
@@ -65,6 +67,7 @@ class ReconciliationSenderUnitTest extends AnyFlatSpec with Matchers with MockFa
6567

6668
"ReconciliationSender" should "send batch of events compatible with their schema" in {
6769
val httpClient = mock[HttpClient]
70+
6871
val validResponse = stub[CloseableHttpResponse]
6972
val captureAllSuccess = CaptureAll[HttpUriRequest]()
7073

streaming-updater-common/src/test/java/org/wikidata/query/rdf/updater/MutationEventDataJsonSerializationUnitTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
import org.openrdf.model.Statement;
2525
import org.openrdf.rio.RDFFormat;
2626
import org.openrdf.rio.RDFWriterRegistry;
27+
import org.wikidata.query.rdf.tool.HttpClientUtils;
2728
import org.wikidata.query.rdf.tool.MapperUtils;
2829
import org.wikidata.query.rdf.tool.change.events.EventsMeta;
2930
import org.wikimedia.eventutilities.core.event.EventSchemaLoader;
3031
import org.wikimedia.eventutilities.core.event.EventSchemaValidator;
32+
import org.wikimedia.eventutilities.core.http.BasicHttpClient;
3133
import org.wikimedia.eventutilities.core.json.JsonLoadingException;
3234
import org.wikimedia.eventutilities.core.json.JsonSchemaLoader;
3335
import org.wikimedia.eventutilities.core.util.ResourceLoader;
@@ -56,10 +58,14 @@ public static Collection<Object[]> params() {
5658
}
5759

5860
public MutationEventDataJsonSerializationUnitTest(String version) throws MalformedURLException {
61+
BasicHttpClient.Builder builder = BasicHttpClient.builder();
62+
builder.httpClientBuilder().setUserAgent(HttpClientUtils.WDQS_DEFAULT_UA);
63+
BasicHttpClient httpClient = builder.build();
5964
eventSchemaLoader = EventSchemaLoader
6065
.builder()
6166
.setJsonSchemaLoader(JsonSchemaLoader.build(ResourceLoader
6267
.builder()
68+
.withHttpClient(httpClient)
6369
.setBaseUrls(Collections.singletonList(new URL("https://schema.wikimedia.org/repositories/primary/jsonschema")))
6470
.build()))
6571
.build();

streaming-updater-producer/src/test/scala/org/wikidata/query/rdf/updater/IncomingEventStreamsIntegrationTest.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@ import org.apache.flink.api.connector.sink2.{Sink, SinkWriter}
44
import org.apache.flink.core.fs.Path
55
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
66
import org.scalatest.{FlatSpec, Matchers}
7+
import org.wikidata.query.rdf.tool.HttpClientUtils
78
import org.wikidata.query.rdf.tool.change.events.{EventInfo, EventsMeta}
89
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository
910
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris
1011
import org.wikidata.query.rdf.updater.config.{FilteredReconciliationStream, InputStreams, UpdaterPipelineInputEventStreamConfig}
1112
import org.wikimedia.eventutilities.core.event.{EventSchemaLoader, EventStreamConfig, EventStreamFactory, StaticEventStreamConfigLoader}
13+
import org.wikimedia.eventutilities.core.http.BasicHttpClient
1214
import org.wikimedia.eventutilities.core.json.{JsonLoader, JsonSchemaLoader}
1315
import org.wikimedia.eventutilities.core.util.ResourceLoader
1416
import org.wikimedia.eventutilities.flink.stream.EventDataStreamFactory
@@ -26,8 +28,15 @@ class IncomingEventStreamsIntegrationTest extends FlatSpec with FlinkTestCluster
2628
// useful to put test schemas while changes are being reviewed on the schemas repos
2729
this.getClass.getResource("/schema_repo/").toString)
2830

31+
private val httpClient: BasicHttpClient = {
32+
val builder = BasicHttpClient.builder()
33+
builder.httpClientBuilder().setUserAgent(HttpClientUtils.WDQS_DEFAULT_UA)
34+
builder.build()
35+
}
36+
2937
private val resourceLoader = ResourceLoader.builder()
3038
.setBaseUrls(ResourceLoader.asURLs(schemaRepos.asJava))
39+
.withHttpClient(httpClient)
3140
.build()
3241
private val jsonLoader: JsonLoader = new JsonLoader(resourceLoader)
3342
private val eventStreamConfigLoader = new StaticEventStreamConfigLoader(

streaming-updater-producer/src/test/scala/org/wikidata/query/rdf/updater/IncomingEventStreamsUnitTest.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ package org.wikidata.query.rdf.updater
22

33
import org.apache.flink.types.Row
44
import org.scalatest.{FlatSpec, Matchers}
5+
import org.wikidata.query.rdf.tool.HttpClientUtils
56
import org.wikidata.query.rdf.tool.change.events.{EventInfo, EventsMeta}
67
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris
78
import org.wikimedia.eventutilities.core.event.{EventSchemaLoader, EventStreamConfig, EventStreamFactory, StaticEventStreamConfigLoader}
9+
import org.wikimedia.eventutilities.core.http.BasicHttpClient
810
import org.wikimedia.eventutilities.core.json.{JsonLoader, JsonSchemaLoader}
911
import org.wikimedia.eventutilities.core.util.ResourceLoader
1012
import org.wikimedia.eventutilities.flink.stream.EventDataStreamFactory
@@ -22,8 +24,15 @@ class IncomingEventStreamsUnitTest extends FlatSpec with Matchers {
2224
// useful to put test schemas while changes are being reviewed on the schemas repos
2325
this.getClass.getResource("/schema_repo/").toString)
2426

27+
private val httpClient: BasicHttpClient = {
28+
val builder = BasicHttpClient.builder()
29+
builder.httpClientBuilder().setUserAgent(HttpClientUtils.WDQS_DEFAULT_UA)
30+
builder.build()
31+
}
32+
2533
private val resourceLoader = ResourceLoader.builder()
2634
.setBaseUrls(ResourceLoader.asURLs(schemaRepos.asJava))
35+
.withHttpClient(httpClient)
2736
.build()
2837
private val jsonLoader: JsonLoader = new JsonLoader(resourceLoader)
2938
private val eventStreamConfigLoader = new StaticEventStreamConfigLoader(

tools/src/main/java/org/wikidata/query/rdf/tool/HttpClientUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public final class HttpClientUtils {
9191
/**
9292
* Default HTTP User-Agent used by the system.
9393
*/
94-
public static final String WDQS_DEFAULT_UA = "Wikidata Query Service Updater Bot";
94+
public static final String WDQS_DEFAULT_UA = "Wikidata-Query-RDF/1.0 (https://github.com/wikimedia/wikidata-query-rdf) Bot/1.0";
9595

9696
/**
9797
* Max number of connection pooled per route.

tools/src/test/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepositoryWireIntegrationTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.junit.Test;
2929
import org.openrdf.model.Statement;
3030
import org.wikidata.query.rdf.test.SystemPropertyContext;
31+
import org.wikidata.query.rdf.tool.HttpClientUtils;
3132
import org.wikidata.query.rdf.tool.change.Change;
3233
import org.wikidata.query.rdf.tool.exception.RetryableException;
3334
import org.wikidata.query.rdf.tool.rdf.RDFParserSuppliers;
@@ -159,7 +160,7 @@ public void defaultUserAgentIsSet() throws RetryableException {
159160

160161
repository.fetchRdfForEntity("Q1");
161162

162-
verify(getRequestedFor(anyUrl()).withHeader("User-Agent", containing("Wikidata Query Service Updater")));
163+
verify(getRequestedFor(anyUrl()).withHeader("User-Agent", containing(HttpClientUtils.WDQS_DEFAULT_UA)));
163164
}
164165

165166
@Test

0 commit comments

Comments
 (0)