Skip to content

Feature/Add Tika server support #1385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions Testcontainers.sln
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.ServiceBus",
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.Sftp", "src\Testcontainers.Sftp\Testcontainers.Sftp.csproj", "{7D5C6816-0DD2-4E13-A585-033B5D3C80D5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.Tika", "src\Testcontainers.Tika\Testcontainers.Tika.csproj", "{AC084DE2-1857-E200-EF47-8C4ADEB2173D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.Weaviate", "src\Testcontainers.Weaviate\Testcontainers.Weaviate.csproj", "{68F8600D-24E9-4E03-9E25-5F6EB338EAC1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.WebDriver", "src\Testcontainers.WebDriver\Testcontainers.WebDriver.csproj", "{64A87DE5-29B0-4A54-9E74-560484D8C7C0}"
Expand Down Expand Up @@ -233,14 +235,13 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.WebDriver.Te
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.Xunit.Tests", "tests\Testcontainers.Xunit.Tests\Testcontainers.Xunit.Tests.csproj", "{E901DF14-6F05-4FC2-825A-3055FAD33561}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Testcontainers.Tika.Tests", "tests\Testcontainers.Tika.Tests\Testcontainers.Tika.Tests.csproj", "{FDD2E9F5-DAAC-2F8F-B8DD-6924CD1D9091}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{5365F780-0E6C-41F0-B1B9-7DC34368F80C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5365F780-0E6C-41F0-B1B9-7DC34368F80C}.Debug|Any CPU.Build.0 = Debug|Any CPU
Expand Down Expand Up @@ -682,6 +683,17 @@ Global
{E901DF14-6F05-4FC2-825A-3055FAD33561}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E901DF14-6F05-4FC2-825A-3055FAD33561}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E901DF14-6F05-4FC2-825A-3055FAD33561}.Release|Any CPU.Build.0 = Release|Any CPU
{AC084DE2-1857-E200-EF47-8C4ADEB2173D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AC084DE2-1857-E200-EF47-8C4ADEB2173D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AC084DE2-1857-E200-EF47-8C4ADEB2173D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AC084DE2-1857-E200-EF47-8C4ADEB2173D}.Release|Any CPU.Build.0 = Release|Any CPU
{FDD2E9F5-DAAC-2F8F-B8DD-6924CD1D9091}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FDD2E9F5-DAAC-2F8F-B8DD-6924CD1D9091}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FDD2E9F5-DAAC-2F8F-B8DD-6924CD1D9091}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FDD2E9F5-DAAC-2F8F-B8DD-6924CD1D9091}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{5365F780-0E6C-41F0-B1B9-7DC34368F80C} = {673F23AE-7694-4BB9-ABD4-136D6C13634E}
Expand Down Expand Up @@ -794,5 +806,7 @@ Global
{DDB41BC8-5826-4D97-9C5F-001151E3FFD6} = {7164F1FB-7F24-444A-ACD2-2C329C2B3CCF}
{EBA72C3B-57D5-43FF-A5B4-3D55B3B6D4C2} = {7164F1FB-7F24-444A-ACD2-2C329C2B3CCF}
{E901DF14-6F05-4FC2-825A-3055FAD33561} = {7164F1FB-7F24-444A-ACD2-2C329C2B3CCF}
{AC084DE2-1857-E200-EF47-8C4ADEB2173D} = {673F23AE-7694-4BB9-ABD4-136D6C13634E}
{FDD2E9F5-DAAC-2F8F-B8DD-6924CD1D9091} = {7164F1FB-7F24-444A-ACD2-2C329C2B3CCF}
EndGlobalSection
EndGlobal
12 changes: 12 additions & 0 deletions src/Testcontainers.Tika/Testcontainers.Tika.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!-- Project file for the Testcontainers.Tika module library. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Frameworks this module is compiled for. -->
<TargetFrameworks>net8.0;net9.0;netstandard2.0;netstandard2.1</TargetFrameworks>
<!-- The sources use newer language features (e.g. file-scoped namespaces, using declarations) on every target framework. -->
<LangVersion>latest</LangVersion>
</PropertyGroup>
<ItemGroup>
<!-- Supplies the [PublicAPI] attribute; PrivateAssets="All" keeps the package out of the consumers' dependency graph. -->
<PackageReference Include="JetBrains.Annotations" VersionOverride="2023.3.0" PrivateAssets="All" />
</ItemGroup>
<ItemGroup>
<!-- Core Testcontainers library that provides ContainerBuilder, ContainerConfiguration and DockerContainer. -->
<ProjectReference Include="../Testcontainers/Testcontainers.csproj" />
</ItemGroup>
</Project>
115 changes: 115 additions & 0 deletions src/Testcontainers.Tika/TikaBuilder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
namespace Testcontainers.Tika;

/// <inheritdoc cref="ContainerBuilder{TBuilderEntity, TContainerEntity, TConfigurationEntity}" />
[PublicAPI]
public sealed class TikaBuilder : ContainerBuilder<TikaBuilder, TikaContainer, TikaConfiguration>
{
    /// <summary>
    /// The Docker image the builder configures in <see cref="Init" />.
    /// </summary>
    public const string TikaImage = "apache/tika:3.0.0.0-full";

    /// <summary>
    /// The container port that is published to a random host port and probed by the wait strategy.
    /// </summary>
    public const ushort TikaHttpPort = 9998;

    /// <summary>
    /// Initializes a new instance of the <see cref="TikaBuilder" /> class.
    /// </summary>
    public TikaBuilder()
        : this(new TikaConfiguration())
    {
        DockerResourceConfiguration = Init().DockerResourceConfiguration;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TikaBuilder" /> class.
    /// </summary>
    /// <param name="resourceConfiguration">The Docker resource configuration.</param>
    private TikaBuilder(TikaConfiguration resourceConfiguration)
        : base(resourceConfiguration)
    {
        DockerResourceConfiguration = resourceConfiguration;
    }

    /// <inheritdoc />
    protected override TikaConfiguration DockerResourceConfiguration { get; }

    /// <summary>
    /// Sets the Tika server timeout.
    /// </summary>
    /// <remarks>
    /// The value is stored in the configuration and also passed to the container
    /// through the <c>TIKA_TIMEOUT</c> environment variable.
    /// </remarks>
    /// <param name="timeout">The timeout for the server in milliseconds.</param>
    /// <returns>A configured instance of <see cref="TikaBuilder" />.</returns>
    public TikaBuilder WithTimeout(int timeout)
    {
        // The environment variable is machine-readable, so format it independently of the current culture.
        var timeoutValue = timeout.ToString(System.Globalization.CultureInfo.InvariantCulture);

        return Merge(DockerResourceConfiguration, new TikaConfiguration(timeout: timeout))
            .WithEnvironment("TIKA_TIMEOUT", timeoutValue);
    }

    /// <inheritdoc />
    public override TikaContainer Build()
    {
        Validate();
        return new TikaContainer(DockerResourceConfiguration);
    }

    /// <inheritdoc />
    protected override TikaBuilder Init()
    {
        return base.Init()
            .WithImage(TikaImage)
            .WithPortBinding(TikaHttpPort, true)
            .WithWaitStrategy(Wait.ForUnixContainer().AddCustomWaitStrategy(new WaitUntil()));
    }

    /// <inheritdoc />
    protected override TikaBuilder Clone(IResourceConfiguration<CreateContainerParameters> resourceConfiguration)
    {
        return Merge(DockerResourceConfiguration, new TikaConfiguration(resourceConfiguration));
    }

    /// <inheritdoc />
    protected override TikaBuilder Clone(IContainerConfiguration resourceConfiguration)
    {
        return Merge(DockerResourceConfiguration, new TikaConfiguration(resourceConfiguration));
    }

    /// <inheritdoc />
    protected override TikaBuilder Merge(TikaConfiguration oldValue, TikaConfiguration newValue)
    {
        return new TikaBuilder(new TikaConfiguration(oldValue, newValue));
    }

    /// <summary>
    /// Wait strategy that probes the Tika HTTP endpoint until it answers with a success status code.
    /// </summary>
    private sealed class WaitUntil : IWaitUntil
    {
        private const string HealthCheckPath = "tika";
        private const int MaxRetryAttempts = 10;
        private const int DelayInMilliseconds = 1000;

        /// <summary>
        /// Checks whether the Tika server answers HTTP GET requests on the health check endpoint.
        /// </summary>
        /// <param name="container">The container instance to check.</param>
        /// <returns>
        /// A task whose result is <c>true</c> as soon as one request returns a success status code,
        /// or <c>false</c> after <see cref="MaxRetryAttempts" /> unsuccessful attempts.
        /// </returns>
        /// <remarks>
        /// Each unsuccessful attempt is followed by a delay of <see cref="DelayInMilliseconds" /> milliseconds.
        /// </remarks>
        public async Task<bool> UntilAsync(DotNet.Testcontainers.Containers.IContainer container)
        {
            var endpoint = $"http://{container.Hostname}:{container.GetMappedPublicPort(TikaHttpPort)}/{HealthCheckPath}";

            using var client = new HttpClient();

            for (var attempt = 0; attempt < MaxRetryAttempts; attempt++)
            {
                try
                {
                    // Dispose every response so that failed probes do not hold on to connections.
                    using var response = await client.GetAsync(endpoint)
                        .ConfigureAwait(false);

                    if (response.IsSuccessStatusCode)
                    {
                        return true;
                    }
                }
                catch (HttpRequestException)
                {
                    // The server is not accepting connections yet; try again after the delay.
                }
                catch (OperationCanceledException)
                {
                    // The request timed out; try again after the delay.
                }

                await Task.Delay(DelayInMilliseconds)
                    .ConfigureAwait(false);
            }

            return false;
        }
    }
}
54 changes: 54 additions & 0 deletions src/Testcontainers.Tika/TikaConfiguration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
namespace Testcontainers.Tika;

/// <inheritdoc cref="ContainerConfiguration" />
[PublicAPI]
public sealed class TikaConfiguration : ContainerConfiguration
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TikaConfiguration" /> class.
    /// </summary>
    /// <param name="timeout">The timeout for the Tika server.</param>
    public TikaConfiguration(int timeout = 30000)
    {
        Timeout = timeout;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TikaConfiguration" /> class.
    /// </summary>
    /// <param name="resourceConfiguration">The Docker resource configuration.</param>
    public TikaConfiguration(IResourceConfiguration<CreateContainerParameters> resourceConfiguration)
        : base(resourceConfiguration)
    {
        // Passes the configuration upwards to the base implementations to create an updated immutable copy.
        // Timeout is intentionally left at its default (0), which the merge constructor treats as "not set".
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TikaConfiguration" /> class.
    /// </summary>
    /// <param name="resourceConfiguration">The Docker resource configuration.</param>
    public TikaConfiguration(IContainerConfiguration resourceConfiguration)
        : base(resourceConfiguration)
    {
        // Passes the configuration upwards to the base implementations to create an updated immutable copy.
        // Timeout is intentionally left at its default (0), which the merge constructor treats as "not set".
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TikaConfiguration" /> class,
    /// combining properties from two existing configurations.
    /// </summary>
    /// <param name="oldValue">The previous configuration values.</param>
    /// <param name="newValue">The new configuration values to merge with the old ones.</param>
    public TikaConfiguration(TikaConfiguration oldValue, TikaConfiguration newValue)
        : base(oldValue, newValue)
    {
        // Timeout is a non-nullable int, so a configuration created from a plain resource
        // configuration carries 0 rather than "no value". Keep the previous timeout in that
        // case instead of relying on how a generic combine helper treats value types;
        // otherwise every unrelated builder call would reset a configured timeout.
        Timeout = newValue.Timeout != default ? newValue.Timeout : oldValue.Timeout;
    }

    /// <summary>
    /// Gets the Tika server timeout.
    /// </summary>
    /// <remarks>
    /// A value of 0 means that this configuration does not specify a timeout.
    /// </remarks>
    public int Timeout { get; }
}
28 changes: 28 additions & 0 deletions src/Testcontainers.Tika/TikaContainer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace Testcontainers.Tika;

/// <inheritdoc cref="DockerContainer" />
[PublicAPI]
public sealed class TikaContainer : DockerContainer
{
    // Configuration this container was created from.
    private readonly TikaConfiguration _configuration;

    /// <summary>
    /// Creates a Tika container from the given configuration.
    /// </summary>
    /// <param name="configuration">The container configuration.</param>
    public TikaContainer(TikaConfiguration configuration)
        : base(configuration)
        => _configuration = configuration;

    /// <summary>
    /// Builds the HTTP address of the Tika server from the container hostname
    /// and the host port mapped to <see cref="TikaBuilder.TikaHttpPort" />.
    /// </summary>
    /// <returns>The Tika connection string.</returns>
    public string GetConnectionString()
        => new UriBuilder(Uri.UriSchemeHttp, Hostname, GetMappedPublicPort(TikaBuilder.TikaHttpPort)).ToString();
}
8 changes: 8 additions & 0 deletions src/Testcontainers.Tika/Usings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
global using System;
global using Docker.DotNet.Models;
global using DotNet.Testcontainers.Builders;
global using DotNet.Testcontainers.Configurations;
global using DotNet.Testcontainers.Containers;
global using JetBrains.Annotations;
global using System.Threading.Tasks;
global using System.Net.Http;
19 changes: 19 additions & 0 deletions tests/Testcontainers.Tika.Tests/Testcontainers.Tika.Tests.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!-- Project file for the Testcontainers.Tika test suite. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net9.0</TargetFrameworks>
<!-- Test project: it is neither packed nor published. -->
<IsPackable>false</IsPackable>
<IsPublishable>false</IsPublishable>
<Configurations>Debug;Release</Configurations>
</PropertyGroup>
<ItemGroup>
<!-- No versions are given here; presumably they are supplied centrally (e.g. Directory.Packages.props) - TODO confirm. -->
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="coverlet.collector" />
<PackageReference Include="xunit.runner.visualstudio" />
<PackageReference Include="xunit" />
</ItemGroup>
<ItemGroup>
<!-- Shared test helpers (the tests use DockerCli from DotNet.Testcontainers.Commons) and the module under test. -->
<ProjectReference Include="../Testcontainers.Commons/Testcontainers.Commons.csproj" />
<ProjectReference Include="../../src/Testcontainers.Tika/Testcontainers.Tika.csproj" />
</ItemGroup>
</Project>

49 changes: 49 additions & 0 deletions tests/Testcontainers.Tika.Tests/TikaContainerTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
namespace Testcontainers.Tika.Tests;

/// <summary>
/// Tests that start a <see cref="TikaContainer" /> with the default builder settings
/// and verify its connection string and HTTP endpoint.
/// </summary>
public sealed class TikaContainerTests : IAsyncLifetime
{
    private readonly TikaContainer _tikaContainer = new TikaBuilder().Build();

    /// <summary>
    /// Starts the container before each test.
    /// </summary>
    public Task InitializeAsync()
    {
        return _tikaContainer.StartAsync();
    }

    /// <summary>
    /// Disposes the container after each test.
    /// </summary>
    public Task DisposeAsync()
    {
        return _tikaContainer.DisposeAsync().AsTask();
    }

    [Fact]
    [Trait(nameof(DockerCli.DockerPlatform), nameof(DockerCli.DockerPlatform.Linux))]
    public void GetConnectionStringReturnsValidUrl()
    {
        // When
        // GetConnectionString is synchronous, so there is no need to offload it to the thread pool.
        var connectionString = _tikaContainer.GetConnectionString();

        // Then
        Assert.False(string.IsNullOrEmpty(connectionString));
        Assert.StartsWith("http://", connectionString, StringComparison.OrdinalIgnoreCase);
    }

    [Fact]
    [Trait(nameof(DockerCli.DockerPlatform), nameof(DockerCli.DockerPlatform.Linux))]
    public async Task TikaHealthCheckShouldBeSuccessful()
    {
        // Given
        using var httpClient = new HttpClient();

        // Resolve the path against the base address instead of concatenating strings,
        // so the result does not depend on the connection string ending with a slash.
        var requestUri = new Uri(new Uri(_tikaContainer.GetConnectionString()), "tika");

        // When
        using var response = await httpClient.GetAsync(requestUri);

        // Then
        response.EnsureSuccessStatusCode();
        var content = await response.Content.ReadAsStringAsync();
        Assert.False(string.IsNullOrEmpty(content));
        Assert.StartsWith("This is Tika Server", content);
    }
}
5 changes: 5 additions & 0 deletions tests/Testcontainers.Tika.Tests/Usings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
global using DotNet.Testcontainers.Commons;
global using System;
global using System.Net.Http;
global using System.Threading.Tasks;
global using Xunit;
Loading