-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathPostgresOptions.cs
More file actions
135 lines (114 loc) · 5.45 KB
/
PostgresOptions.cs
File metadata and controls
135 lines (114 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
using System;
namespace Mythosia.VectorDb.Postgres;
/// <summary>
/// Configuration options for <see cref="PostgresStore"/>.
/// Call <see cref="Validate"/> to check the configured values.
/// </summary>
public class PostgresOptions
{
    /// <summary>
    /// PostgreSQL connection string. Required.
    /// </summary>
    public string ConnectionString { get; set; } = string.Empty;

    /// <summary>
    /// Embedding vector dimension. Must match the dimension used by the embedding provider.
    /// Required (must be greater than 0).
    /// </summary>
    public int Dimension { get; set; }

    /// <summary>
    /// Database schema name. Default: "public".
    /// May contain only letters, digits, and underscores (enforced by <see cref="Validate"/>).
    /// </summary>
    public string SchemaName { get; set; } = "public";

    /// <summary>
    /// Table name for vector storage. Default: "vectors".
    /// May contain only letters, digits, and underscores (enforced by <see cref="Validate"/>).
    /// </summary>
    public string TableName { get; set; } = "vectors";

    /// <summary>
    /// When true, automatically creates the pgvector extension, table, and indexes
    /// if they do not exist. Recommended for development/testing only.
    /// When false (default), the schema must already exist or an exception is thrown.
    /// </summary>
    public bool EnsureSchema { get; set; } = false;

    /// <summary>
    /// Distance function for similarity search. Default: <see cref="DistanceStrategy.Cosine"/>.
    /// </summary>
    public DistanceStrategy DistanceStrategy { get; set; } = DistanceStrategy.Cosine;

    /// <summary>
    /// Vector index settings. Default: <see cref="HnswIndexOptions"/>.
    /// Must not be null.
    /// </summary>
    public VectorIndexOptions Index { get; set; } = new HnswIndexOptions();

    /// <summary>
    /// PostgreSQL text search configuration used for <c>to_tsvector</c> / <c>plainto_tsquery</c>
    /// in hybrid search. Default: <c>"simple"</c>.
    /// Only used when <see cref="TextSearchMode"/> is <see cref="TextSearchMode.TsVector"/>.
    /// <para>
    /// The <c>simple</c> configuration performs whitespace-based tokenization without
    /// morphological analysis. This works well for English but may produce poor recall
    /// for agglutinative languages such as Korean, Japanese, or Finnish where particles
    /// and inflections are attached to words.
    /// </para>
    /// <para>
    /// To improve hybrid search quality for such languages, install a language-specific
    /// text search configuration (e.g. <c>pg_mecab</c> for Korean/Japanese) and set this
    /// property accordingly, or use <see cref="TextSearchMode.Trigram"/> instead.
    /// </para>
    /// </summary>
    public string TextSearchConfig { get; set; } = "simple";

    /// <summary>
    /// Text search mode for the keyword leg of hybrid search.
    /// Default: <see cref="TextSearchMode.TsVector"/>.
    /// <para>
    /// <see cref="TextSearchMode.TsVector"/>: PostgreSQL full-text search
    /// (<c>tsvector / tsquery</c>). Suitable for European languages with good
    /// built-in text search configurations.
    /// </para>
    /// <para>
    /// <see cref="TextSearchMode.Trigram"/>: <c>pg_trgm word_similarity</c> matching.
    /// Better for CJK languages (Korean, Japanese, Chinese) where PostgreSQL
    /// lacks built-in morphological analysis. Requires the <c>pg_trgm</c>
    /// extension (standard PostgreSQL contrib module, available on most managed services).
    /// </para>
    /// </summary>
    public TextSearchMode TextSearchMode { get; set; } = TextSearchMode.TsVector;

    /// <summary>
    /// When true (default), index creation failures throw and fail fast.
    /// When false, index creation failures are downgraded to warnings and startup continues.
    /// </summary>
    public bool FailFastOnIndexCreationFailure { get; set; } = true;

    /// <summary>
    /// Validates the options and throws <see cref="ArgumentException"/> if invalid.
    /// </summary>
    /// <remarks>
    /// Checks run in a fixed order and stop at the first failure: required strings and
    /// <see cref="Dimension"/>, the <see cref="DistanceStrategy"/> enum value,
    /// <see cref="TextSearchConfig"/>, <see cref="Index"/> (including its own
    /// <c>Validate()</c>), the characters of <see cref="SchemaName"/> and
    /// <see cref="TableName"/>, and finally the <see cref="TextSearchMode"/> enum value.
    /// </remarks>
    /// <exception cref="ArgumentException">
    /// A required value is empty or whitespace, <see cref="Dimension"/> is not positive,
    /// an enum option holds an undefined value, <see cref="Index"/> is null, or a
    /// schema/table name contains a character other than a letter, digit, or underscore.
    /// </exception>
    public void Validate()
    {
        if (string.IsNullOrWhiteSpace(ConnectionString))
        {
            throw new ArgumentException("ConnectionString must not be empty.", nameof(ConnectionString));
        }

        if (Dimension <= 0)
        {
            throw new ArgumentException("Dimension must be greater than 0.", nameof(Dimension));
        }

        if (string.IsNullOrWhiteSpace(TableName))
        {
            throw new ArgumentException("TableName must not be empty.", nameof(TableName));
        }

        if (string.IsNullOrWhiteSpace(SchemaName))
        {
            throw new ArgumentException("SchemaName must not be empty.", nameof(SchemaName));
        }

        // Enum properties can hold any integer via a cast, so reject values no member defines.
        if (!Enum.IsDefined(typeof(DistanceStrategy), DistanceStrategy))
        {
            throw new ArgumentException("DistanceStrategy is invalid.", nameof(DistanceStrategy));
        }

        // NOTE(review): only non-blankness is checked here. If TextSearchConfig is ever
        // interpolated into SQL text rather than bound as a parameter, it needs character
        // validation as well - confirm against the consuming store.
        if (string.IsNullOrWhiteSpace(TextSearchConfig))
        {
            throw new ArgumentException("TextSearchConfig must not be empty.", nameof(TextSearchConfig));
        }

        if (Index is null)
        {
            throw new ArgumentException("Index must not be null.", nameof(Index));
        }

        // Index-specific rules live on the index options type itself.
        Index.Validate();

        ValidateIdentifier(SchemaName, nameof(SchemaName));
        ValidateIdentifier(TableName, nameof(TableName));

        // Same guard as for DistanceStrategy. Kept after the other checks so the order in
        // which the earlier failures are reported is unaffected.
        if (!Enum.IsDefined(typeof(TextSearchMode), TextSearchMode))
        {
            throw new ArgumentException("TextSearchMode is invalid.", nameof(TextSearchMode));
        }
    }

    /// <summary>
    /// Ensures an identifier contains only safe characters (letters, digits, underscores)
    /// to prevent SQL injection via schema/table names.
    /// </summary>
    /// <remarks>
    /// Uses <see cref="char.IsLetterOrDigit(char)"/>, so any Unicode letter or digit is
    /// accepted, not just ASCII. An empty string passes this check; callers are expected
    /// to reject empty values beforehand, as <see cref="Validate"/> does.
    /// </remarks>
    /// <param name="value">The identifier to check. Must not be null.</param>
    /// <param name="paramName">Name reported as the offending parameter in the exception.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="value"/> contains a character that is not a letter, digit, or underscore.
    /// </exception>
    private static void ValidateIdentifier(string value, string paramName)
    {
        // NOTE(review): a leading digit and names longer than PostgreSQL's identifier
        // length limit are accepted here. Whether that is safe depends on whether the
        // generated SQL quotes identifiers - confirm against the consuming store.
        foreach (var c in value)
        {
            if (!char.IsLetterOrDigit(c) && c != '_')
            {
                throw new ArgumentException(
                    $"Identifier '{value}' contains invalid character '{c}'. Only letters, digits, and underscores are allowed.",
                    paramName);
            }
        }
    }
}