-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathdo-rag-operation.ps1
More file actions
158 lines (136 loc) · 5.82 KB
/
Copy pathdo-rag-operation.ps1
File metadata and controls
158 lines (136 loc) · 5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# do-rag-operation.ps1
# Generates markdown documentation, describes images, or generates RAG chunks
# for each OPC UA specification XML file, or embeds each rag-chunks.json file,
# using the Opc.Ua.RagUtility tool
# Script parameters. Every parameter is optional.
param(
# Which RagUtility command to run for each input file:
#   markdown -> "markdown", images -> "describe-images",
#   chunks -> "generate-chunks", embed -> "embed".
[ValidateSet("markdown", "images", "chunks", "embed")]
[string]$Operation = "markdown",
# Root folder that is searched recursively for input files
# (*.xml, or rag-chunks.json when -Operation is "embed").
[string]$SpecificationsPath = ".\specifications",
# Project handed to "dotnet build" and "dotnet run --project".
[string]$ProjectPath = ".\Opc.Ua.RagUtility",
# Passed to the tool as "-a"; only used by the "images" and "embed" operations.
[string]$OllamaUrl = "http://localhost:11434",
# Passed to the tool as "-vt"; only used by the "embed" operation.
[string]$VectorDbType = "pgsql",
# Passed to the tool as "-n"; only used by the "embed" operation.
[string]$CollectionName = "opcua_specifications",
[int]$Tokens = 400, # Passed to the tool as "-t"; only used by the "chunks" operation
[string]$Filter, # Keep only files whose directory path has a segment equal to this value (whole-segment match via -contains, not a substring match)
# Dry run: print the per-file command instead of executing it.
# The "dotnet build" step still runs because it happens before the loop.
[switch]$WhatIf
)
# Stop the script on the first cmdlet error instead of continuing.
$ErrorActionPreference = "Stop"

# Turn both user-supplied locations into absolute paths.
$SpecificationsPath = Resolve-Path $SpecificationsPath
$ProjectPath = Resolve-Path $ProjectPath

# Human-readable label for the selected operation (used in the banner only).
$operationLabels = @{
    "markdown" = "markdown documentation"
    "images"   = "image descriptions"
    "chunks"   = "RAG chunks"
    "embed"    = "embeddings"
}
$operationDescription = $operationLabels[$Operation]

# Collect the banner lines that apply to this run, then print them in one pass.
$bannerLines = @(
    "Generating $operationDescription for files in: $SpecificationsPath"
    "Using RagUtility project at: $ProjectPath"
)
$isEmbed = $Operation -eq "embed"
if ($Operation -eq "chunks") {
    $bannerLines += "Token count: $Tokens"
}
if ($isEmbed -or $Operation -eq "images") {
    $bannerLines += "Ollama URL: $OllamaUrl"
}
if ($isEmbed) {
    $bannerLines += "Vector DB Type: $VectorDbType"
    $bannerLines += "Collection Name: $CollectionName"
}
if ($Filter) {
    $bannerLines += "Filter: $Filter"
}
foreach ($line in $bannerLines) {
    Write-Host $line -ForegroundColor Cyan
}
# Compile the tool once up front; the per-file "dotnet run" calls use --no-build.
Write-Host "`nBuilding Opc.Ua.RagUtility..." -ForegroundColor Yellow
$buildArgs = @("build", $ProjectPath, "--configuration", "Release", "--verbosity", "quiet")
& dotnet @buildArgs

# A non-zero exit code from dotnet means the build failed: report it and abort.
$buildSucceeded = ($LASTEXITCODE -eq 0)
if (-not $buildSucceeded) {
    Write-Host "Failed to build Opc.Ua.RagUtility" -ForegroundColor Red
    exit 1
}
Write-Host "Build successful." -ForegroundColor Green
# Choose the file pattern for the requested operation: "embed" consumes
# rag-chunks.json files, every other operation consumes XML files.
if ($Operation -eq "embed") {
    $searchPattern = "rag-chunks.json"
    $fileType = "rag-chunks.json"
} else {
    $searchPattern = "*.xml"
    $fileType = "XML"
}

# @(...) forces an array even when zero or one file is returned, so the
# .Count checks below never read a property of $null or of a single FileInfo
# (which is an error under Set-StrictMode).
$inputFiles = @(Get-ChildItem -Path $SpecificationsPath -Filter $searchPattern -Recurse -File)

# Apply directory filter if specified: keep a file only when one segment of its
# directory path equals $Filter (whole-segment match, not a substring match).
if ($Filter) {
    $inputFiles = @($inputFiles | Where-Object {
        $dirParts = $_.DirectoryName -split '[/\\]'
        $dirParts -contains $Filter
    })
}

# Nothing to do is not an error: report it and leave with a success exit code.
if ($inputFiles.Count -eq 0) {
    $filterMsg = if ($Filter) { " matching filter '$Filter'" } else { "" }
    Write-Host "No $fileType files found in $SpecificationsPath$filterMsg" -ForegroundColor Yellow
    exit 0
}
Write-Host "`nFound $($inputFiles.Count) $fileType file(s) to process." -ForegroundColor Cyan
# Running totals; the summary at the end of the script reads both counters.
$stats = @{
    Processed = 0
    Failed = 0
}

# Run the selected RagUtility command once per input file. A failing file is
# counted and reported, but does not stop the remaining files from being processed.
foreach ($inputFile in $inputFiles) {
    $inputPath = $inputFile.FullName
    # Generated files are written next to the input file.
    $outputDir = $inputFile.DirectoryName

    # Progress prefix: 1-based index of the current file / total number of files.
    Write-Host "`n[$($stats.Processed + $stats.Failed + 1)/$($inputFiles.Count)] Processing: $($inputFile.Name)" -ForegroundColor White

    # Map the operation to the tool's sub-command, its arguments, the output
    # location shown to the user, and the message printed on success.
    switch ($Operation) {
        "markdown" {
            $outputPath = $outputDir
            $commandArgs = @("markdown", "-i", $inputPath, "-o", $outputPath)
            $successMessage = "Generated README.md"
        }
        "images" {
            $outputPath = Join-Path $outputDir "image-descriptions.json"
            $commandArgs = @("describe-images", "-i", $inputPath, "-o", $outputPath, "-a", $OllamaUrl)
            $successMessage = "Generated image-descriptions.json"
        }
        "chunks" {
            $outputPath = Join-Path $outputDir "rag-chunks.json"
            $imageDescPath = Join-Path $outputDir "image-descriptions.json"
            $commandArgs = @("generate-chunks", "-i", $inputPath, "-o", $outputPath, "-t", $Tokens)
            # Pass image descriptions along only when the file exists.
            # -LiteralPath: the path is a concrete file name, so characters such
            # as '[' or ']' in a directory name must not be read as wildcards.
            if (Test-Path -LiteralPath $imageDescPath) {
                $commandArgs += @("-m", $imageDescPath)
                Write-Host " Using image descriptions: $imageDescPath" -ForegroundColor DarkGray
            }
            $successMessage = "Generated rag-chunks.json"
        }
        "embed" {
            # No output file for this operation; this label is for display only.
            $outputPath = "VectorDb: $CollectionName"
            $commandArgs = @("embed", "-i", $inputPath, "-a", $OllamaUrl, "-vt", $VectorDbType, "-n", $CollectionName)
            $successMessage = "Embedded to VectorDb collection"
        }
    }

    # Dry run: show the command (with the same flags as the real invocation
    # below), count the file as processed, and move on without executing.
    if ($WhatIf) {
        Write-Host " [WhatIf] Would run: dotnet run --project `"$ProjectPath`" --configuration Release --no-build -- $($commandArgs -join ' ')" -ForegroundColor Yellow
        $stats.Processed++
        continue
    }

    Write-Host " Input: $inputPath" -ForegroundColor Gray
    Write-Host " Output: $outputPath" -ForegroundColor Gray

    # Run the tool; everything after "--" is forwarded to it as its own arguments.
    & dotnet run --project $ProjectPath --configuration Release --no-build -- @commandArgs

    # The tool's exit code decides whether this file counts as processed or failed.
    if ($LASTEXITCODE -ne 0) {
        Write-Host " Failed with exit code: $LASTEXITCODE" -ForegroundColor Red
        $stats.Failed++
    } else {
        Write-Host " Success: $successMessage" -ForegroundColor Green
        $stats.Processed++
    }
}
# Print the end-of-run summary: operation, optional filter, and both counters.
Write-Host "`n=== Generation Summary ===" -ForegroundColor Cyan
Write-Host "Operation: $Operation" -ForegroundColor White
if ($Filter) {
    Write-Host "Filter: $Filter" -ForegroundColor White
}
Write-Host "Successfully processed: $($stats.Processed)" -ForegroundColor Green

# Highlight the failure count in red only when something actually failed.
$failedColor = if ($stats.Failed -gt 0) { "Red" } else { "White" }
Write-Host "Failed: $($stats.Failed)" -ForegroundColor $failedColor

if ($WhatIf) {
    Write-Host "`nThis was a dry run. No files were actually generated." -ForegroundColor Yellow
    Write-Host "Run without -WhatIf to perform the actual generation." -ForegroundColor Yellow
}

# Report per-file failures to the caller through the exit code, matching the
# "exit 1" used when the build fails. Without this the script would end
# normally even though some files were not processed.
if ($stats.Failed -gt 0) {
    exit 1
}